From c2306789fe98946429af7462a2fd453454034a79 Mon Sep 17 00:00:00 2001 From: pst Date: Tue, 27 Feb 1996 01:59:15 +0000 Subject: Import updated Berkeley DB into CSRG branch --- lib/libc/db/btree/Makefile.inc | 4 +- lib/libc/db/btree/bt_close.c | 82 +- lib/libc/db/btree/bt_conv.c | 36 +- lib/libc/db/btree/bt_debug.c | 44 +- lib/libc/db/btree/bt_delete.c | 665 +- lib/libc/db/btree/bt_get.c | 141 +- lib/libc/db/btree/bt_open.c | 74 +- lib/libc/db/btree/bt_overflow.c | 26 +- lib/libc/db/btree/bt_page.c | 21 +- lib/libc/db/btree/bt_put.c | 106 +- lib/libc/db/btree/bt_search.c | 130 +- lib/libc/db/btree/bt_seq.c | 380 +- lib/libc/db/btree/bt_split.c | 52 +- lib/libc/db/btree/bt_utils.c | 103 +- lib/libc/db/btree/btree.h | 262 +- lib/libc/db/btree/extern.h | 14 +- lib/libc/db/docs/hash.usenix.ps | 12209 ++++++++++++++++++++++++++++++++++++ lib/libc/db/docs/libtp.usenix.ps | 12340 +++++++++++++++++++++++++++++++++++++ lib/libc/db/hash/extern.h | 24 +- lib/libc/db/hash/hash.c | 56 +- lib/libc/db/hash/hash.h | 125 +- lib/libc/db/hash/hash_bigkey.c | 76 +- lib/libc/db/hash/hash_buf.c | 28 +- lib/libc/db/hash/hash_log2.c | 12 +- lib/libc/db/hash/hash_page.c | 170 +- lib/libc/db/hash/hsearch.c | 4 +- lib/libc/db/hash/ndbm.c | 6 +- lib/libc/db/hash/page.h | 16 +- lib/libc/db/man/btree.3 | 11 +- lib/libc/db/man/hash.3 | 11 +- lib/libc/db/man/recno.3 | 26 +- lib/libc/db/mpool/mpool.c | 427 +- lib/libc/db/mpool/mpool.libtp | 746 +++ lib/libc/db/recno/extern.h | 4 +- lib/libc/db/recno/rec_close.c | 59 +- lib/libc/db/recno/rec_delete.c | 20 +- lib/libc/db/recno/rec_get.c | 103 +- lib/libc/db/recno/rec_open.c | 43 +- lib/libc/db/recno/rec_put.c | 55 +- lib/libc/db/recno/rec_search.c | 5 +- lib/libc/db/recno/rec_seq.c | 22 +- lib/libc/db/recno/rec_utils.c | 76 +- lib/libc/db/test/Makefile | 24 +- lib/libc/db/test/README | 41 +- lib/libc/db/test/dbtest.c | 209 +- lib/libc/db/test/run.test | 38 +- 46 files changed, 27446 insertions(+), 1680 deletions(-) create mode 100644 lib/libc/db/docs/hash.usenix.ps create mode 100644 lib/libc/db/docs/libtp.usenix.ps create mode 100644 lib/libc/db/mpool/mpool.libtp diff --git a/lib/libc/db/btree/Makefile.inc b/lib/libc/db/btree/Makefile.inc index 71f8dfc..8ed7649 100644 --- a/lib/libc/db/btree/Makefile.inc +++ b/lib/libc/db/btree/Makefile.inc @@ -1,7 +1,7 @@ -# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 +# @(#)Makefile.inc 8.2 (Berkeley) 7/14/94 .PATH: ${.CURDIR}/db/btree SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \ bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \ - bt_stack.c bt_utils.c + bt_utils.c diff --git a/lib/libc/db/btree/bt_close.c b/lib/libc/db/btree/bt_close.c index 66d8fc1..27f9ab6 100644 --- a/lib/libc/db/btree/bt_close.c +++ b/lib/libc/db/btree/bt_close.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_close.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_close.c 8.7 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -75,25 +75,30 @@ __bt_close(dbp) t->bt_pinned = NULL; } - /* - * Delete any already deleted record that we've been saving - * because the cursor pointed to it. - */ - if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) - return (RET_ERROR); - + /* Sync the tree. */ if (__bt_sync(dbp, 0) == RET_ERROR) return (RET_ERROR); + /* Close the memory pool. */ if (mpool_close(t->bt_mp) == RET_ERROR) return (RET_ERROR); - if (t->bt_stack) - free(t->bt_stack); - if (t->bt_kbuf) - free(t->bt_kbuf); - if (t->bt_dbuf) - free(t->bt_dbuf); + /* Free random memory. */ + if (t->bt_cursor.key.data != NULL) { + free(t->bt_cursor.key.data); + t->bt_cursor.key.size = 0; + t->bt_cursor.key.data = NULL; + } + if (t->bt_rkey.data) { + free(t->bt_rkey.data); + t->bt_rkey.size = 0; + t->bt_rkey.data = NULL; + } + if (t->bt_rdata.data) { + free(t->bt_rdata.data); + t->bt_rdata.size = 0; + t->bt_rdata.data = NULL; + } fd = t->bt_fd; free(t); @@ -117,8 +122,6 @@ __bt_sync(dbp, flags) { BTREE *t; int status; - PAGE *h; - void *p; t = dbp->internal; @@ -134,40 +137,15 @@ __bt_sync(dbp, flags) return (RET_ERROR); } - if (ISSET(t, B_INMEM | B_RDONLY) || !ISSET(t, B_MODIFIED)) + if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED)) return (RET_SUCCESS); - if (ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) + if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) return (RET_ERROR); - /* - * Nastiness. If the cursor has been marked for deletion, but not - * actually deleted, we have to make a copy of the page, delete the - * key/data item, sync the file, and then restore the original page - * contents. - */ - if (ISSET(t, B_DELCRSR)) { - if ((p = (void *)malloc(t->bt_psize)) == NULL) - return (RET_ERROR); - if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) - return (RET_ERROR); - memmove(p, h, t->bt_psize); - if ((status = - __bt_dleaf(t, h, t->bt_bcursor.index)) == RET_ERROR) - goto ecrsr; - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - } - if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS) - CLR(t, B_MODIFIED); - -ecrsr: if (ISSET(t, B_DELCRSR)) { - if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) - return (RET_ERROR); - memmove(h, p, t->bt_psize); - free(p); - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - } + F_CLR(t, B_MODIFIED); + return (status); } @@ -191,12 +169,12 @@ bt_meta(t) return (RET_ERROR); /* Fill in metadata. */ - m.m_magic = BTREEMAGIC; - m.m_version = BTREEVERSION; - m.m_psize = t->bt_psize; - m.m_free = t->bt_free; - m.m_nrecs = t->bt_nrecs; - m.m_flags = t->bt_flags & SAVEMETA; + m.magic = BTREEMAGIC; + m.version = BTREEVERSION; + m.psize = t->bt_psize; + m.free = t->bt_free; + m.nrecs = t->bt_nrecs; + m.flags = F_ISSET(t, SAVEMETA); memmove(p, &m, sizeof(BTMETA)); mpool_put(t->bt_mp, p, MPOOL_DIRTY); diff --git a/lib/libc/db/btree/bt_conv.c b/lib/libc/db/btree/bt_conv.c index eb49340..1cb208b1 100644 --- a/lib/libc/db/btree/bt_conv.c +++ b/lib/libc/db/btree/bt_conv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_conv.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -68,7 +68,7 @@ __bt_pgin(t, pg, pp) u_char flags; char *p; - if (!ISSET(((BTREE *)t), B_NEEDSWAP)) + if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) return; if (pg == P_META) { mswap(pp); @@ -89,7 +89,7 @@ __bt_pgin(t, pg, pp) M_16_SWAP(h->linp[i]); p = (char *)GETBINTERNAL(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); if (*(u_char *)p & P_BIGKEY) { @@ -104,9 +104,9 @@ __bt_pgin(t, pg, pp) M_16_SWAP(h->linp[i]); p = (char *)GETBLEAF(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); flags = *(u_char *)p; if (flags & (P_BIGKEY | P_BIGDATA)) { p += sizeof(u_char); @@ -116,7 +116,7 @@ __bt_pgin(t, pg, pp) P_32_SWAP(p); } if (flags & P_BIGDATA) { - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); @@ -136,7 +136,7 @@ __bt_pgout(t, pg, pp) u_char flags; char *p; - if (!ISSET(((BTREE *)t), B_NEEDSWAP)) + if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) return; if (pg == P_META) { mswap(pp); @@ -149,7 +149,7 @@ __bt_pgout(t, pg, pp) for (i = 0; i < top; i++) { p = (char *)GETBINTERNAL(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); if (*(u_char *)p & P_BIGKEY) { @@ -164,9 +164,9 @@ __bt_pgout(t, pg, pp) for (i = 0; i < top; i++) { p = (char *)GETBLEAF(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); flags = *(u_char *)p; if (flags & (P_BIGKEY | P_BIGDATA)) { p += sizeof(u_char); @@ -176,7 +176,7 @@ __bt_pgout(t, pg, pp) P_32_SWAP(p); } if (flags & P_BIGDATA) { - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); @@ -206,16 +206,16 @@ mswap(pg) char *p; p = (char *)pg; - P_32_SWAP(p); /* m_magic */ + P_32_SWAP(p); /* magic */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_version */ + P_32_SWAP(p); /* version */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_psize */ + P_32_SWAP(p); /* psize */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_free */ + P_32_SWAP(p); /* free */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_nrecs */ + P_32_SWAP(p); /* nrecs */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_flags */ + P_32_SWAP(p); /* flags */ p += sizeof(u_int32_t); } diff --git a/lib/libc/db/btree/bt_debug.c b/lib/libc/db/btree/bt_debug.c index 5f9ac1d..3aefbe7 100644 --- a/lib/libc/db/btree/bt_debug.c +++ b/lib/libc/db/btree/bt_debug.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_debug.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_debug.c 8.5 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -65,24 +65,22 @@ __bt_dump(dbp) t = dbp->internal; (void)fprintf(stderr, "%s: pgsz %d", - ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); - if (ISSET(t, R_RECNO)) + F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); + if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " keys %lu", t->bt_nrecs); #undef X #define X(flag, name) \ - if (ISSET(t, flag)) { \ + if (F_ISSET(t, flag)) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } - if (t->bt_flags) { + if (t->flags != 0) { sep = " flags ("; - X(B_DELCRSR, "DELCRSR"); X(R_FIXLEN, "FIXLEN"); X(B_INMEM, "INMEM"); X(B_NODUPS, "NODUPS"); X(B_RDONLY, "RDONLY"); X(R_RECNO, "RECNO"); - X(B_SEQINIT, "SEQINIT"); X(B_METADIRTY,"METADIRTY"); (void)fprintf(stderr, ")\n"); } @@ -108,19 +106,19 @@ __bt_dmpage(h) char *sep; m = (BTMETA *)h; - (void)fprintf(stderr, "magic %lx\n", m->m_magic); - (void)fprintf(stderr, "version %lu\n", m->m_version); - (void)fprintf(stderr, "psize %lu\n", m->m_psize); - (void)fprintf(stderr, "free %lu\n", m->m_free); - (void)fprintf(stderr, "nrecs %lu\n", m->m_nrecs); - (void)fprintf(stderr, "flags %lu", m->m_flags); + (void)fprintf(stderr, "magic %lx\n", m->magic); + (void)fprintf(stderr, "version %lu\n", m->version); + (void)fprintf(stderr, "psize %lu\n", m->psize); + (void)fprintf(stderr, "free %lu\n", m->free); + (void)fprintf(stderr, "nrecs %lu\n", m->nrecs); + (void)fprintf(stderr, "flags %lu", m->flags); #undef X #define X(flag, name) \ - if (m->m_flags & flag) { \ + if (m->flags & flag) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } - if (m->m_flags) { + if (m->flags) { sep = " ("; X(B_NODUPS, "NODUPS"); X(R_RECNO, "RECNO"); @@ -192,7 +190,7 @@ __bt_dpage(h) h->lower, h->upper, top); for (cur = 0; cur < top; cur++) { (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]); - switch(h->flags & P_TYPE) { + switch (h->flags & P_TYPE) { case P_BINTERNAL: bi = GETBINTERNAL(h, cur); (void)fprintf(stderr, @@ -214,14 +212,14 @@ __bt_dpage(h) (void)fprintf(stderr, "big key page %lu size %u/", *(pgno_t *)bl->bytes, - *(size_t *)(bl->bytes + sizeof(pgno_t))); + *(u_int32_t *)(bl->bytes + sizeof(pgno_t))); else if (bl->ksize) (void)fprintf(stderr, "%s/", bl->bytes); if (bl->flags & P_BIGDATA) (void)fprintf(stderr, "big data page %lu size %u", *(pgno_t *)(bl->bytes + bl->ksize), - *(size_t *)(bl->bytes + bl->ksize + + *(u_int32_t *)(bl->bytes + bl->ksize + sizeof(pgno_t))); else if (bl->dsize) (void)fprintf(stderr, "%.*s", @@ -233,7 +231,7 @@ __bt_dpage(h) (void)fprintf(stderr, "big data page %lu size %u", *(pgno_t *)rl->bytes, - *(size_t *)(rl->bytes + sizeof(pgno_t))); + *(u_int32_t *)(rl->bytes + sizeof(pgno_t))); else if (rl->dsize) (void)fprintf(stderr, "%.*s", (int)rl->dsize, rl->bytes); @@ -267,7 +265,7 @@ __bt_stat(dbp) pcont = pinternal = pleaf = 0; nkeys = ifree = lfree = 0; for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { - switch(h->flags & P_TYPE) { + switch (h->flags & P_TYPE) { case P_BINTERNAL: case P_RINTERNAL: ++pinternal; @@ -295,7 +293,7 @@ __bt_stat(dbp) (void)mpool_put(t->bt_mp, h, 0); break; } - i = ISSET(t, R_RECNO) ? + i = F_ISSET(t, R_RECNO) ? GETRINTERNAL(h, 0)->pgno : GETBINTERNAL(h, 0)->pgno; (void)mpool_put(t->bt_mp, h, 0); @@ -303,7 +301,7 @@ __bt_stat(dbp) (void)fprintf(stderr, "%d level%s with %ld keys", levels, levels == 1 ? "" : "s", nkeys); - if (ISSET(t, R_RECNO)) + if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " (%ld header count)", t->bt_nrecs); (void)fprintf(stderr, "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n", diff --git a/lib/libc/db/btree/bt_delete.c b/lib/libc/db/btree/bt_delete.c index 5dba534..ece1ab6 100644 --- a/lib/libc/db/btree/bt_delete.c +++ b/lib/libc/db/btree/bt_delete.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -47,18 +47,17 @@ static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94"; #include #include "btree.h" -static int bt_bdelete __P((BTREE *, const DBT *)); +static int __bt_bdelete __P((BTREE *, const DBT *)); +static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); +static int __bt_pdelete __P((BTREE *, PAGE *)); +static int __bt_relink __P((BTREE *, PAGE *)); +static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); /* - * __BT_DELETE -- Delete the item(s) referenced by a key. + * __bt_delete + * Delete the item(s) referenced by a key. * - * Parameters: - * dbp: pointer to access method - * key: key to delete - * flags: R_CURSOR if deleting what the cursor references - * - * Returns: - * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + * Return RET_SPECIAL if the key is not found. */ int __bt_delete(dbp, key, flags) @@ -67,6 +66,8 @@ __bt_delete(dbp, key, flags) u_int flags; { BTREE *t; + CURSOR *c; + PAGE *h; int status; t = dbp->internal; @@ -77,242 +78,433 @@ __bt_delete(dbp, key, flags) t->bt_pinned = NULL; } - if (ISSET(t, B_RDONLY)) { + /* Check for change to a read-only tree. */ + if (F_ISSET(t, B_RDONLY)) { errno = EPERM; return (RET_ERROR); } - switch(flags) { + switch (flags) { case 0: - status = bt_bdelete(t, key); + status = __bt_bdelete(t, key); break; case R_CURSOR: /* - * If flags is R_CURSOR, delete the cursor; must already have - * started a scan and not have already deleted the record. For - * the delete cursor bit to have been set requires that the - * scan be initialized, so no reason to check. + * If flags is R_CURSOR, delete the cursor. Must already + * have started a scan and not have already deleted it. */ - if (!ISSET(t, B_SEQINIT)) - goto einval; - status = ISSET(t, B_DELCRSR) ? - RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); - break; + c = &t->bt_cursor; + if (F_ISSET(c, CURS_INIT)) { + if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) + return (RET_ERROR); + + /* + * If the page is about to be emptied, we'll need to + * delete it, which means we have to acquire a stack. + */ + if (NEXTINDEX(h) == 1) + if (__bt_stkacq(t, &h, &t->bt_cursor)) + return (RET_ERROR); + + status = __bt_dleaf(t, NULL, h, c->pg.index); + + if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + } else + mpool_put(t->bt_mp, + h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); + break; + } + /* FALLTHROUGH */ default: -einval: errno = EINVAL; + errno = EINVAL; return (RET_ERROR); } if (status == RET_SUCCESS) - SET(t, B_MODIFIED); + F_SET(t, B_MODIFIED); return (status); } /* - * BT_BDELETE -- Delete all key/data pairs matching the specified key. + * __bt_stkacq -- + * Acquire a stack so we can delete a cursor entry. * * Parameters: - * tree: tree + * t: tree + * hp: pointer to current, pinned PAGE pointer + * c: pointer to the cursor + * + * Returns: + * 0 on success, 1 on failure + */ +static int +__bt_stkacq(t, hp, c) + BTREE *t; + PAGE **hp; + CURSOR *c; +{ + BINTERNAL *bi; + EPG *e; + EPGNO *parent; + PAGE *h; + indx_t index; + pgno_t pgno; + recno_t nextpg, prevpg; + int exact, level; + + /* + * Find the first occurrence of the key in the tree. Toss the + * currently locked page so we don't hit an already-locked page. + */ + h = *hp; + mpool_put(t->bt_mp, h, 0); + if ((e = __bt_search(t, &c->key, &exact)) == NULL) + return (1); + h = e->page; + + /* See if we got it in one shot. */ + if (h->pgno == c->pg.pgno) + goto ret; + + /* + * Move right, looking for the page. At each move we have to move + * up the stack until we don't have to move to the next page. If + * we have to change pages at an internal level, we have to fix the + * stack back up. + */ + while (h->pgno != c->pg.pgno) { + if ((nextpg = h->nextpg) == P_INVALID) + break; + mpool_put(t->bt_mp, h, 0); + + /* Move up the stack. */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != NEXTINDEX(h) - 1) { + index = parent->index + 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + BT_PUSH(t, pgno, 0); + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + index = 0; + } + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) + return (1); + } + + if (h->pgno == c->pg.pgno) + goto ret; + + /* Reacquire the original stack. */ + mpool_put(t->bt_mp, h, 0); + if ((e = __bt_search(t, &c->key, &exact)) == NULL) + return (1); + h = e->page; + + /* + * Move left, looking for the page. At each move we have to move + * up the stack until we don't have to change pages to move to the + * next page. If we have to change pages at an internal level, we + * have to fix the stack back up. + */ + while (h->pgno != c->pg.pgno) { + if ((prevpg = h->prevpg) == P_INVALID) + break; + mpool_put(t->bt_mp, h, 0); + + /* Move up the stack. */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != 0) { + index = parent->index - 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + + index = NEXTINDEX(h) - 1; + BT_PUSH(t, pgno, index); + } + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) + return (1); + } + + +ret: mpool_put(t->bt_mp, h, 0); + return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); +} + +/* + * __bt_bdelete -- + * Delete all key/data pairs matching the specified key. + * + * Parameters: + * t: tree * key: key to delete * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ static int -bt_bdelete(t, key) +__bt_bdelete(t, key) BTREE *t; const DBT *key; { - EPG *e, save; + EPG *e; PAGE *h; - pgno_t cpgno, pg; - indx_t cindex; - int deleted, dirty1, dirty2, exact; + int deleted, exact, redo; + + deleted = 0; /* Find any matching record; __bt_search pins the page. */ - if ((e = __bt_search(t, key, &exact)) == NULL) - return (RET_ERROR); +loop: if ((e = __bt_search(t, key, &exact)) == NULL) + return (deleted ? RET_SUCCESS : RET_ERROR); if (!exact) { mpool_put(t->bt_mp, e->page, 0); - return (RET_SPECIAL); + return (deleted ? RET_SUCCESS : RET_SPECIAL); } /* - * Delete forward, then delete backward, from the found key. The - * ordering is so that the deletions don't mess up the page refs. - * The first loop deletes the key from the original page, the second - * unpins the original page. In the first loop, dirty1 is set if - * the original page is modified, and dirty2 is set if any subsequent - * pages are modified. In the second loop, dirty1 starts off set if - * the original page has been modified, and is set if any subsequent - * pages are modified. - * - * If find the key referenced by the cursor, don't delete it, just - * flag it for future deletion. The cursor page number is P_INVALID - * unless the sequential scan is initialized, so no reason to check. - * A special case is when the already deleted cursor record was the - * only record found. If so, then the delete opertion fails as no - * records were deleted. - * - * Cycle in place in the current page until the current record doesn't - * match the key or the page is empty. If the latter, walk forward, - * skipping empty pages and repeating until a record doesn't match - * the key or the end of the tree is reached. + * Delete forward, then delete backward, from the found key. If + * there are duplicates and we reach either side of the page, do + * the key search again, so that we get them all. */ - cpgno = t->bt_bcursor.pgno; - cindex = t->bt_bcursor.index; - save = *e; - dirty1 = 0; - for (h = e->page, deleted = 0;;) { - dirty2 = 0; - do { - if (h->pgno == cpgno && e->index == cindex) { - if (!ISSET(t, B_DELCRSR)) { - SET(t, B_DELCRSR); - deleted = 1; - } - ++e->index; - } else { - if (__bt_dleaf(t, h, e->index)) { - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, dirty2); - mpool_put(t->bt_mp, save.page, dirty1); + redo = 0; + h = e->page; + do { + if (__bt_dleaf(t, key, h, e->index)) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + if (F_ISSET(t, B_NODUPS)) { + if (NEXTINDEX(h) == 0) { + if (__bt_pdelete(t, h)) return (RET_ERROR); - } - if (h->pgno == save.page->pgno) - dirty1 = MPOOL_DIRTY; - else - dirty2 = MPOOL_DIRTY; - deleted = 1; - } - } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); - - /* - * Quit if didn't find a match, no next page, or first key on - * the next page doesn't match. Don't unpin the original page - * unless an error occurs. - */ - if (e->index < NEXTINDEX(h)) - break; - for (;;) { - if ((pg = h->nextpg) == P_INVALID) - goto done1; - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, dirty2); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { - mpool_put(t->bt_mp, save.page, dirty1); - return (RET_ERROR); - } - if (NEXTINDEX(h) != 0) { - e->page = h; - e->index = 0; - break; - } + } else + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); } + deleted = 1; + } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); + /* Check for right-hand edge of the page. */ + if (e->index == NEXTINDEX(h)) + redo = 1; + + /* Delete from the key to the beginning of the page. */ + while (e->index-- > 0) { if (__bt_cmp(t, key, e) != 0) break; + if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + if (e->index == 0) + redo = 1; } - /* - * Reach here with the original page and the last page referenced - * pinned (they may be the same). Release it if not the original. - */ -done1: if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, dirty2); + /* Check for an empty page. */ + if (NEXTINDEX(h) == 0) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + goto loop; + } + + /* Put the page. */ + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + if (redo) + goto loop; + return (RET_SUCCESS); +} + +/* + * __bt_pdelete -- + * Delete a single page from the tree. + * + * Parameters: + * t: tree + * h: leaf page + * + * Returns: + * RET_SUCCESS, RET_ERROR. + * + * Side-effects: + * mpool_put's the page + */ +static int +__bt_pdelete(t, h) + BTREE *t; + PAGE *h; +{ + BINTERNAL *bi; + PAGE *pg; + EPGNO *parent; + indx_t cnt, index, *ip, offset; + u_int32_t nksize; + char *from; /* - * Walk backwards from the record previous to the record returned by - * __bt_search, skipping empty pages, until a record doesn't match - * the key or reach the beginning of the tree. + * Walk the parent page stack -- a LIFO stack of the pages that were + * traversed when we searched for the page where the delete occurred. + * Each stack entry is a page number and a page index offset. The + * offset is for the page traversed on the search. We've just deleted + * a page, so we have to delete the key from the parent page. + * + * If the delete from the parent page makes it empty, this process may + * continue all the way up the tree. We stop if we reach the root page + * (which is never deleted, it's just not worth the effort) or if the + * delete does not empty the page. */ - *e = save; - for (;;) { - if (e->index) - --e->index; - for (h = e->page; e->index; --e->index) { - if (__bt_cmp(t, key, e) != 0) - goto done2; - if (h->pgno == cpgno && e->index == cindex) { - if (!ISSET(t, B_DELCRSR)) { - SET(t, B_DELCRSR); - deleted = 1; - } + while ((parent = BT_POP(t)) != NULL) { + /* Get the parent page. */ + if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (RET_ERROR); + + index = parent->index; + bi = GETBINTERNAL(pg, index); + + /* Free any overflow pages. */ + if (bi->flags & P_BIGKEY && + __ovfl_delete(t, bi->bytes) == RET_ERROR) { + mpool_put(t->bt_mp, pg, 0); + return (RET_ERROR); + } + + /* + * Free the parent if it has only the one key and it's not the + * root page. If it's the rootpage, turn it back into an empty + * leaf page. + */ + if (NEXTINDEX(pg) == 1) + if (pg->pgno == P_ROOT) { + pg->lower = BTDATAOFF; + pg->upper = t->bt_psize; + pg->flags = P_BLEAF; } else { - if (__bt_dleaf(t, h, e->index) == RET_ERROR) { - mpool_put(t->bt_mp, h, dirty1); + if (__bt_relink(t, pg) || __bt_free(t, pg)) return (RET_ERROR); - } - if (h->pgno == save.page->pgno) - dirty1 = MPOOL_DIRTY; - deleted = 1; + continue; } + else { + /* Pack remaining key items at the end of the page. */ + nksize = NBINTERNAL(bi->ksize); + from = (char *)pg + pg->upper; + memmove(from + nksize, from, (char *)bi - from); + pg->upper += nksize; + + /* Adjust indices' offsets, shift the indices down. */ + offset = pg->linp[index]; + for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nksize; + for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; + pg->lower -= sizeof(indx_t); } - if ((pg = h->prevpg) == P_INVALID) - goto done2; - mpool_put(t->bt_mp, h, dirty1); - dirty1 = 0; - if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - e->index = NEXTINDEX(e->page); + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + break; } - /* - * Reach here with the last page that was looked at pinned. Release - * it. - */ -done2: mpool_put(t->bt_mp, h, dirty1); - return (deleted ? RET_SUCCESS : RET_SPECIAL); + /* Free the leaf page, as long as it wasn't the root. */ + if (h->pgno == P_ROOT) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); + } + return (__bt_relink(t, h) || __bt_free(t, h)); } /* - * __BT_DLEAF -- Delete a single record from a leaf page. + * __bt_dleaf -- + * Delete a single record from a leaf page. * * Parameters: * t: tree - * index: index on current page to delete + * key: referenced key + * h: page + * index: index on page to delete * * Returns: * RET_SUCCESS, RET_ERROR. */ int -__bt_dleaf(t, h, index) +__bt_dleaf(t, key, h, index) BTREE *t; + const DBT *key; PAGE *h; - indx_t index; + u_int index; { - register BLEAF *bl; - register indx_t cnt, *ip, offset; - register size_t nbytes; - char *from; + BLEAF *bl; + indx_t cnt, *ip, offset; + u_int32_t nbytes; void *to; + char *from; - /* - * Delete a record from a btree leaf page. Internal records are never - * deleted from internal pages, regardless of the records that caused - * them to be added being deleted. Pages made empty by deletion are - * not reclaimed. They are, however, made available for reuse. - * - * Pack the remaining entries at the end of the page, shift the indices - * down, overwriting the deleted record and its index. If the record - * uses overflow pages, make them available for reuse. - */ + /* If this record is referenced by the cursor, delete the cursor. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && + __bt_curdel(t, key, h, index)) + return (RET_ERROR); + + /* If the entry uses overflow pages, make them available for reuse. */ to = bl = GETBLEAF(h, index); if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) return (RET_ERROR); if (bl->flags & P_BIGDATA && __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) return (RET_ERROR); - nbytes = NBLEAF(bl); - /* - * Compress the key/data pairs. Compress and adjust the [BR]LEAF - * offsets. Reset the headers. - */ + /* Pack the remaining key/data items at the end of the page. */ + nbytes = NBLEAF(bl); from = (char *)h + h->upper; memmove(from + nbytes, from, (char *)to - from); h->upper += nbytes; + /* Adjust the indices' offsets, shift the indices down. */ offset = h->linp[index]; for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) if (ip[0] < offset) @@ -320,5 +512,146 @@ __bt_dleaf(t, h, index) for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; h->lower -= sizeof(indx_t); + + /* If the cursor is on this page, adjust it as necessary. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) + --t->bt_cursor.pg.index; + return (RET_SUCCESS); } + +/* + * __bt_curdel -- + * Delete the cursor. + * + * Parameters: + * t: tree + * key: referenced key (or NULL) + * h: page + * index: index on page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +static int +__bt_curdel(t, key, h, index) + BTREE *t; + const DBT *key; + PAGE *h; + u_int index; +{ + CURSOR *c; + EPG e; + PAGE *pg; + int curcopy, status; + + /* + * If there are duplicates, move forward or backward to one. + * Otherwise, copy the key into the cursor area. + */ + c = &t->bt_cursor; + F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); + + curcopy = 0; + if (!F_ISSET(t, B_NODUPS)) { + /* + * We're going to have to do comparisons. If we weren't + * provided a copy of the key, i.e. the user is deleting + * the current cursor position, get one. + */ + if (key == NULL) { + e.page = h; + e.index = index; + if ((status = __bt_ret(t, &e, + &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) + return (status); + curcopy = 1; + key = &c->key; + } + /* Check previous key, if not at the beginning of the page. */ + if (index > 0) { + e.page = h; + e.index = index - 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_BEFORE); + goto dup2; + } + } + /* Check next key, if not at the end of the page. */ + if (index < NEXTINDEX(h) - 1) { + e.page = h; + e.index = index + 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_AFTER); + goto dup2; + } + } + /* Check previous key if at the beginning of the page. */ + if (index == 0 && h->prevpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (RET_ERROR); + e.page = pg; + e.index = NEXTINDEX(pg) - 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_BEFORE); + goto dup1; + } + mpool_put(t->bt_mp, pg, 0); + } + /* Check next key if at the end of the page. */ + if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (RET_ERROR); + e.page = pg; + e.index = 0; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_AFTER); +dup1: mpool_put(t->bt_mp, pg, 0); +dup2: c->pg.pgno = e.page->pgno; + c->pg.index = e.index; + return (RET_SUCCESS); + } + mpool_put(t->bt_mp, pg, 0); + } + } + e.page = h; + e.index = index; + if (curcopy || (status = + __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { + F_SET(c, CURS_ACQUIRE); + return (RET_SUCCESS); + } + return (status); +} + +/* + * __bt_relink -- + * Link around a deleted page. + * + * Parameters: + * t: tree + * h: page to be deleted + */ +static int +__bt_relink(t, h) + BTREE *t; + PAGE *h; +{ + PAGE *pg; + + if (h->nextpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (RET_ERROR); + pg->prevpg = h->prevpg; + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + } + if (h->prevpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (RET_ERROR); + pg->nextpg = h->nextpg; + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + } + return (0); +} diff --git a/lib/libc/db/btree/bt_get.c b/lib/libc/db/btree/bt_get.c index 28b2d60..74824c7 100644 --- a/lib/libc/db/btree/bt_get.c +++ b/lib/libc/db/btree/bt_get.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_get.c 8.2 (Berkeley) 9/7/93"; +static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -91,148 +91,15 @@ __bt_get(dbp, key, data, flags) return (RET_SPECIAL); } - /* - * A special case is if we found the record but it's flagged for - * deletion. In this case, we want to find another record with the - * same key, if it exists. Rather than look around the tree we call - * __bt_first and have it redo the search, as __bt_first will not - * return keys marked for deletion. Slow, but should never happen. - */ - if (ISSET(t, B_DELCRSR) && e->page->pgno == t->bt_bcursor.pgno && - e->index == t->bt_bcursor.index) { - mpool_put(t->bt_mp, e->page, 0); - if ((e = __bt_first(t, key, &exact)) == NULL) - return (RET_ERROR); - if (!exact) - return (RET_SPECIAL); - } + status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0); - status = __bt_ret(t, e, NULL, data); /* * If the user is doing concurrent access, we copied the * key/data, toss the page. */ - if (ISSET(t, B_DB_LOCK)) + if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; return (status); } - -/* - * __BT_FIRST -- Find the first entry. - * - * Parameters: - * t: the tree - * key: the key - * - * Returns: - * The first entry in the tree greater than or equal to key. - */ -EPG * -__bt_first(t, key, exactp) - BTREE *t; - const DBT *key; - int *exactp; -{ - register PAGE *h; - register EPG *e; - EPG save; - pgno_t cpgno, pg; - indx_t cindex; - int found; - - /* - * Find any matching record; __bt_search pins the page. Only exact - * matches are tricky, otherwise just return the location of the key - * if it were to be inserted into the tree. - */ - if ((e = __bt_search(t, key, exactp)) == NULL) - return (NULL); - if (!*exactp) - return (e); - - if (ISSET(t, B_DELCRSR)) { - cpgno = t->bt_bcursor.pgno; - cindex = t->bt_bcursor.index; - } else { - cpgno = P_INVALID; - cindex = 0; /* GCC thinks it's uninitialized. */ - } - - /* - * Walk backwards, skipping empty pages, as long as the entry matches - * and there are keys left in the tree. Save a copy of each match in - * case we go too far. A special case is that we don't return a match - * on records that the cursor references that have already been flagged - * for deletion. - */ - save = *e; - h = e->page; - found = 0; - do { - if (cpgno != h->pgno || cindex != e->index) { - if (save.page->pgno != e->page->pgno) { - mpool_put(t->bt_mp, save.page, 0); - save = *e; - } else - save.index = e->index; - found = 1; - } - /* - * Make a special effort not to unpin the page the last (or - * original) match was on, but also make sure it's unpinned - * if an error occurs. - */ - while (e->index == 0) { - if (h->prevpg == P_INVALID) - goto done1; - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) { - if (h->pgno == save.page->pgno) - mpool_put(t->bt_mp, save.page, 0); - return (NULL); - } - e->page = h; - e->index = NEXTINDEX(h); - } - --e->index; - } while (__bt_cmp(t, key, e) == 0); - - /* - * Reach here with the last page that was looked at pinned, which may - * or may not be the same as the last (or original) match page. If - * it's not useful, release it. - */ -done1: if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, 0); - - /* - * If still haven't found a record, the only possibility left is the - * next one. Move forward one slot, skipping empty pages and check. - */ - if (!found) { - h = save.page; - if (++save.index == NEXTINDEX(h)) { - do { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) { - *exactp = 0; - return (e); - } - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (NULL); - } while ((save.index = NEXTINDEX(h)) == 0); - save.page = h; - } - if (__bt_cmp(t, key, &save) != 0) { - *exactp = 0; - return (e); - } - } - *e = save; - *exactp = 1; - return (e); -} diff --git a/lib/libc/db/btree/bt_open.c b/lib/libc/db/btree/bt_open.c index ec79cf7..f052249 100644 --- a/lib/libc/db/btree/bt_open.c +++ b/lib/libc/db/btree/bt_open.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_open.c 8.5 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_open.c 8.10 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ /* @@ -61,6 +61,11 @@ static char sccsid[] = "@(#)bt_open.c 8.5 (Berkeley) 2/21/94"; #include #include "btree.h" +#ifdef DEBUG +#undef MINPSIZE +#define MINPSIZE 128 +#endif + static int byteorder __P((void)); static int nroot __P((BTREE *)); static int tmp __P((void)); @@ -157,7 +162,6 @@ __bt_open(fname, flags, mode, openinfo, dflags) if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL) goto err; memset(t, 0, sizeof(BTREE)); - t->bt_bcursor.pgno = P_INVALID; t->bt_fd = -1; /* Don't close unopened fd on error. */ t->bt_lorder = b.lorder; t->bt_order = NOT; @@ -167,9 +171,9 @@ __bt_open(fname, flags, mode, openinfo, dflags) if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL) goto err; - t->bt_flags = 0; + memset(t->bt_dbp, 0, sizeof(DB)); if (t->bt_lorder != machine_lorder) - SET(t, B_NEEDSWAP); + F_SET(t, B_NEEDSWAP); dbp->type = DB_BTREE; dbp->internal = t; @@ -186,9 +190,9 @@ __bt_open(fname, flags, mode, openinfo, dflags) * open a backing temporary file. Otherwise, it's a disk-based tree. */ if (fname) { - switch(flags & O_ACCMODE) { + switch (flags & O_ACCMODE) { case O_RDONLY: - SET(t, B_RDONLY); + F_SET(t, B_RDONLY); break; case O_RDWR: break; @@ -205,7 +209,7 @@ __bt_open(fname, flags, mode, openinfo, dflags) goto einval; if ((t->bt_fd = tmp()) == -1) goto err; - SET(t, B_INMEM); + F_SET(t, B_INMEM); } if (fcntl(t->bt_fd, F_SETFD, 1) == -1) @@ -214,8 +218,7 @@ __bt_open(fname, flags, mode, openinfo, dflags) if (fstat(t->bt_fd, &sb)) goto err; if (sb.st_size) { - nr = read(t->bt_fd, &m, sizeof(BTMETA)); - if (nr < 0) + if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0) goto err; if (nr != sizeof(BTMETA)) goto eftype; @@ -228,28 +231,28 @@ __bt_open(fname, flags, mode, openinfo, dflags) * don't bother to return an error, we just clear the NEEDSWAP * bit. */ - if (m.m_magic == BTREEMAGIC) - CLR(t, B_NEEDSWAP); + if (m.magic == BTREEMAGIC) + F_CLR(t, B_NEEDSWAP); else { - SET(t, B_NEEDSWAP); - M_32_SWAP(m.m_magic); - M_32_SWAP(m.m_version); - M_32_SWAP(m.m_psize); - M_32_SWAP(m.m_free); - M_32_SWAP(m.m_nrecs); - M_32_SWAP(m.m_flags); + F_SET(t, B_NEEDSWAP); + M_32_SWAP(m.magic); + M_32_SWAP(m.version); + M_32_SWAP(m.psize); + M_32_SWAP(m.free); + M_32_SWAP(m.nrecs); + M_32_SWAP(m.flags); } - if (m.m_magic != BTREEMAGIC || m.m_version != BTREEVERSION) + if (m.magic != BTREEMAGIC || m.version != BTREEVERSION) goto eftype; - if (m.m_psize < MINPSIZE || m.m_psize > MAX_PAGE_OFFSET + 1 || - m.m_psize & sizeof(indx_t) - 1) + if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 || + m.psize & sizeof(indx_t) - 1) goto eftype; - if (m.m_flags & ~SAVEMETA) + if (m.flags & ~SAVEMETA) goto eftype; - b.psize = m.m_psize; - t->bt_flags |= m.m_flags; - t->bt_free = m.m_free; - t->bt_nrecs = m.m_nrecs; + b.psize = m.psize; + F_SET(t, m.flags); + t->bt_free = m.free; + t->bt_nrecs = m.nrecs; } else { /* * Set the page size to the best value for I/O to this file. @@ -265,11 +268,11 @@ __bt_open(fname, flags, mode, openinfo, dflags) /* Set flag if duplicates permitted. */ if (!(b.flags & R_DUP)) - SET(t, B_NODUPS); + F_SET(t, B_NODUPS); t->bt_free = P_INVALID; t->bt_nrecs = 0; - SET(t, B_METADIRTY); + F_SET(t, B_METADIRTY); } t->bt_psize = b.psize; @@ -304,7 +307,7 @@ __bt_open(fname, flags, mode, openinfo, dflags) if ((t->bt_mp = mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL) goto err; - if (!ISSET(t, B_INMEM)) + if (!F_ISSET(t, B_INMEM)) mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t); /* Create a root page if new tree. */ @@ -313,11 +316,11 @@ __bt_open(fname, flags, mode, openinfo, dflags) /* Global flags. */ if (dflags & DB_LOCK) - SET(t, B_DB_LOCK); + F_SET(t, B_DB_LOCK); if (dflags & DB_SHMEM) - SET(t, B_DB_SHMEM); + F_SET(t, B_DB_SHMEM); if (dflags & DB_TXN) - SET(t, B_DB_TXN); + F_SET(t, B_DB_TXN); return (dbp); @@ -357,8 +360,9 @@ nroot(t) mpool_put(t->bt_mp, meta, 0); return (RET_SUCCESS); } - if (errno != EINVAL) + if (errno != EINVAL) /* It's OK to not exist. */ return (RET_ERROR); + errno = 0; if ((meta = mpool_new(t->bt_mp, &npg)) == NULL) return (RET_ERROR); @@ -432,7 +436,7 @@ __bt_fd(dbp) } /* In-memory database can't have a file descriptor. */ - if (ISSET(t, B_INMEM)) { + if (F_ISSET(t, B_INMEM)) { errno = ENOENT; return (-1); } diff --git a/lib/libc/db/btree/bt_overflow.c b/lib/libc/db/btree/bt_overflow.c index 0057a03..b28b8e0 100644 --- a/lib/libc/db/btree/bt_overflow.c +++ b/lib/libc/db/btree/bt_overflow.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -69,7 +69,7 @@ static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94"; * * Parameters: * t: tree - * p: pointer to { pgno_t, size_t } + * p: pointer to { pgno_t, u_int32_t } * buf: storage address * bufsz: storage size * @@ -81,15 +81,16 @@ __ovfl_get(t, p, ssz, buf, bufsz) BTREE *t; void *p; size_t *ssz; - char **buf; + void **buf; size_t *bufsz; { PAGE *h; pgno_t pg; - size_t nb, plen, sz; + size_t nb, plen; + u_int32_t sz; memmove(&pg, p, sizeof(pgno_t)); - memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t)); + memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); *ssz = sz; #ifdef DEBUG @@ -98,7 +99,8 @@ __ovfl_get(t, p, ssz, buf, bufsz) #endif /* Make the buffer bigger as necessary. */ if (*bufsz < sz) { - if ((*buf = (char *)realloc(*buf, sz)) == NULL) + *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz)); + if (*buf == NULL) return (RET_ERROR); *bufsz = sz; } @@ -142,7 +144,8 @@ __ovfl_put(t, dbt, pg) PAGE *h, *last; void *p; pgno_t npg; - size_t nb, plen, sz; + size_t nb, plen; + u_int32_t sz; /* * Allocate pages and copy the key/data record into them. Store the @@ -181,7 +184,7 @@ __ovfl_put(t, dbt, pg) * * Parameters: * t: tree - * p: pointer to { pgno_t, size_t } + * p: pointer to { pgno_t, u_int32_t } * * Returns: * RET_ERROR, RET_SUCCESS @@ -193,10 +196,11 @@ __ovfl_delete(t, p) { PAGE *h; pgno_t pg; - size_t plen, sz; + size_t plen; + u_int32_t sz; memmove(&pg, p, sizeof(pgno_t)); - memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t)); + memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); #ifdef DEBUG if (pg == P_INVALID || sz == 0) diff --git a/lib/libc/db/btree/bt_page.c b/lib/libc/db/btree/bt_page.c index f71a40d..0d9d138 100644 --- a/lib/libc/db/btree/bt_page.c +++ b/lib/libc/db/btree/bt_page.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_page.c 8.3 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -43,7 +43,8 @@ static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; #include "btree.h" /* - * __BT_FREE -- Put a page on the freelist. + * __bt_free -- + * Put a page on the freelist. * * Parameters: * t: tree @@ -51,13 +52,16 @@ static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; * * Returns: * RET_ERROR, RET_SUCCESS + * + * Side-effect: + * mpool_put's the page. */ int __bt_free(t, h) BTREE *t; PAGE *h; { - /* Insert the page at the start of the free list. */ + /* Insert the page at the head of the free list. */ h->prevpg = P_INVALID; h->nextpg = t->bt_free; t->bt_free = h->pgno; @@ -67,7 +71,8 @@ __bt_free(t, h) } /* - * __BT_NEW -- Get a new page, preferably from the freelist. + * __bt_new -- + * Get a new page, preferably from the freelist. * * Parameters: * t: tree @@ -85,9 +90,9 @@ __bt_new(t, npg) if (t->bt_free != P_INVALID && (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { - *npg = t->bt_free; - t->bt_free = h->nextpg; - return (h); + *npg = t->bt_free; + t->bt_free = h->nextpg; + return (h); } return (mpool_new(t->bt_mp, npg)); } diff --git a/lib/libc/db/btree/bt_put.c b/lib/libc/db/btree/bt_put.c index 11a211b..952be09 100644 --- a/lib/libc/db/btree/bt_put.c +++ b/lib/libc/db/btree/bt_put.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_put.c 8.3 (Berkeley) 9/16/93"; +static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -76,7 +76,7 @@ __bt_put(dbp, key, data, flags) PAGE *h; indx_t index, nxtindex; pgno_t pg; - size_t nbytes; + u_int32_t nbytes; int dflags, exact, status; char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; @@ -88,33 +88,38 @@ __bt_put(dbp, key, data, flags) t->bt_pinned = NULL; } + /* Check for change to a read-only tree. */ + if (F_ISSET(t, B_RDONLY)) { + errno = EPERM; + return (RET_ERROR); + } + switch (flags) { - case R_CURSOR: - if (!ISSET(t, B_SEQINIT)) - goto einval; - if (ISSET(t, B_DELCRSR)) - goto einval; - break; case 0: case R_NOOVERWRITE: break; + case R_CURSOR: + /* + * If flags is R_CURSOR, put the cursor. Must already + * have started a scan and not have already deleted it. + */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, + CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) + break; + /* FALLTHROUGH */ default: -einval: errno = EINVAL; - return (RET_ERROR); - } - - if (ISSET(t, B_RDONLY)) { - errno = EPERM; + errno = EINVAL; return (RET_ERROR); } /* - * If the key/data won't fit on a page, store it on indirect pages. - * Only store the key on the overflow page if it's too big after the - * data is on an overflow page. + * If the key/data pair won't fit on a page, store it on overflow + * pages. Only put the key on the overflow page if the pair are + * still too big after moving the data to an overflow page. * * XXX - * If the insert fails later on, these pages aren't recovered. + * If the insert fails later on, the overflow pages aren't recovered. */ dflags = 0; if (key->size + data->size > t->bt_ovflsize) { @@ -125,7 +130,7 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) tkey.size = NOVFLSIZE; memmove(kb, &pg, sizeof(pgno_t)); memmove(kb + sizeof(pgno_t), - &key->size, sizeof(size_t)); + &key->size, sizeof(u_int32_t)); dflags |= P_BIGKEY; key = &tkey; } @@ -136,7 +141,7 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) tdata.size = NOVFLSIZE; memmove(db, &pg, sizeof(pgno_t)); memmove(db + sizeof(pgno_t), - &data->size, sizeof(size_t)); + &data->size, sizeof(u_int32_t)); dflags |= P_BIGDATA; data = &tdata; } @@ -146,15 +151,15 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) /* Replace the cursor. */ if (flags == R_CURSOR) { - if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) + if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL) return (RET_ERROR); - index = t->bt_bcursor.index; + index = t->bt_cursor.pg.index; goto delete; } /* - * Find the key to delete, or, the location at which to insert. Bt_fast - * and __bt_search pin the returned page. + * Find the key to delete, or, the location at which to insert. + * Bt_fast and __bt_search both pin the returned page. */ if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) if ((e = __bt_search(t, key, &exact)) == NULL) @@ -163,34 +168,26 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) index = e->index; /* - * Add the specified key/data pair to the tree. If an identical key - * is already in the tree, and R_NOOVERWRITE is set, an error is - * returned. If R_NOOVERWRITE is not set, the key is either added (if - * duplicates are permitted) or an error is returned. - * - * Pages are split as required. + * Add the key/data pair to the tree. If an identical key is already + * in the tree, and R_NOOVERWRITE is set, an error is returned. If + * R_NOOVERWRITE is not set, the key is either added (if duplicates are + * permitted) or an error is returned. */ switch (flags) { case R_NOOVERWRITE: if (!exact) break; - /* - * One special case is if the cursor references the record and - * it's been flagged for deletion. Then, we delete the record, - * leaving the cursor there -- this means that the inserted - * record will not be seen in a cursor scan. - */ - if (ISSET(t, B_DELCRSR) && t->bt_bcursor.pgno == h->pgno && - t->bt_bcursor.index == index) { - CLR(t, B_DELCRSR); - goto delete; - } mpool_put(t->bt_mp, h, 0); return (RET_SPECIAL); default: - if (!exact || !ISSET(t, B_NODUPS)) + if (!exact || !F_ISSET(t, B_NODUPS)) break; -delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { + /* + * !!! + * Note, the delete may empty the page, so we need to put a + * new entry into the page immediately. + */ +delete: if (__bt_dleaf(t, key, h, index) == RET_ERROR) { mpool_put(t->bt_mp, h, 0); return (RET_ERROR); } @@ -220,6 +217,12 @@ delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { dest = (char *)h + h->upper; WR_BLEAF(dest, key, data, dflags); + /* If the cursor is on this page, adjust it as necessary. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= index) + ++t->bt_cursor.pg.index; + if (t->bt_order == NOT) if (h->nextpg == P_INVALID) { if (index == NEXTINDEX(h) - 1) { @@ -238,11 +241,10 @@ delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { mpool_put(t->bt_mp, h, MPOOL_DIRTY); success: - if (flags == R_SETCURSOR) { - t->bt_bcursor.pgno = e->page->pgno; - t->bt_bcursor.index = e->index; - } - SET(t, B_MODIFIED); + if (flags == R_SETCURSOR) + __bt_setcur(t, e->page->pgno, e->index); + + F_SET(t, B_MODIFIED); return (RET_SUCCESS); } @@ -267,7 +269,7 @@ bt_fast(t, key, data, exactp) int *exactp; { PAGE *h; - size_t nbytes; + u_int32_t nbytes; int cmp; if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { @@ -278,8 +280,8 @@ bt_fast(t, key, data, exactp) t->bt_cur.index = t->bt_last.index; /* - * If won't fit in this page or have too many keys in this page, have - * to search to get split stack. + * If won't fit in this page or have too many keys in this page, + * have to search to get split stack. */ nbytes = NBLEAFDBT(key->size, data->size); if (h->upper - h->lower < nbytes + sizeof(indx_t)) diff --git a/lib/libc/db/btree/bt_search.c b/lib/libc/db/btree/bt_search.c index ff33421..485afcb 100644 --- a/lib/libc/db/btree/bt_search.c +++ b/lib/libc/db/btree/bt_search.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94"; +static char sccsid[] = "@(#)bt_search.c 8.8 (Berkeley) 7/31/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -45,11 +45,12 @@ static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94"; #include #include "btree.h" -static int bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); -static int bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); +static int __bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); +static int __bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); /* - * __BT_SEARCH -- Search a btree for a key. + * __bt_search -- + * Search a btree for a key. * * Parameters: * t: tree to search @@ -95,24 +96,26 @@ __bt_search(t, key, exactp) } /* - * If it's a leaf page, and duplicates aren't allowed, we're - * done. If duplicates are allowed, it's possible that there - * were duplicate keys on duplicate pages, and they were later - * deleted, so we could be on a page with no matches while - * there are matches on other pages. If we're at the start or - * end of a page, check on both sides. + * If it's a leaf page, we're almost done. If no duplicates + * are allowed, or we have an exact match, we're done. Else, + * it's possible that there were matching keys on this page, + * which later deleted, and we're on a page with no matches + * while there are matches on other pages. If at the start or + * end of a page, check the adjacent page. */ if (h->flags & P_BLEAF) { - t->bt_cur.index = base; - *exactp = 0; - if (!ISSET(t, B_NODUPS)) { + if (!F_ISSET(t, B_NODUPS)) { if (base == 0 && - bt_sprev(t, h, key, exactp)) + h->prevpg != P_INVALID && + __bt_sprev(t, h, key, exactp)) return (&t->bt_cur); if (base == NEXTINDEX(h) && - bt_snext(t, h, key, exactp)) + h->nextpg != P_INVALID && + __bt_snext(t, h, key, exactp)) return (&t->bt_cur); } + *exactp = 0; + t->bt_cur.index = base; return (&t->bt_cur); } @@ -125,111 +128,86 @@ __bt_search(t, key, exactp) */ index = base ? base - 1 : base; -next: if (__bt_push(t, h->pgno, index) == RET_ERROR) - return (NULL); +next: BT_PUSH(t, h->pgno, index); pg = GETBINTERNAL(h, index)->pgno; mpool_put(t->bt_mp, h, 0); } } /* - * BT_SNEXT -- Check for an exact match after the key. + * __bt_snext -- + * Check for an exact match after the key. * * Parameters: - * t: tree to search - * h: current page. - * key: key to find + * t: tree + * h: current page + * key: key * exactp: pointer to exact match flag * * Returns: * If an exact match found. */ static int -bt_snext(t, h, key, exactp) +__bt_snext(t, h, key, exactp) BTREE *t; PAGE *h; const DBT *key; int *exactp; { EPG e; - PAGE *tp; - pgno_t pg; - /* Skip until reach the end of the tree or a key. */ - for (pg = h->nextpg; pg != P_INVALID;) { - if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) { - mpool_put(t->bt_mp, h, 0); - return (NULL); - } - if (NEXTINDEX(tp) != 0) - break; - pg = tp->prevpg; - mpool_put(t->bt_mp, tp, 0); - } /* - * The key is either an exact match, or not as good as - * the one we already have. + * Get the next page. The key is either an exact + * match, or not as good as the one we already have. */ - if (pg != P_INVALID) { - e.page = tp; - e.index = NEXTINDEX(tp) - 1; - if (__bt_cmp(t, key, &e) == 0) { - mpool_put(t->bt_mp, h, 0); - t->bt_cur = e; - *exactp = 1; - return (1); - } + if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (0); + e.index = 0; + if (__bt_cmp(t, key, &e) == 0) { + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + return (1); } + mpool_put(t->bt_mp, e.page, 0); return (0); } /* - * BT_SPREV -- Check for an exact match before the key. + * __bt_sprev -- + * Check for an exact match before the key. * * Parameters: - * t: tree to search - * h: current page. - * key: key to find + * t: tree + * h: current page + * key: key * exactp: pointer to exact match flag * * Returns: * If an exact match found. */ static int -bt_sprev(t, h, key, exactp) +__bt_sprev(t, h, key, exactp) BTREE *t; PAGE *h; const DBT *key; int *exactp; { EPG e; - PAGE *tp; - pgno_t pg; - /* Skip until reach the beginning of the tree or a key. */ - for (pg = h->prevpg; pg != P_INVALID;) { - if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) { - mpool_put(t->bt_mp, h, 0); - return (NULL); - } - if (NEXTINDEX(tp) != 0) - break; - pg = tp->prevpg; - mpool_put(t->bt_mp, tp, 0); - } /* - * The key is either an exact match, or not as good as - * the one we already have. + * Get the previous page. The key is either an exact + * match, or not as good as the one we already have. */ - if (pg != P_INVALID) { - e.page = tp; - e.index = NEXTINDEX(tp) - 1; - if (__bt_cmp(t, key, &e) == 0) { - mpool_put(t->bt_mp, h, 0); - t->bt_cur = e; - *exactp = 1; - return (1); - } + if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (0); + e.index = NEXTINDEX(e.page) - 1; + if (__bt_cmp(t, key, &e) == 0) { + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + return (1); } + mpool_put(t->bt_mp, e.page, 0); return (0); } diff --git a/lib/libc/db/btree/bt_seq.c b/lib/libc/db/btree/bt_seq.c index 182ef70..303b481 100644 --- a/lib/libc/db/btree/bt_seq.c +++ b/lib/libc/db/btree/bt_seq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93"; +static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -48,24 +48,21 @@ static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93"; #include #include "btree.h" -static int bt_seqadv __P((BTREE *, EPG *, int)); -static int bt_seqset __P((BTREE *, EPG *, DBT *, int)); +static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); +static int __bt_seqadv __P((BTREE *, EPG *, int)); +static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); /* * Sequential scan support. * - * The tree can be scanned sequentially, starting from either end of the tree - * or from any specific key. A scan request before any scanning is done is - * initialized as starting from the least node. - * - * Each tree has an EPGNO which has the current position of the cursor. The - * cursor has to survive deletions/insertions in the tree without losing its - * position. This is done by noting deletions without doing them, and then - * doing them when the cursor moves (or the tree is closed). + * The tree can be scanned sequentially, starting from either end of the + * tree or from any specific key. A scan request before any scanning is + * done is initialized as starting from the least node. */ /* - * __BT_SEQ -- Btree sequential scan interface. + * __bt_seq -- + * Btree sequential scan interface. * * Parameters: * dbp: pointer to access method @@ -96,21 +93,21 @@ __bt_seq(dbp, key, data, flags) /* * If scan unitialized as yet, or starting at a specific record, set - * the scan to a specific key. Both bt_seqset and bt_seqadv pin the - * page the cursor references if they're successful. + * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin + * the page the cursor references if they're successful. */ - switch(flags) { + switch (flags) { case R_NEXT: case R_PREV: - if (ISSET(t, B_SEQINIT)) { - status = bt_seqadv(t, &e, flags); + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + status = __bt_seqadv(t, &e, flags); break; } /* FALLTHROUGH */ - case R_CURSOR: case R_FIRST: case R_LAST: - status = bt_seqset(t, &e, key, flags); + case R_CURSOR: + status = __bt_seqset(t, &e, key, flags); break; default: errno = EINVAL; @@ -118,27 +115,26 @@ __bt_seq(dbp, key, data, flags) } if (status == RET_SUCCESS) { - status = __bt_ret(t, &e, key, data); + __bt_setcur(t, e.page->pgno, e.index); - /* Update the actual cursor. */ - t->bt_bcursor.pgno = e.page->pgno; - t->bt_bcursor.index = e.index; + status = + __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); /* * If the user is doing concurrent access, we copied the * key/data, toss the page. */ - if (ISSET(t, B_DB_LOCK)) + if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e.page, 0); else t->bt_pinned = e.page; - SET(t, B_SEQINIT); } return (status); } /* - * BT_SEQSET -- Set the sequential scan to a specific key. + * __bt_seqset -- + * Set the sequential scan to a specific key. * * Parameters: * t: tree @@ -153,87 +149,50 @@ __bt_seq(dbp, key, data, flags) * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ static int -bt_seqset(t, ep, key, flags) +__bt_seqset(t, ep, key, flags) BTREE *t; EPG *ep; DBT *key; int flags; { - EPG *e; PAGE *h; pgno_t pg; int exact; /* - * Delete any already deleted record that we've been saving because - * the cursor pointed to it. Since going to a specific key, should - * delete any logically deleted records so they aren't found. - */ - if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) - return (RET_ERROR); - - /* - * Find the first, last or specific key in the tree and point the cursor - * at it. The cursor may not be moved until a new key has been found. + * Find the first, last or specific key in the tree and point the + * cursor at it. The cursor may not be moved until a new key has + * been found. */ - switch(flags) { + switch (flags) { case R_CURSOR: /* Keyed scan. */ /* - * Find the first instance of the key or the smallest key which - * is greater than or equal to the specified key. If run out - * of keys, return RET_SPECIAL. + * Find the first instance of the key or the smallest key + * which is greater than or equal to the specified key. */ if (key->data == NULL || key->size == 0) { errno = EINVAL; return (RET_ERROR); } - e = __bt_first(t, key, &exact); /* Returns pinned page. */ - if (e == NULL) - return (RET_ERROR); - /* - * If at the end of a page, skip any empty pages and find the - * next entry. - */ - if (e->index == NEXTINDEX(e->page)) { - h = e->page; - do { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } while (NEXTINDEX(h) == 0); - e->index = 0; - e->page = h; - } - *ep = *e; - break; + return (__bt_first(t, key, ep, &exact)); case R_FIRST: /* First record. */ case R_NEXT: /* Walk down the left-hand side of the tree. */ for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + if (h->flags & (P_BLEAF | P_RLEAF)) break; pg = GETBINTERNAL(h, 0)->pgno; mpool_put(t->bt_mp, h, 0); } - - /* Skip any empty pages. */ - while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } - - if (NEXTINDEX(h) == 0) { - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - } - ep->page = h; ep->index = 0; break; @@ -243,25 +202,19 @@ bt_seqset(t, ep, key, flags) for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + if (h->flags & (P_BLEAF | P_RLEAF)) break; pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; mpool_put(t->bt_mp, h, 0); } - /* Skip any empty pages. */ - while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) { - pg = h->prevpg; - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } - - if (NEXTINDEX(h) == 0) { - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - } - ep->page = h; ep->index = NEXTINDEX(h) - 1; break; @@ -270,7 +223,8 @@ bt_seqset(t, ep, key, flags) } /* - * BT_SEQADVANCE -- Advance the sequential scan. + * __bt_seqadvance -- + * Advance the sequential scan. * * Parameters: * t: tree @@ -283,98 +237,224 @@ bt_seqset(t, ep, key, flags) * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ static int -bt_seqadv(t, e, flags) +__bt_seqadv(t, ep, flags) BTREE *t; - EPG *e; + EPG *ep; int flags; { - EPGNO *c, delc; + CURSOR *c; PAGE *h; indx_t index; pgno_t pg; + int exact; - /* Save the current cursor if going to delete it. */ - c = &t->bt_bcursor; - if (ISSET(t, B_DELCRSR)) - delc = *c; + /* + * There are a couple of states that we can be in. The cursor has + * been initialized by the time we get here, but that's all we know. + */ + c = &t->bt_cursor; - if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) + /* + * The cursor was deleted where there weren't any duplicate records, + * so the key was saved. Find out where that key would go in the + * current tree. It doesn't matter if the returned key is an exact + * match or not -- if it's an exact match, the record was added after + * the delete so we can just return it. If not, as long as there's + * a record there, return it. + */ + if (F_ISSET(c, CURS_ACQUIRE)) + return (__bt_first(t, &c->key, ep, &exact)); + + /* Get the page referenced by the cursor. */ + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) return (RET_ERROR); /* - * Find the next/previous record in the tree and point the cursor at it. - * The cursor may not be moved until a new key has been found. + * Find the next/previous record in the tree and point the cursor at + * it. The cursor may not be moved until a new key has been found. */ - index = c->index; - switch(flags) { + switch (flags) { case R_NEXT: /* Next record. */ + /* + * The cursor was deleted in duplicate records, and moved + * forward to a record that has yet to be returned. Clear + * that flag, and return the record. + */ + if (F_ISSET(c, CURS_AFTER)) + goto usecurrent; + index = c->pg.index; if (++index == NEXTINDEX(h)) { - do { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } while (NEXTINDEX(h) == 0); + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); index = 0; } break; case R_PREV: /* Previous record. */ - if (index-- == 0) { - do { - pg = h->prevpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } while (NEXTINDEX(h) == 0); - index = NEXTINDEX(h) - 1; + /* + * The cursor was deleted in duplicate records, and moved + * backward to a record that has yet to be returned. Clear + * that flag, and return the record. + */ + if (F_ISSET(c, CURS_BEFORE)) { +usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); + ep->page = h; + ep->index = c->pg.index; + return (RET_SUCCESS); } + index = c->pg.index; + if (index == 0) { + pg = h->prevpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + index = NEXTINDEX(h) - 1; + } else + --index; break; } - e->page = h; - e->index = index; + ep->page = h; + ep->index = index; + return (RET_SUCCESS); +} + +/* + * __bt_first -- + * Find the first entry. + * + * Parameters: + * t: the tree + * key: the key + * erval: return EPG + * exactp: pointer to exact match flag + * + * Returns: + * The first entry in the tree greater than or equal to key, + * or RET_SPECIAL if no such key exists. + */ +static int +__bt_first(t, key, erval, exactp) + BTREE *t; + const DBT *key; + EPG *erval; + int *exactp; +{ + PAGE *h; + EPG *ep, save; + pgno_t pg; /* - * Delete any already deleted record that we've been saving because the - * cursor pointed to it. This could cause the new index to be shifted - * down by one if the record we're deleting is on the same page and has - * a larger index. + * Find any matching record; __bt_search pins the page. + * + * If it's an exact match and duplicates are possible, walk backwards + * in the tree until we find the first one. Otherwise, make sure it's + * a valid key (__bt_search may return an index just past the end of a + * page) and return it. */ - if (ISSET(t, B_DELCRSR)) { - CLR(t, B_DELCRSR); /* Don't try twice. */ - if (c->pgno == delc.pgno && c->index > delc.index) - --c->index; - if (__bt_crsrdel(t, &delc)) + if ((ep = __bt_search(t, key, exactp)) == NULL) + return (NULL); + if (*exactp) { + if (F_ISSET(t, B_NODUPS)) { + *erval = *ep; + return (RET_SUCCESS); + } + + /* + * Walk backwards, as long as the entry matches and there are + * keys left in the tree. Save a copy of each match in case + * we go too far. + */ + save = *ep; + h = ep->page; + do { + if (save.page->pgno != ep->page->pgno) { + mpool_put(t->bt_mp, save.page, 0); + save = *ep; + } else + save.index = ep->index; + + /* + * Don't unpin the page the last (or original) match + * was on, but make sure it's unpinned if an error + * occurs. + */ + if (ep->index == 0) { + if (h->prevpg == P_INVALID) + break; + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, + h->prevpg, 0)) == NULL) { + if (h->pgno == save.page->pgno) + mpool_put(t->bt_mp, + save.page, 0); + return (RET_ERROR); + } + ep->page = h; + ep->index = NEXTINDEX(h); + } + --ep->index; + } while (__bt_cmp(t, key, ep) == 0); + + /* + * Reach here with the last page that was looked at pinned, + * which may or may not be the same as the last (or original) + * match page. If it's not useful, release it. + */ + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + + *erval = save; + return (RET_SUCCESS); + } + + /* If at the end of a page, find the next entry. */ + if (ep->index == NEXTINDEX(ep->page)) { + h = ep->page; + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); + ep->index = 0; + ep->page = h; } + *erval = *ep; return (RET_SUCCESS); } /* - * __BT_CRSRDEL -- Delete the record referenced by the cursor. + * __bt_setcur -- + * Set the cursor to an entry in the tree. * * Parameters: - * t: tree - * - * Returns: - * RET_ERROR, RET_SUCCESS + * t: the tree + * pgno: page number + * index: page index */ -int -__bt_crsrdel(t, c) +void +__bt_setcur(t, pgno, index) BTREE *t; - EPGNO *c; + pgno_t pgno; + u_int index; { - PAGE *h; - int status; + /* Lose any already deleted key. */ + if (t->bt_cursor.key.data != NULL) { + free(t->bt_cursor.key.data); + t->bt_cursor.key.size = 0; + t->bt_cursor.key.data = NULL; + } + F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); - CLR(t, B_DELCRSR); /* Don't try twice. */ - if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) - return (RET_ERROR); - status = __bt_dleaf(t, h, c->index); - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - return (status); + /* Update the cursor. */ + t->bt_cursor.pg.pgno = pgno; + t->bt_cursor.pg.index = index; + F_SET(&t->bt_cursor, CURS_INIT); } diff --git a/lib/libc/db/btree/bt_split.c b/lib/libc/db/btree/bt_split.c index 4a572c0..1646d82 100644 --- a/lib/libc/db/btree/bt_split.c +++ b/lib/libc/db/btree/bt_split.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_split.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_split.c 8.9 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -79,13 +79,13 @@ u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; * RET_ERROR, RET_SUCCESS */ int -__bt_split(t, sp, key, data, flags, ilen, skip) +__bt_split(t, sp, key, data, flags, ilen, argskip) BTREE *t; PAGE *sp; const DBT *key, *data; int flags; size_t ilen; - indx_t skip; + u_int32_t argskip; { BINTERNAL *bi; BLEAF *bl, *tbl; @@ -93,7 +93,8 @@ __bt_split(t, sp, key, data, flags, ilen, skip) EPGNO *parent; PAGE *h, *l, *r, *lchild, *rchild; indx_t nxtindex; - size_t n, nbytes, nksize; + u_int16_t skip; + u_int32_t n, nbytes, nksize; int parentsplit; char *dest; @@ -103,6 +104,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip) * skip set to the offset which should be used. Additionally, l and r * are pinned. */ + skip = argskip; h = sp->pgno == P_ROOT ? bt_root(t, sp, &l, &r, &skip, ilen) : bt_page(t, sp, &l, &r, &skip, ilen); @@ -115,14 +117,14 @@ __bt_split(t, sp, key, data, flags, ilen, skip) */ h->linp[skip] = h->upper -= ilen; dest = (char *)h + h->upper; - if (ISSET(t, R_RECNO)) + if (F_ISSET(t, R_RECNO)) WR_RLEAF(dest, data, flags) else WR_BLEAF(dest, key, data, flags) /* If the root page was split, make it look right. */ if (sp->pgno == P_ROOT && - (ISSET(t, R_RECNO) ? + (F_ISSET(t, R_RECNO) ? bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) goto err2; @@ -230,7 +232,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip) } /* Insert the key into the parent page. */ - switch(rchild->flags & P_TYPE) { + switch (rchild->flags & P_TYPE) { case P_BINTERNAL: h->linp[skip] = h->upper -= nbytes; dest = (char *)h + h->linp[skip]; @@ -295,7 +297,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip) /* If the root page was split, make it look right. */ if (sp->pgno == P_ROOT && - (ISSET(t, R_RECNO) ? + (F_ISSET(t, R_RECNO) ? bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) goto err1; @@ -388,6 +390,9 @@ bt_page(t, h, lp, rp, skip, ilen) mpool_put(t->bt_mp, r, 0); return (NULL); } +#ifdef PURIFY + memset(l, 0xff, t->bt_psize); +#endif l->pgno = h->pgno; l->nextpg = r->pgno; l->prevpg = h->prevpg; @@ -403,7 +408,7 @@ bt_page(t, h, lp, rp, skip, ilen) return (NULL); } tp->prevpg = r->pgno; - mpool_put(t->bt_mp, tp, 0); + mpool_put(t->bt_mp, tp, MPOOL_DIRTY); } /* @@ -534,7 +539,7 @@ bt_broot(t, h, l, r) { BINTERNAL *bi; BLEAF *bl; - size_t nbytes; + u_int32_t nbytes; char *dest; /* @@ -550,7 +555,7 @@ bt_broot(t, h, l, r) dest = (char *)h + h->upper; WR_BINTERNAL(dest, 0, l->pgno, 0); - switch(h->flags & P_TYPE) { + switch (h->flags & P_TYPE) { case P_BLEAF: bl = GETBLEAF(r, 0); nbytes = NBINTERNAL(bl->ksize); @@ -613,12 +618,12 @@ bt_psplit(t, h, l, r, pskip, ilen) { BINTERNAL *bi; BLEAF *bl; + CURSOR *c; RLEAF *rl; - EPGNO *c; PAGE *rval; void *src; indx_t full, half, nxt, off, skip, top, used; - size_t nbytes; + u_int32_t nbytes; int bigkeycnt, isbigkey; /* @@ -702,19 +707,16 @@ bt_psplit(t, h, l, r, pskip, ilen) * cursor is at or past the skipped slot, the cursor is incremented by * one. If the cursor is on the right page, it is decremented by the * number of records split to the left page. - * - * Don't bother checking for the B_SEQINIT flag, the page number will - * be P_INVALID. */ - c = &t->bt_bcursor; - if (c->pgno == h->pgno) { - if (c->index >= skip) - ++c->index; - if (c->index < nxt) /* Left page. */ - c->pgno = l->pgno; + c = &t->bt_cursor; + if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) { + if (c->pg.index >= skip) + ++c->pg.index; + if (c->pg.index < nxt) /* Left page. */ + c->pg.pgno = l->pgno; else { /* Right page. */ - c->pgno = r->pgno; - c->index -= nxt; + c->pg.pgno = r->pgno; + c->pg.index -= nxt; } } diff --git a/lib/libc/db/btree/bt_utils.c b/lib/libc/db/btree/bt_utils.c index d2d1f73..9c1438e 100644 --- a/lib/libc/db/btree/bt_utils.c +++ b/lib/libc/db/btree/bt_utils.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -48,78 +48,91 @@ static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94"; #include "btree.h" /* - * __BT_RET -- Build return key/data pair as a result of search or scan. + * __bt_ret -- + * Build return key/data pair. * * Parameters: * t: tree - * d: LEAF to be returned to the user. + * e: key/data pair to be returned * key: user's key structure (NULL if not to be filled in) - * data: user's data structure + * rkey: memory area to hold key + * data: user's data structure (NULL if not to be filled in) + * rdata: memory area to hold data + * copy: always copy the key/data item * * Returns: * RET_SUCCESS, RET_ERROR. */ int -__bt_ret(t, e, key, data) +__bt_ret(t, e, key, rkey, data, rdata, copy) BTREE *t; EPG *e; - DBT *key, *data; + DBT *key, *rkey, *data, *rdata; + int copy; { - register BLEAF *bl; - register void *p; + BLEAF *bl; + void *p; bl = GETBLEAF(e->page, e->index); /* - * We always copy big keys/data to make them contigous. Otherwise, - * we leave the page pinned and don't copy unless the user specified + * We must copy big keys/data to make them contigous. Otherwise, + * leave the page pinned and don't copy unless the user specified * concurrent access. */ - if (bl->flags & P_BIGDATA) { - if (__ovfl_get(t, bl->bytes + bl->ksize, - &data->size, &t->bt_dbuf, &t->bt_dbufsz)) + if (key == NULL) + goto dataonly; + + if (bl->flags & P_BIGKEY) { + if (__ovfl_get(t, bl->bytes, + &key->size, &rkey->data, &rkey->size)) return (RET_ERROR); - data->data = t->bt_dbuf; - } else if (ISSET(t, B_DB_LOCK)) { - /* Use +1 in case the first record retrieved is 0 length. */ - if (bl->dsize + 1 > t->bt_dbufsz) { - if ((p = - (void *)realloc(t->bt_dbuf, bl->dsize + 1)) == NULL) + key->data = rkey->data; + } else if (copy || F_ISSET(t, B_DB_LOCK)) { + if (bl->ksize > rkey->size) { + p = (void *)(rkey->data == NULL ? + malloc(bl->ksize) : realloc(rkey->data, bl->ksize)); + if (p == NULL) return (RET_ERROR); - t->bt_dbuf = p; - t->bt_dbufsz = bl->dsize + 1; + rkey->data = p; + rkey->size = bl->ksize; } - memmove(t->bt_dbuf, bl->bytes + bl->ksize, bl->dsize); - data->size = bl->dsize; - data->data = t->bt_dbuf; + memmove(rkey->data, bl->bytes, bl->ksize); + key->size = bl->ksize; + key->data = rkey->data; } else { - data->size = bl->dsize; - data->data = bl->bytes + bl->ksize; + key->size = bl->ksize; + key->data = bl->bytes; } - if (key == NULL) +dataonly: + if (data == NULL) return (RET_SUCCESS); - if (bl->flags & P_BIGKEY) { - if (__ovfl_get(t, bl->bytes, - &key->size, &t->bt_kbuf, &t->bt_kbufsz)) + if (bl->flags & P_BIGDATA) { + if (__ovfl_get(t, bl->bytes + bl->ksize, + &data->size, &rdata->data, &rdata->size)) return (RET_ERROR); - key->data = t->bt_kbuf; - } else if (ISSET(t, B_DB_LOCK)) { - if (bl->ksize > t->bt_kbufsz) { - if ((p = - (void *)realloc(t->bt_kbuf, bl->ksize)) == NULL) + data->data = rdata->data; + } else if (copy || F_ISSET(t, B_DB_LOCK)) { + /* Use +1 in case the first record retrieved is 0 length. */ + if (bl->dsize + 1 > rdata->size) { + p = (void *)(rdata->data == NULL ? + malloc(bl->dsize + 1) : + realloc(rdata->data, bl->dsize + 1)); + if (p == NULL) return (RET_ERROR); - t->bt_kbuf = p; - t->bt_kbufsz = bl->ksize; + rdata->data = p; + rdata->size = bl->dsize + 1; } - memmove(t->bt_kbuf, bl->bytes, bl->ksize); - key->size = bl->ksize; - key->data = t->bt_kbuf; + memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize); + data->size = bl->dsize; + data->data = rdata->data; } else { - key->size = bl->ksize; - key->data = bl->bytes; + data->size = bl->dsize; + data->data = bl->bytes + bl->ksize; } + return (RET_SUCCESS); } @@ -180,9 +193,9 @@ __bt_cmp(t, k1, e) if (bigkey) { if (__ovfl_get(t, bigkey, - &k2.size, &t->bt_dbuf, &t->bt_dbufsz)) + &k2.size, &t->bt_rdata.data, &t->bt_rdata.size)) return (RET_ERROR); - k2.data = t->bt_dbuf; + k2.data = t->bt_rdata.data; } return ((*t->bt_cmp)(k1, &k2)); } diff --git a/lib/libc/db/btree/btree.h b/lib/libc/db/btree/btree.h index dd798ec..36d35c9 100644 --- a/lib/libc/db/btree/btree.h +++ b/lib/libc/db/btree/btree.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1991, 1993 + * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -33,9 +33,14 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)btree.h 8.5 (Berkeley) 2/21/94 + * @(#)btree.h 8.11 (Berkeley) 8/17/94 */ +/* Macros to set/clear/test flags. */ +#define F_SET(p, f) (p)->flags |= (f) +#define F_CLR(p, f) (p)->flags &= ~(f) +#define F_ISSET(p, f) ((p)->flags & (f)) + #include #define DEFMINKEYPAGE (2) /* Minimum keys per page */ @@ -79,8 +84,9 @@ typedef struct _page { } PAGE; /* First and next index. */ -#define BTDATAOFF (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ - sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) +#define BTDATAOFF \ + (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ + sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) #define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t)) /* @@ -99,9 +105,8 @@ typedef struct _page { * be manipulated without copying. (This presumes that 32 bit items can be * manipulated on this system.) */ -#define LALIGN(n) \ - (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) -#define NOVFLSIZE (sizeof(pgno_t) + sizeof(size_t)) +#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) +#define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t)) /* * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno} @@ -113,7 +118,7 @@ typedef struct _page { * some minor modifications of the above rule. */ typedef struct _binternal { - size_t ksize; /* key size */ + u_int32_t ksize; /* key size */ pgno_t pgno; /* page number stored on */ #define P_BIGDATA 0x01 /* overflow data */ #define P_BIGKEY 0x02 /* overflow key */ @@ -122,21 +127,21 @@ typedef struct _binternal { } BINTERNAL; /* Get the page's BINTERNAL structure at index indx. */ -#define GETBINTERNAL(pg, indx) \ +#define GETBINTERNAL(pg, indx) \ ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ -#define NBINTERNAL(len) \ - LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) +#define NBINTERNAL(len) \ + LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) /* Copy a BINTERNAL entry to the page. */ -#define WR_BINTERNAL(p, size, pgno, flags) { \ - *(size_t *)p = size; \ - p += sizeof(size_t); \ - *(pgno_t *)p = pgno; \ - p += sizeof(pgno_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ +#define WR_BINTERNAL(p, size, pgno, flags) { \ + *(u_int32_t *)p = size; \ + p += sizeof(u_int32_t); \ + *(pgno_t *)p = pgno; \ + p += sizeof(pgno_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ } /* @@ -149,78 +154,78 @@ typedef struct _rinternal { } RINTERNAL; /* Get the page's RINTERNAL structure at index indx. */ -#define GETRINTERNAL(pg, indx) \ +#define GETRINTERNAL(pg, indx) \ ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ -#define NRINTERNAL \ +#define NRINTERNAL \ LALIGN(sizeof(recno_t) + sizeof(pgno_t)) /* Copy a RINTERAL entry to the page. */ -#define WR_RINTERNAL(p, nrecs, pgno) { \ - *(recno_t *)p = nrecs; \ - p += sizeof(recno_t); \ - *(pgno_t *)p = pgno; \ +#define WR_RINTERNAL(p, nrecs, pgno) { \ + *(recno_t *)p = nrecs; \ + p += sizeof(recno_t); \ + *(pgno_t *)p = pgno; \ } /* For the btree leaf pages, the item is a key and data pair. */ typedef struct _bleaf { - size_t ksize; /* size of key */ - size_t dsize; /* size of data */ + u_int32_t ksize; /* size of key */ + u_int32_t dsize; /* size of data */ u_char flags; /* P_BIGDATA, P_BIGKEY */ char bytes[1]; /* data */ } BLEAF; /* Get the page's BLEAF structure at index indx. */ -#define GETBLEAF(pg, indx) \ +#define GETBLEAF(pg, indx) \ ((BLEAF *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize) /* Get the number of bytes in the user's key/data pair. */ -#define NBLEAFDBT(ksize, dsize) \ - LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \ +#define NBLEAFDBT(ksize, dsize) \ + LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \ (ksize) + (dsize)) /* Copy a BLEAF entry to the page. */ -#define WR_BLEAF(p, key, data, flags) { \ - *(size_t *)p = key->size; \ - p += sizeof(size_t); \ - *(size_t *)p = data->size; \ - p += sizeof(size_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ - memmove(p, key->data, key->size); \ - p += key->size; \ - memmove(p, data->data, data->size); \ +#define WR_BLEAF(p, key, data, flags) { \ + *(u_int32_t *)p = key->size; \ + p += sizeof(u_int32_t); \ + *(u_int32_t *)p = data->size; \ + p += sizeof(u_int32_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, key->data, key->size); \ + p += key->size; \ + memmove(p, data->data, data->size); \ } /* For the recno leaf pages, the item is a data entry. */ typedef struct _rleaf { - size_t dsize; /* size of data */ + u_int32_t dsize; /* size of data */ u_char flags; /* P_BIGDATA */ char bytes[1]; } RLEAF; /* Get the page's RLEAF structure at index indx. */ -#define GETRLEAF(pg, indx) \ +#define GETRLEAF(pg, indx) \ ((RLEAF *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NRLEAF(p) NRLEAFDBT((p)->dsize) /* Get the number of bytes from the user's data. */ -#define NRLEAFDBT(dsize) \ - LALIGN(sizeof(size_t) + sizeof(u_char) + (dsize)) +#define NRLEAFDBT(dsize) \ + LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize)) /* Copy a RLEAF entry to the page. */ -#define WR_RLEAF(p, data, flags) { \ - *(size_t *)p = data->size; \ - p += sizeof(size_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ - memmove(p, data->data, data->size); \ +#define WR_RLEAF(p, data, flags) { \ + *(u_int32_t *)p = data->size; \ + p += sizeof(u_int32_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, data->data, data->size); \ } /* @@ -232,12 +237,6 @@ typedef struct _rleaf { * record less than key in the tree so that descents work. Leaf page searches * must find the smallest record greater than key so that the returned index * is the record's correct position for insertion. - * - * One comment about cursors. The cursor key is never removed from the tree, - * even if deleted. This is because it is quite difficult to decide where the - * cursor should be when other keys have been inserted/deleted in the tree; - * duplicate keys make it impossible. This scheme does require extra work - * though, to make sure that we don't perform an operation on a deleted key. */ typedef struct _epgno { pgno_t pgno; /* the page number */ @@ -250,53 +249,90 @@ typedef struct _epg { } EPG; /* - * The metadata of the tree. The m_nrecs field is used only by the RECNO code. + * About cursors. The cursor (and the page that contained the key/data pair + * that it referenced) can be deleted, which makes things a bit tricky. If + * there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set + * or there simply aren't any duplicates of the key) we copy the key that it + * referenced when it's deleted, and reacquire a new cursor key if the cursor + * is used again. If there are duplicates keys, we move to the next/previous + * key, and set a flag so that we know what happened. NOTE: if duplicate (to + * the cursor) keys are added to the tree during this process, it is undefined + * if they will be returned or not in a cursor scan. + * + * The flags determine the possible states of the cursor: + * + * CURS_INIT The cursor references *something*. + * CURS_ACQUIRE The cursor was deleted, and a key has been saved so that + * we can reacquire the right position in the tree. + * CURS_AFTER, CURS_BEFORE + * The cursor was deleted, and now references a key/data pair + * that has not yet been returned, either before or after the + * deleted key/data pair. + * XXX + * This structure is broken out so that we can eventually offer multiple + * cursors as part of the DB interface. + */ +typedef struct _cursor { + EPGNO pg; /* B: Saved tree reference. */ + DBT key; /* B: Saved key, or key.data == NULL. */ + recno_t rcursor; /* R: recno cursor (1-based) */ + +#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */ +#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */ +#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */ +#define CURS_INIT 0x08 /* RB: Cursor initialized. */ + u_int8_t flags; +} CURSOR; + +/* + * The metadata of the tree. The nrecs field is used only by the RECNO code. * This is because the btree doesn't really need it and it requires that every * put or delete call modify the metadata. */ typedef struct _btmeta { - u_int32_t m_magic; /* magic number */ - u_int32_t m_version; /* version */ - u_int32_t m_psize; /* page size */ - u_int32_t m_free; /* page number of first free page */ - u_int32_t m_nrecs; /* R: number of records */ + u_int32_t magic; /* magic number */ + u_int32_t version; /* version */ + u_int32_t psize; /* page size */ + u_int32_t free; /* page number of first free page */ + u_int32_t nrecs; /* R: number of records */ + #define SAVEMETA (B_NODUPS | R_RECNO) - u_int32_t m_flags; /* bt_flags & SAVEMETA */ - u_int32_t m_unused; /* unused */ + u_int32_t flags; /* bt_flags & SAVEMETA */ } BTMETA; /* The in-memory btree/recno data structure. */ typedef struct _btree { - MPOOL *bt_mp; /* memory pool cookie */ + MPOOL *bt_mp; /* memory pool cookie */ - DB *bt_dbp; /* pointer to enclosing DB */ + DB *bt_dbp; /* pointer to enclosing DB */ - EPG bt_cur; /* current (pinned) page */ - PAGE *bt_pinned; /* page pinned across calls */ + EPG bt_cur; /* current (pinned) page */ + PAGE *bt_pinned; /* page pinned across calls */ - EPGNO bt_bcursor; /* B: btree cursor */ - recno_t bt_rcursor; /* R: recno cursor (1-based) */ + CURSOR bt_cursor; /* cursor */ -#define BT_POP(t) (t->bt_sp ? t->bt_stack + --t->bt_sp : NULL) -#define BT_CLR(t) (t->bt_sp = 0) - EPGNO *bt_stack; /* stack of parent pages */ - u_int bt_sp; /* current stack pointer */ - u_int bt_maxstack; /* largest stack */ +#define BT_PUSH(t, p, i) { \ + t->bt_sp->pgno = p; \ + t->bt_sp->index = i; \ + ++t->bt_sp; \ +} +#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp) +#define BT_CLR(t) (t->bt_sp = t->bt_stack) + EPGNO bt_stack[50]; /* stack of parent pages */ + EPGNO *bt_sp; /* current stack pointer */ - char *bt_kbuf; /* key buffer */ - size_t bt_kbufsz; /* key buffer size */ - char *bt_dbuf; /* data buffer */ - size_t bt_dbufsz; /* data buffer size */ + DBT bt_rkey; /* returned key */ + DBT bt_rdata; /* returned data */ - int bt_fd; /* tree file descriptor */ + int bt_fd; /* tree file descriptor */ - pgno_t bt_free; /* next free page */ + pgno_t bt_free; /* next free page */ u_int32_t bt_psize; /* page size */ - indx_t bt_ovflsize; /* cut-off for key/data overflow */ - int bt_lorder; /* byte order */ + indx_t bt_ovflsize; /* cut-off for key/data overflow */ + int bt_lorder; /* byte order */ /* sorted order */ enum { NOT, BACK, FORWARD } bt_order; - EPGNO bt_last; /* last insert */ + EPGNO bt_last; /* last insert */ /* B: key comparison function */ int (*bt_cmp) __P((const DBT *, const DBT *)); @@ -305,49 +341,43 @@ typedef struct _btree { /* R: recno input function */ int (*bt_irec) __P((struct _btree *, recno_t)); - FILE *bt_rfp; /* R: record FILE pointer */ - int bt_rfd; /* R: record file descriptor */ + FILE *bt_rfp; /* R: record FILE pointer */ + int bt_rfd; /* R: record file descriptor */ - caddr_t bt_cmap; /* R: current point in mapped space */ - caddr_t bt_smap; /* R: start of mapped space */ - caddr_t bt_emap; /* R: end of mapped space */ - size_t bt_msize; /* R: size of mapped region. */ + caddr_t bt_cmap; /* R: current point in mapped space */ + caddr_t bt_smap; /* R: start of mapped space */ + caddr_t bt_emap; /* R: end of mapped space */ + size_t bt_msize; /* R: size of mapped region. */ - recno_t bt_nrecs; /* R: number of records */ - size_t bt_reclen; /* R: fixed record length */ - u_char bt_bval; /* R: delimiting byte/pad character */ + recno_t bt_nrecs; /* R: number of records */ + size_t bt_reclen; /* R: fixed record length */ + u_char bt_bval; /* R: delimiting byte/pad character */ /* * NB: * B_NODUPS and R_RECNO are stored on disk, and may not be changed. */ -#define B_DELCRSR 0x00001 /* cursor has been deleted */ -#define B_INMEM 0x00002 /* in-memory tree */ -#define B_METADIRTY 0x00004 /* need to write metadata */ -#define B_MODIFIED 0x00008 /* tree modified */ -#define B_NEEDSWAP 0x00010 /* if byte order requires swapping */ +#define B_INMEM 0x00001 /* in-memory tree */ +#define B_METADIRTY 0x00002 /* need to write metadata */ +#define B_MODIFIED 0x00004 /* tree modified */ +#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */ +#define B_RDONLY 0x00010 /* read-only tree */ + #define B_NODUPS 0x00020 /* no duplicate keys permitted */ -#define B_RDONLY 0x00040 /* read-only tree */ #define R_RECNO 0x00080 /* record oriented tree */ -#define B_SEQINIT 0x00100 /* sequential scan initialized */ - -#define R_CLOSEFP 0x00200 /* opened a file pointer */ -#define R_EOF 0x00400 /* end of input file reached. */ -#define R_FIXLEN 0x00800 /* fixed length records */ -#define R_MEMMAPPED 0x01000 /* memory mapped file. */ -#define R_INMEM 0x02000 /* in-memory file */ -#define R_MODIFIED 0x04000 /* modified file */ -#define R_RDONLY 0x08000 /* read-only file */ -#define B_DB_LOCK 0x10000 /* DB_LOCK specified. */ -#define B_DB_SHMEM 0x20000 /* DB_SHMEM specified. */ -#define B_DB_TXN 0x40000 /* DB_TXN specified. */ - - u_int32_t bt_flags; /* btree state */ +#define R_CLOSEFP 0x00040 /* opened a file pointer */ +#define R_EOF 0x00100 /* end of input file reached. */ +#define R_FIXLEN 0x00200 /* fixed length records */ +#define R_MEMMAPPED 0x00400 /* memory mapped file. */ +#define R_INMEM 0x00800 /* in-memory file */ +#define R_MODIFIED 0x01000 /* modified file */ +#define R_RDONLY 0x02000 /* read-only file */ + +#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */ +#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */ +#define B_DB_TXN 0x10000 /* DB_TXN specified. */ + u_int32_t flags; } BTREE; -#define SET(t, f) ((t)->bt_flags |= (f)) -#define CLR(t, f) ((t)->bt_flags &= ~(f)) -#define ISSET(t, f) ((t)->bt_flags & (f)) - #include "extern.h" diff --git a/lib/libc/db/btree/extern.h b/lib/libc/db/btree/extern.h index 4007bc7..ebd9c54 100644 --- a/lib/libc/db/btree/extern.h +++ b/lib/libc/db/btree/extern.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1991, 1993 + * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)extern.h 8.3 (Berkeley) 2/21/94 + * @(#)extern.h 8.10 (Berkeley) 7/20/94 */ int __bt_close __P((DB *)); @@ -39,9 +39,8 @@ int __bt_crsrdel __P((BTREE *, EPGNO *)); int __bt_defcmp __P((const DBT *, const DBT *)); size_t __bt_defpfx __P((const DBT *, const DBT *)); int __bt_delete __P((const DB *, const DBT *, u_int)); -int __bt_dleaf __P((BTREE *, PAGE *, int)); +int __bt_dleaf __P((BTREE *, const DBT *, PAGE *, u_int)); int __bt_fd __P((const DB *)); -EPG *__bt_first __P((BTREE *, const DBT *, int *)); int __bt_free __P((BTREE *, PAGE *)); int __bt_get __P((const DB *, const DBT *, DBT *, u_int)); PAGE *__bt_new __P((BTREE *, pgno_t *)); @@ -49,15 +48,16 @@ void __bt_pgin __P((void *, pgno_t, void *)); void __bt_pgout __P((void *, pgno_t, void *)); int __bt_push __P((BTREE *, pgno_t, int)); int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int)); -int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *)); +int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int)); EPG *__bt_search __P((BTREE *, const DBT *, int *)); int __bt_seq __P((const DB *, DBT *, DBT *, u_int)); +void __bt_setcur __P((BTREE *, pgno_t, u_int)); int __bt_split __P((BTREE *, PAGE *, - const DBT *, const DBT *, int, size_t, indx_t)); + const DBT *, const DBT *, int, size_t, u_int32_t)); int __bt_sync __P((const DB *, u_int)); int __ovfl_delete __P((BTREE *, void *)); -int __ovfl_get __P((BTREE *, void *, size_t *, char **, size_t *)); +int __ovfl_get __P((BTREE *, void *, size_t *, void **, size_t *)); int __ovfl_put __P((BTREE *, const DBT *, pgno_t *)); #ifdef DEBUG diff --git a/lib/libc/db/docs/hash.usenix.ps b/lib/libc/db/docs/hash.usenix.ps new file mode 100644 index 0000000..c884778 --- /dev/null +++ b/lib/libc/db/docs/hash.usenix.ps @@ -0,0 +1,12209 @@ +%!PS-Adobe-1.0 +%%Creator: utopia:margo (& Seltzer,608-13E,8072,) +%%Title: stdin (ditroff) +%%CreationDate: Tue Dec 11 15:06:45 1990 +%%EndComments +% @(#)psdit.pro 1.3 4/15/88 +% lib/psdit.pro -- prolog for psdit (ditroff) files +% Copyright (c) 1984, 1985 Adobe Systems Incorporated. All Rights Reserved. +% last edit: shore Sat Nov 23 20:28:03 1985 +% RCSID: $Header: psdit.pro,v 2.1 85/11/24 12:19:43 shore Rel $ + +% Changed by Edward Wang (edward@ucbarpa.berkeley.edu) to handle graphics, +% 17 Feb, 87. + +/$DITroff 140 dict def $DITroff begin +/fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def +/xi{0 72 11 mul translate 72 resolution div dup neg scale 0 0 moveto + /fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def F + /pagesave save def}def +/PB{save /psv exch def currentpoint translate + resolution 72 div dup neg scale 0 0 moveto}def +/PE{psv restore}def +/arctoobig 90 def /arctoosmall .05 def +/m1 matrix def /m2 matrix def /m3 matrix def /oldmat matrix def +/tan{dup sin exch cos div}def +/point{resolution 72 div mul}def +/dround {transform round exch round exch itransform}def +/xT{/devname exch def}def +/xr{/mh exch def /my exch def /resolution exch def}def +/xp{}def +/xs{docsave restore end}def +/xt{}def +/xf{/fontname exch def /slotno exch def fontnames slotno get fontname eq not + {fonts slotno fontname findfont put fontnames slotno fontname put}if}def +/xH{/fontheight exch def F}def +/xS{/fontslant exch def F}def +/s{/fontsize exch def /fontheight fontsize def F}def +/f{/fontnum exch def F}def +/F{fontheight 0 le{/fontheight fontsize def}if + fonts fontnum get fontsize point 0 0 fontheight point neg 0 0 m1 astore + fontslant 0 ne{1 0 fontslant tan 1 0 0 m2 astore m3 concatmatrix}if + makefont setfont .04 fontsize point mul 0 dround pop setlinewidth}def +/X{exch currentpoint exch pop moveto show}def +/N{3 1 roll moveto show}def +/Y{exch currentpoint pop exch moveto show}def +/S{show}def +/ditpush{}def/ditpop{}def +/AX{3 -1 roll currentpoint exch pop moveto 0 exch ashow}def +/AN{4 2 roll moveto 0 exch ashow}def +/AY{3 -1 roll currentpoint pop exch moveto 0 exch ashow}def +/AS{0 exch ashow}def +/MX{currentpoint exch pop moveto}def +/MY{currentpoint pop exch moveto}def +/MXY{moveto}def +/cb{pop}def % action on unknown char -- nothing for now +/n{}def/w{}def +/p{pop showpage pagesave restore /pagesave save def}def +/Dt{/Dlinewidth exch def}def 1 Dt +/Ds{/Ddash exch def}def -1 Ds +/Di{/Dstipple exch def}def 1 Di +/Dsetlinewidth{2 Dlinewidth mul setlinewidth}def +/Dsetdash{Ddash 4 eq{[8 12]}{Ddash 16 eq{[32 36]} + {Ddash 20 eq{[32 12 8 12]}{[]}ifelse}ifelse}ifelse 0 setdash}def +/Dstroke{gsave Dsetlinewidth Dsetdash 1 setlinecap stroke grestore + currentpoint newpath moveto}def +/Dl{rlineto Dstroke}def +/arcellipse{/diamv exch def /diamh exch def oldmat currentmatrix pop + currentpoint translate 1 diamv diamh div scale /rad diamh 2 div def + currentpoint exch rad add exch rad -180 180 arc oldmat setmatrix}def +/Dc{dup arcellipse Dstroke}def +/De{arcellipse Dstroke}def +/Da{/endv exch def /endh exch def /centerv exch def /centerh exch def + /cradius centerv centerv mul centerh centerh mul add sqrt def + /eradius endv endv mul endh endh mul add sqrt def + /endang endv endh atan def + /startang centerv neg centerh neg atan def + /sweep startang endang sub dup 0 lt{360 add}if def + sweep arctoobig gt + {/midang startang sweep 2 div sub def /midrad cradius eradius add 2 div def + /midh midang cos midrad mul def /midv midang sin midrad mul def + midh neg midv neg endh endv centerh centerv midh midv Da + Da} + {sweep arctoosmall ge + {/controldelt 1 sweep 2 div cos sub 3 sweep 2 div sin mul div 4 mul def + centerv neg controldelt mul centerh controldelt mul + endv neg controldelt mul centerh add endh add + endh controldelt mul centerv add endv add + centerh endh add centerv endv add rcurveto Dstroke} + {centerh endh add centerv endv add rlineto Dstroke} + ifelse} + ifelse}def +/Dpatterns[ +[%cf[widthbits] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000103810000000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000001038100000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0042660000246600>] +[8<0000990000990000>] +[8<0804020180402010>] +[8<2418814242811824>] +[8<6699996666999966>] +[8<8000000008000000>] +[8<00001c3e363e1c00>] +[8<0000000000000000>] +[32<00000040000000c00000004000000040000000e0000000000000000000000000>] +[32<00000000000060000000900000002000000040000000f0000000000000000000>] +[32<000000000000000000e0000000100000006000000010000000e0000000000000>] +[32<00000000000000002000000060000000a0000000f00000002000000000000000>] +[32<0000000e0000000000000000000000000000000f000000080000000e00000001>] +[32<0000090000000600000000000000000000000000000007000000080000000e00>] +[32<00010000000200000004000000040000000000000000000000000000000f0000>] +[32<0900000006000000090000000600000000000000000000000000000006000000>]] +[%ug +[8<0000020000000000>] +[8<0000020000002000>] +[8<0004020000002000>] +[8<0004020000402000>] +[8<0004060000402000>] +[8<0004060000406000>] +[8<0006060000406000>] +[8<0006060000606000>] +[8<00060e0000606000>] +[8<00060e000060e000>] +[8<00070e000060e000>] +[8<00070e000070e000>] +[8<00070e020070e000>] +[8<00070e020070e020>] +[8<04070e020070e020>] +[8<04070e024070e020>] +[8<04070e064070e020>] +[8<04070e064070e060>] +[8<06070e064070e060>] +[8<06070e066070e060>] +[8<06070f066070e060>] +[8<06070f066070f060>] +[8<060f0f066070f060>] +[8<060f0f0660f0f060>] +[8<060f0f0760f0f060>] +[8<060f0f0760f0f070>] +[8<0e0f0f0760f0f070>] +[8<0e0f0f07e0f0f070>] +[8<0e0f0f0fe0f0f070>] +[8<0e0f0f0fe0f0f0f0>] +[8<0f0f0f0fe0f0f0f0>] +[8<0f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f1f9>] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8]] +[%mg +[8<8000000000000000>] +[8<0822080080228000>] +[8<0204081020408001>] +[8<40e0400000000000>] +[8<66999966>] +[8<8001000010080000>] +[8<81c36666c3810000>] +[8] +[16<07c00f801f003e007c00f800f001e003c007800f001f003e007c00f801f003e0>] +[16<1f000f8007c003e001f000f8007c003e001f800fc007e003f001f8007c003e00>] +[8] +[16<0040008001000200040008001000200040008000000100020004000800100020>] +[16<0040002000100008000400020001800040002000100008000400020001000080>] +[16<1fc03fe07df0f8f8f07de03fc01f800fc01fe03ff07df8f87df03fe01fc00f80>] +[8<80>] +[8<8040201000000000>] +[8<84cc000048cc0000>] +[8<9900009900000000>] +[8<08040201804020100800020180002010>] +[8<2418814242811824>] +[8<66999966>] +[8<8000000008000000>] +[8<70f8d8f870000000>] +[8<0814224180402010>] +[8] +[8<018245aa45820100>] +[8<221c224180808041>] +[8<88000000>] +[8<0855800080550800>] +[8<2844004482440044>] +[8<0810204080412214>] +[8<00>]]]def +/Dfill{ + transform /maxy exch def /maxx exch def + transform /miny exch def /minx exch def + minx maxx gt{/minx maxx /maxx minx def def}if + miny maxy gt{/miny maxy /maxy miny def def}if + Dpatterns Dstipple 1 sub get exch 1 sub get + aload pop /stip exch def /stipw exch def /stiph 128 def + /imatrix[stipw 0 0 stiph 0 0]def + /tmatrix[stipw 0 0 stiph 0 0]def + /minx minx cvi stiph idiv stiph mul def + /miny miny cvi stipw idiv stipw mul def + gsave eoclip 0 setgray + miny stiph maxy{ + tmatrix exch 5 exch put + minx stipw maxx{ + tmatrix exch 4 exch put tmatrix setmatrix + stipw stiph true imatrix {stip} imagemask + }for + }for + grestore +}def +/Dp{Dfill Dstroke}def +/DP{Dfill currentpoint newpath moveto}def +end + +/ditstart{$DITroff begin + /nfonts 60 def % NFONTS makedev/ditroff dependent! + /fonts[nfonts{0}repeat]def + /fontnames[nfonts{()}repeat]def +/docsave save def +}def + +% character outcalls +/oc{ + /pswid exch def /cc exch def /name exch def + /ditwid pswid fontsize mul resolution mul 72000 div def + /ditsiz fontsize resolution mul 72 div def + ocprocs name known{ocprocs name get exec}{name cb}ifelse +}def +/fractm [.65 0 0 .6 0 0] def +/fraction{ + /fden exch def /fnum exch def gsave /cf currentfont def + cf fractm makefont setfont 0 .3 dm 2 copy neg rmoveto + fnum show rmoveto currentfont cf setfont(\244)show setfont fden show + grestore ditwid 0 rmoveto +}def +/oce{grestore ditwid 0 rmoveto}def +/dm{ditsiz mul}def +/ocprocs 50 dict def ocprocs begin +(14){(1)(4)fraction}def +(12){(1)(2)fraction}def +(34){(3)(4)fraction}def +(13){(1)(3)fraction}def +(23){(2)(3)fraction}def +(18){(1)(8)fraction}def +(38){(3)(8)fraction}def +(58){(5)(8)fraction}def +(78){(7)(8)fraction}def +(sr){gsave 0 .06 dm rmoveto(\326)show oce}def +(is){gsave 0 .15 dm rmoveto(\362)show oce}def +(->){gsave 0 .02 dm rmoveto(\256)show oce}def +(<-){gsave 0 .02 dm rmoveto(\254)show oce}def +(==){gsave 0 .05 dm rmoveto(\272)show oce}def +(uc){gsave currentpoint 400 .009 dm mul add translate + 8 -8 scale ucseal oce}def +end + +% an attempt at a PostScript FONT to implement ditroff special chars +% this will enable us to +% cache the little buggers +% generate faster, more compact PS out of psdit +% confuse everyone (including myself)! +50 dict dup begin +/FontType 3 def +/FontName /DIThacks def +/FontMatrix [.001 0 0 .001 0 0] def +/FontBBox [-260 -260 900 900] def% a lie but ... +/Encoding 256 array def +0 1 255{Encoding exch /.notdef put}for +Encoding + dup 8#040/space put %space + dup 8#110/rc put %right ceil + dup 8#111/lt put %left top curl + dup 8#112/bv put %bold vert + dup 8#113/lk put %left mid curl + dup 8#114/lb put %left bot curl + dup 8#115/rt put %right top curl + dup 8#116/rk put %right mid curl + dup 8#117/rb put %right bot curl + dup 8#120/rf put %right floor + dup 8#121/lf put %left floor + dup 8#122/lc put %left ceil + dup 8#140/sq put %square + dup 8#141/bx put %box + dup 8#142/ci put %circle + dup 8#143/br put %box rule + dup 8#144/rn put %root extender + dup 8#145/vr put %vertical rule + dup 8#146/ob put %outline bullet + dup 8#147/bu put %bullet + dup 8#150/ru put %rule + dup 8#151/ul put %underline + pop +/DITfd 100 dict def +/BuildChar{0 begin + /cc exch def /fd exch def + /charname fd /Encoding get cc get def + /charwid fd /Metrics get charname get def + /charproc fd /CharProcs get charname get def + charwid 0 fd /FontBBox get aload pop setcachedevice + 2 setlinejoin 40 setlinewidth + newpath 0 0 moveto gsave charproc grestore + end}def +/BuildChar load 0 DITfd put +/CharProcs 50 dict def +CharProcs begin +/space{}def +/.notdef{}def +/ru{500 0 rls}def +/rn{0 840 moveto 500 0 rls}def +/vr{0 800 moveto 0 -770 rls}def +/bv{0 800 moveto 0 -1000 rls}def +/br{0 840 moveto 0 -1000 rls}def +/ul{0 -140 moveto 500 0 rls}def +/ob{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath stroke}def +/bu{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath fill}def +/sq{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath stroke}def +/bx{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath fill}def +/ci{500 360 rmoveto currentpoint newpath 333 0 360 arc + 50 setlinewidth stroke}def + +/lt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 add exch s4 a4p stroke}def +/lb{0 800 moveto 0 -550 rlineto currx -200 2cx s4 add exch s4 a4p stroke}def +/rt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 sub exch s4 a4p stroke}def +/rb{0 800 moveto 0 -500 rlineto currx -200 2cx s4 sub exch s4 a4p stroke}def +/lk{0 800 moveto 0 300 -300 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/rk{0 800 moveto 0 300 s2 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/lf{0 800 moveto 0 -1000 rlineto s4 0 rls}def +/rf{0 800 moveto 0 -1000 rlineto s4 neg 0 rls}def +/lc{0 -200 moveto 0 1000 rlineto s4 0 rls}def +/rc{0 -200 moveto 0 1000 rlineto s4 neg 0 rls}def +end + +/Metrics 50 dict def Metrics begin +/.notdef 0 def +/space 500 def +/ru 500 def +/br 0 def +/lt 416 def +/lb 416 def +/rt 416 def +/rb 416 def +/lk 416 def +/rk 416 def +/rc 416 def +/lc 416 def +/rf 416 def +/lf 416 def +/bv 416 def +/ob 350 def +/bu 350 def +/ci 750 def +/bx 750 def +/sq 750 def +/rn 500 def +/ul 500 def +/vr 0 def +end + +DITfd begin +/s2 500 def /s4 250 def /s3 333 def +/a4p{arcto pop pop pop pop}def +/2cx{2 copy exch}def +/rls{rlineto stroke}def +/currx{currentpoint pop}def +/dround{transform round exch round exch itransform} def +end +end +/DIThacks exch definefont pop +ditstart +(psc)xT +576 1 1 xr +1(Times-Roman)xf 1 f +2(Times-Italic)xf 2 f +3(Times-Bold)xf 3 f +4(Times-BoldItalic)xf 4 f +5(Helvetica)xf 5 f +6(Helvetica-Bold)xf 6 f +7(Courier)xf 7 f +8(Courier-Bold)xf 8 f +9(Symbol)xf 9 f +10(DIThacks)xf 10 f +10 s +1 f +xi +%%EndProlog + +%%Page: 1 1 +10 s 10 xH 0 xS 1 f +3 f +22 s +1249 626(A)N +1420(N)X +1547(ew)X +1796(H)X +1933(ashing)X +2467(P)X +2574(ackage)X +3136(for)X +3405(U)X +3532(N)X +3659(IX)X +2 f +20 s +3855 562(1)N +1 f +12 s +1607 779(Margo)N +1887(Seltzer)X +9 f +2179(-)X +1 f +2256(University)X +2686(of)X +2790(California,)X +3229(Berkeley)X +2015 875(Ozan)N +2242(Yigit)X +9 f +2464(-)X +1 f +2541(York)X +2762(University)X +3 f +2331 1086(ABSTRACT)N +1 f +10 s +1152 1222(UNIX)N +1385(support)X +1657(of)X +1756(disk)X +1921(oriented)X +2216(hashing)X +2497(was)X +2654(originally)X +2997(provided)X +3314(by)X +2 f +3426(dbm)X +1 f +3595([ATT79])X +3916(and)X +1152 1310(subsequently)N +1595(improved)X +1927(upon)X +2112(in)X +2 f +2199(ndbm)X +1 f +2402([BSD86].)X +2735(In)X +2826(AT&T)X +3068(System)X +3327(V,)X +3429(in-memory)X +3809(hashed)X +1152 1398(storage)N +1420(and)X +1572(access)X +1814(support)X +2090(was)X +2251(added)X +2479(in)X +2577(the)X +2 f +2711(hsearch)X +1 f +3000(library)X +3249(routines)X +3542([ATT85].)X +3907(The)X +1152 1486(result)N +1367(is)X +1457(a)X +1530(system)X +1789(with)X +1968(two)X +2125(incompatible)X +2580(hashing)X +2865(schemes,)X +3193(each)X +3377(with)X +3555(its)X +3666(own)X +3840(set)X +3965(of)X +1152 1574(shortcomings.)N +1152 1688(This)N +1316(paper)X +1517(presents)X +1802(the)X +1922(design)X +2152(and)X +2289(performance)X +2717(characteristics)X +3198(of)X +3286(a)X +3343(new)X +3498(hashing)X +3768(package)X +1152 1776(providing)N +1483(a)X +1539(superset)X +1822(of)X +1909(the)X +2027(functionality)X +2456(provided)X +2761(by)X +2 f +2861(dbm)X +1 f +3019(and)X +2 f +3155(hsearch)X +1 f +3409(.)X +3469(The)X +3614(new)X +3768(package)X +1152 1864(uses)N +1322(linear)X +1537(hashing)X +1818(to)X +1912(provide)X +2189(ef\256cient)X +2484(support)X +2755(of)X +2853(both)X +3026(memory)X +3324(based)X +3538(and)X +3685(disk)X +3849(based)X +1152 1952(hash)N +1319(tables)X +1526(with)X +1688(performance)X +2115(superior)X +2398(to)X +2480(both)X +2 f +2642(dbm)X +1 f +2800(and)X +2 f +2936(hsearch)X +1 f +3210(under)X +3413(most)X +3588(conditions.)X +3 f +1380 2128(Introduction)N +1 f +892 2260(Current)N +1196(UNIX)X +1456(systems)X +1768(offer)X +1984(two)X +2163(forms)X +2409(of)X +720 2348(hashed)N +973(data)X +1137(access.)X +2 f +1413(Dbm)X +1 f +1599(and)X +1745(its)X +1850(derivatives)X +2231(provide)X +720 2436(keyed)N +939(access)X +1171(to)X +1259(disk)X +1418(resident)X +1698(data)X +1858(while)X +2 f +2062(hsearch)X +1 f +2342(pro-)X +720 2524(vides)N +929(access)X +1175(for)X +1309(memory)X +1616(resident)X +1910(data.)X +2124(These)X +2356(two)X +720 2612(access)N +979(methods)X +1302(are)X +1453(incompatible)X +1923(in)X +2037(that)X +2209(memory)X +720 2700(resident)N +1011(hash)X +1195(tables)X +1419(may)X +1593(not)X +1731(be)X +1843(stored)X +2075(on)X +2191(disk)X +2360(and)X +720 2788(disk)N +884(resident)X +1169(tables)X +1387(cannot)X +1632(be)X +1739(read)X +1909(into)X +2063(memory)X +2360(and)X +720 2876(accessed)N +1022(using)X +1215(the)X +1333(in-memory)X +1709(routines.)X +2 f +892 2990(Dbm)N +1 f +1091(has)X +1241(several)X +1512(shortcomings.)X +2026(Since)X +2247(data)X +2423(is)X +720 3078(assumed)N +1032(to)X +1130(be)X +1242(disk)X +1411(resident,)X +1721(each)X +1905(access)X +2146(requires)X +2440(a)X +720 3166(system)N +963(call,)X +1120(and)X +1257(almost)X +1491(certainly,)X +1813(a)X +1869(disk)X +2022(operation.)X +2365(For)X +720 3254(extremely)N +1072(large)X +1264(databases,)X +1623(where)X +1851(caching)X +2131(is)X +2214(unlikely)X +720 3342(to)N +810(be)X +914(effective,)X +1244(this)X +1386(is)X +1466(acceptable,)X +1853(however,)X +2177(when)X +2378(the)X +720 3430(database)N +1022(is)X +1100(small)X +1298(\(i.e.)X +1447(the)X +1569(password)X +1896(\256le\),)X +2069(performance)X +720 3518(improvements)N +1204(can)X +1342(be)X +1443(obtained)X +1744(through)X +2018(caching)X +2293(pages)X +720 3606(of)N +818(the)X +947(database)X +1255(in)X +1348(memory.)X +1685(In)X +1782(addition,)X +2 f +2094(dbm)X +1 f +2262(cannot)X +720 3694(store)N +902(data)X +1062(items)X +1261(whose)X +1492(total)X +1660(key)X +1802(and)X +1943(data)X +2102(size)X +2252(exceed)X +720 3782(the)N +850(page)X +1034(size)X +1191(of)X +1290(the)X +1420(hash)X +1599(table.)X +1827(Similarly,)X +2176(if)X +2257(two)X +2409(or)X +720 3870(more)N +907(keys)X +1076(produce)X +1357(the)X +1477(same)X +1664(hash)X +1833(value)X +2029(and)X +2166(their)X +2334(total)X +720 3958(size)N +876(exceeds)X +1162(the)X +1291(page)X +1474(size,)X +1650(the)X +1779(table)X +1966(cannot)X +2210(store)X +2396(all)X +720 4046(the)N +838(colliding)X +1142(keys.)X +892 4160(The)N +1050(in-memory)X +2 f +1439(hsearch)X +1 f +1725(routines)X +2015(have)X +2199(different)X +720 4248(shortcomings.)N +1219(First,)X +1413(the)X +1539(notion)X +1771(of)X +1865(a)X +1928(single)X +2146(hash)X +2320(table)X +720 4336(is)N +807(embedded)X +1171(in)X +1266(the)X +1397(interface,)X +1732(preventing)X +2108(an)X +2217(applica-)X +720 4424(tion)N +902(from)X +1116(accessing)X +1482(multiple)X +1806(tables)X +2050(concurrently.)X +720 4512(Secondly,)N +1063(the)X +1186(routine)X +1438(to)X +1525(create)X +1743(a)X +1804(hash)X +1976(table)X +2157(requires)X +2440(a)X +720 4600(parameter)N +1066(which)X +1286(declares)X +1573(the)X +1694(size)X +1842(of)X +1932(the)X +2053(hash)X +2223(table.)X +2422(If)X +720 4688(this)N +856(size)X +1001(is)X +1074(set)X +1183(too)X +1305(low,)X +1465(performance)X +1892(degradation)X +2291(or)X +2378(the)X +720 4776(inability)N +1008(to)X +1092(add)X +1230(items)X +1425(to)X +1509(the)X +1628(table)X +1805(may)X +1964(result.)X +2223(In)X +2311(addi-)X +720 4864(tion,)N +2 f +910(hsearch)X +1 f +1210(requires)X +1515(that)X +1681(the)X +1825(application)X +2226(allocate)X +720 4952(memory)N +1037(for)X +1181(the)X +1329(key)X +1495(and)X +1661(data)X +1845(items.)X +2108(Lastly,)X +2378(the)X +2 f +720 5040(hsearch)N +1 f +1013(routines)X +1310(provide)X +1594(no)X +1713(interface)X +2034(to)X +2135(store)X +2329(hash)X +720 5128(tables)N +927(on)X +1027(disk.)X +16 s +720 5593 MXY +864 0 Dl +2 f +8 s +760 5648(1)N +1 f +9 s +5673(UNIX)Y +990(is)X +1056(a)X +1106(registered)X +1408(trademark)X +1718(of)X +1796(AT&T.)X +10 s +2878 2128(The)N +3032(goal)X +3199(of)X +3295(our)X +3431(work)X +3625(was)X +3779(to)X +3870(design)X +4108(and)X +4253(imple-)X +2706 2216(ment)N +2900(a)X +2970(new)X +3138(package)X +3436(that)X +3590(provides)X +3899(a)X +3968(superset)X +4264(of)X +4364(the)X +2706 2304(functionality)N +3144(of)X +3240(both)X +2 f +3411(dbm)X +1 f +3578(and)X +2 f +3723(hsearch)X +1 f +3977(.)X +4045(The)X +4198(package)X +2706 2392(had)N +2871(to)X +2982(overcome)X +3348(the)X +3495(interface)X +3826(shortcomings)X +4306(cited)X +2706 2480(above)N +2930(and)X +3078(its)X +3185(implementation)X +3719(had)X +3867(to)X +3961(provide)X +4238(perfor-)X +2706 2568(mance)N +2942(equal)X +3142(or)X +3235(superior)X +3524(to)X +3612(that)X +3758(of)X +3851(the)X +3975(existing)X +4253(imple-)X +2706 2656(mentations.)N +3152(In)X +3274(order)X +3498(to)X +3614(provide)X +3913(a)X +4003(compact)X +4329(disk)X +2706 2744(representation,)N +3224(graceful)X +3531(table)X +3729(growth,)X +4018(and)X +4176(expected)X +2706 2832(constant)N +3033(time)X +3234(performance,)X +3720(we)X +3873(selected)X +4191(Litwin's)X +2706 2920(linear)N +2923(hashing)X +3206(algorithm)X +3551([LAR88,)X +3872(LIT80].)X +4178(We)X +4324(then)X +2706 3008(enhanced)N +3037(the)X +3161(algorithm)X +3498(to)X +3586(handle)X +3826(page)X +4004(over\257ows)X +4346(and)X +2706 3096(large)N +2900(key)X +3049(handling)X +3362(with)X +3537(a)X +3606(single)X +3830(mechanism,)X +4248(named)X +2706 3184(buddy-in-waiting.)N +3 f +2975 3338(Existing)N +3274(UNIX)X +3499(Hashing)X +3802(Techniques)X +1 f +2878 3470(Over)N +3076(the)X +3210(last)X +3357(decade,)X +3637(several)X +3901(dynamic)X +4213(hashing)X +2706 3558(schemes)N +3000(have)X +3174(been)X +3348(developed)X +3700(for)X +3816(the)X +3936(UNIX)X +4159(timeshar-)X +2706 3646(ing)N +2856(system,)X +3146(starting)X +3433(with)X +3622(the)X +3767(inclusion)X +4107(of)X +2 f +4221(dbm)X +1 f +4359(,)X +4426(a)X +2706 3734(minimal)N +3008(database)X +3321(library)X +3571(written)X +3834(by)X +3950(Ken)X +4120(Thompson)X +2706 3822([THOM90],)N +3141(in)X +3248(the)X +3391(Seventh)X +3694(Edition)X +3974(UNIX)X +4220(system.)X +2706 3910(Since)N +2916(then,)X +3106(an)X +3214(extended)X +3536(version)X +3804(of)X +3903(the)X +4032(same)X +4228(library,)X +2 f +2706 3998(ndbm)N +1 f +2884(,)X +2933(and)X +3078(a)X +3142(public-domain)X +3637(clone)X +3839(of)X +3934(the)X +4060(latter,)X +2 f +4273(sdbm)X +1 f +4442(,)X +2706 4086(have)N +2902(been)X +3098(developed.)X +3491(Another)X +3797 0.1645(interface-compatible)AX +2706 4174(library)N +2 f +2950(gdbm)X +1 f +3128(,)X +3178(was)X +3333(recently)X +3622(made)X +3826(available)X +4145(as)X +4241(part)X +4395(of)X +2706 4262(the)N +2829(Free)X +2997(Software)X +3312(Foundation's)X +3759(\(FSF\))X +3970(software)X +4271(distri-)X +2706 4350(bution.)N +2878 4464(All)N +3017(of)X +3121(these)X +3323(implementations)X +3893(are)X +4029(based)X +4248(on)X +4364(the)X +2706 4552(idea)N +2871(of)X +2969(revealing)X +3299(just)X +3445(enough)X +3711(bits)X +3856(of)X +3953(a)X +4019(hash)X +4196(value)X +4400(to)X +2706 4640(locate)N +2920(a)X +2978(page)X +3151(in)X +3234(a)X +3291(single)X +3503(access.)X +3770(While)X +2 f +3987(dbm/ndbm)X +1 f +4346(and)X +2 f +2706 4728(sdbm)N +1 f +2908(map)X +3079(the)X +3210(hash)X +3390(value)X +3597(directly)X +3874(to)X +3968(a)X +4036(disk)X +4201(address,)X +2 f +2706 4816(gdbm)N +1 f +2921(uses)X +3096(the)X +3231(hash)X +3414(value)X +3624(to)X +3722(index)X +3936(into)X +4096(a)X +2 f +4168(directory)X +1 f +2706 4904([ENB88])N +3020(containing)X +3378(disk)X +3531(addresses.)X +2878 5018(The)N +2 f +3033(hsearch)X +1 f +3317(routines)X +3605(in)X +3697(System)X +3962(V)X +4049(are)X +4177(designed)X +2706 5106(to)N +2804(provide)X +3085(memory-resident)X +3669(hash)X +3852(tables.)X +4115(Since)X +4328(data)X +2706 5194(access)N +2948(does)X +3131(not)X +3269(require)X +3533(disk)X +3702(access,)X +3964(simple)X +4213(hashing)X +2706 5282(schemes)N +3010(which)X +3238(may)X +3408(require)X +3667(multiple)X +3964(probes)X +4209(into)X +4364(the)X +2706 5370(table)N +2889(are)X +3015(used.)X +3209(A)X +3294(more)X +3486(interesting)X +3851(version)X +4114(of)X +2 f +4208(hsearch)X +1 f +2706 5458(is)N +2784(a)X +2845(public)X +3070(domain)X +3335(library,)X +2 f +3594(dynahash)X +1 f +3901(,)X +3945(that)X +4089(implements)X +2706 5546(Larson's)N +3036(in-memory)X +3440(adaptation)X +3822([LAR88])X +4164(of)X +4279(linear)X +2706 5634(hashing)N +2975([LIT80].)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +1 f +4424(1)X + +2 p +%%Page: 2 2 +10 s 10 xH 0 xS 1 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +2 f +1074 538(dbm)N +1 f +1232(and)X +2 f +1368(ndbm)X +1 f +604 670(The)N +2 f +760(dbm)X +1 f +928(and)X +2 f +1074(ndbm)X +1 f +1282(library)X +1526(implementations)X +2089(are)X +432 758(based)N +667(on)X +799(the)X +949(same)X +1166(algorithm)X +1529(by)X +1661(Ken)X +1846(Thompson)X +432 846([THOM90,)N +824(TOR88,)X +1113(WAL84],)X +1452(but)X +1582(differ)X +1789(in)X +1879(their)X +2054(pro-)X +432 934(grammatic)N +801(interfaces.)X +1160(The)X +1311(latter)X +1502(is)X +1581(a)X +1643(modi\256ed)X +1952(version)X +432 1022(of)N +533(the)X +665(former)X +918(which)X +1148(adds)X +1328(support)X +1601(for)X +1728(multiple)X +2027(data-)X +432 1110(bases)N +634(to)X +724(be)X +828(open)X +1011(concurrently.)X +1484(The)X +1636(discussion)X +1996(of)X +2090(the)X +432 1198(algorithm)N +774(that)X +925(follows)X +1196(is)X +1280(applicable)X +1640(to)X +1732(both)X +2 f +1904(dbm)X +1 f +2072(and)X +2 f +432 1286(ndbm)N +1 f +610(.)X +604 1400(The)N +760(basic)X +956(structure)X +1268(of)X +2 f +1366(dbm)X +1 f +1535(calls)X +1712(for)X +1836(\256xed-sized)X +432 1488(disk)N +612(blocks)X +868(\(buckets\))X +1214(and)X +1377(an)X +2 f +1499(access)X +1 f +1755(function)X +2068(that)X +432 1576(maps)N +623(a)X +681(key)X +819(to)X +902(a)X +959(bucket.)X +1234(The)X +1380(interface)X +1683(routines)X +1962(use)X +2090(the)X +2 f +432 1664(access)N +1 f +673(function)X +970(to)X +1062(obtain)X +1292(the)X +1420(appropriate)X +1816(bucket)X +2060(in)X +2152(a)X +432 1752(single)N +643(disk)X +796(access.)X +604 1866(Within)N +869(the)X +2 f +1010(access)X +1 f +1263(function,)X +1593(a)X +1672(bit-randomizing)X +432 1954(hash)N +610(function)X +2 f +8 s +877 1929(2)N +1 f +10 s +940 1954(is)N +1024(used)X +1202(to)X +1294(convert)X +1565(a)X +1631(key)X +1777(into)X +1931(a)X +1997(32-bit)X +432 2042(hash)N +605(value.)X +825(Out)X +971(of)X +1064(these)X +1254(32)X +1359(bits,)X +1519(only)X +1686(as)X +1778(many)X +1981(bits)X +2121(as)X +432 2130(necessary)N +773(are)X +900(used)X +1075(to)X +1165(determine)X +1514(the)X +1639(particular)X +1974(bucket)X +432 2218(on)N +533(which)X +750(a)X +807(key)X +944(resides.)X +1228(An)X +1347(in-memory)X +1724(bitmap)X +1967(is)X +2041(used)X +432 2306(to)N +533(determine)X +893(how)X +1070(many)X +1287(bits)X +1441(are)X +1579(required.)X +1905(Each)X +2104(bit)X +432 2394(indicates)N +746(whether)X +1033(its)X +1136(associated)X +1494(bucket)X +1736(has)X +1871(been)X +2051(split)X +432 2482(yet)N +562(\(a)X +657(0)X +728(indicating)X +1079(that)X +1230(the)X +1359(bucket)X +1604(has)X +1742(not)X +1875(yet)X +2004(split\).)X +432 2570(The)N +590(use)X +730(of)X +830(the)X +961(hash)X +1141(function)X +1441(and)X +1590(the)X +1720(bitmap)X +1974(is)X +2059(best)X +432 2658(described)N +769(by)X +878(stepping)X +1177(through)X +1454(database)X +1759(creation)X +2046(with)X +432 2746(multiple)N +718(invocations)X +1107(of)X +1194(a)X +2 f +1250(store)X +1 f +1430(operation.)X +604 2860(Initially,)N +906(the)X +1033(hash)X +1209(table)X +1394(contains)X +1690(a)X +1755(single)X +1974(bucket)X +432 2948(\(bucket)N +711(0\),)X +836(the)X +972(bit)X +1094(map)X +1270(contains)X +1575(a)X +1649(single)X +1878(bit)X +2000(\(bit)X +2148(0)X +432 3036(corresponding)N +913(to)X +997(bucket)X +1233(0\),)X +1342(and)X +1480(0)X +1542(bits)X +1699(of)X +1788(a)X +1846(hash)X +2014(value)X +432 3124(are)N +560(examined)X +901(to)X +992(determine)X +1342(where)X +1568(a)X +1633(key)X +1778(is)X +1860(placed)X +2099(\(in)X +432 3212(bucket)N +670(0\).)X +801(When)X +1017(bucket)X +1255(0)X +1319(is)X +1396(full,)X +1551(its)X +1650(bit)X +1758(in)X +1844(the)X +1966(bitmap)X +432 3300(\(bit)N +564(0\))X +652(is)X +726(set,)X +856(and)X +993(its)X +1089(contents)X +1377(are)X +1497(split)X +1655(between)X +1943(buckets)X +432 3388(0)N +499(and)X +641(1,)X +727(by)X +833(considering)X +1233(the)X +1357(0)X +2 f +7 s +3356(th)Y +10 s +1 f +1480 3388(bit)N +1590(\(the)X +1741(lowest)X +1976(bit)X +2086(not)X +432 3476(previously)N +800(examined\))X +1169(of)X +1266(the)X +1393(hash)X +1569(value)X +1772(for)X +1895(each)X +2072(key)X +432 3564(within)N +668(the)X +798(bucket.)X +1064(Given)X +1292(a)X +1359(well-designed)X +1840(hash)X +2018(func-)X +432 3652(tion,)N +613(approximately)X +1112(half)X +1273(of)X +1376(the)X +1510(keys)X +1693(will)X +1853(have)X +2041(hash)X +432 3740(values)N +666(with)X +837(the)X +964(0)X +2 f +7 s +3708(th)Y +10 s +1 f +1090 3740(bit)N +1203(set.)X +1341(All)X +1471(such)X +1646(keys)X +1821(and)X +1965(associ-)X +432 3828(ated)N +586(data)X +740(are)X +859(moved)X +1097(to)X +1179(bucket)X +1413(1,)X +1493(and)X +1629(the)X +1747(rest)X +1883(remain)X +2126(in)X +432 3916(bucket)N +666(0.)X +604 4030(After)N +804(this)X +949(split,)X +1135(the)X +1262(\256le)X +1393(now)X +1560(contains)X +1856(two)X +2005(buck-)X +432 4118(ets,)N +562(and)X +699(the)X +818(bitmap)X +1061(contains)X +1349(three)X +1530(bits:)X +1687(the)X +1805(0)X +2 f +7 s +4086(th)Y +10 s +1 f +1922 4118(bit)N +2026(is)X +2099(set)X +432 4206(to)N +525(indicate)X +810(a)X +876(bucket)X +1120(0)X +1190(split)X +1357(when)X +1561(no)X +1671(bits)X +1816(of)X +1913(the)X +2041(hash)X +432 4294(value)N +648(are)X +789(considered,)X +1199(and)X +1357(two)X +1519(more)X +1726(unset)X +1937(bits)X +2094(for)X +432 4382(buckets)N +706(0)X +775(and)X +920(1.)X +1029(The)X +1183(placement)X +1542(of)X +1638(an)X +1742(incoming)X +2072(key)X +432 4470(now)N +604(requires)X +897(examination)X +1327(of)X +1428(the)X +1560(0)X +2 f +7 s +4438(th)Y +10 s +1 f +1691 4470(bit)N +1809(of)X +1910(the)X +2041(hash)X +432 4558(value,)N +667(and)X +824(the)X +963(key)X +1119(is)X +1212(placed)X +1462(either)X +1685(in)X +1787(bucket)X +2041(0)X +2121(or)X +432 4646(bucket)N +674(1.)X +782(If)X +864(either)X +1075(bucket)X +1317(0)X +1385(or)X +1480(bucket)X +1722(1)X +1790(\256lls)X +1937(up,)X +2064(it)X +2135(is)X +432 4734(split)N +598(as)X +693(before,)X +947(its)X +1050(bit)X +1162(is)X +1243(set)X +1360(in)X +1450(the)X +1576(bitmap,)X +1846(and)X +1990(a)X +2054(new)X +432 4822(set)N +541(of)X +628(unset)X +817(bits)X +952(are)X +1071(added)X +1283(to)X +1365(the)X +1483(bitmap.)X +604 4936(Each)N +791(time)X +959(we)X +1079(consider)X +1376(a)X +1437(new)X +1596(bit)X +1705(\(bit)X +1841(n\),)X +1953(we)X +2072(add)X +432 5024(2)N +2 f +7 s +4992(n)Y +9 f +509(+)X +1 f +540(1)X +10 s +595 5024(bits)N +737(to)X +826(the)X +951(bitmap)X +1199(and)X +1341(obtain)X +1567(2)X +2 f +7 s +4992(n)Y +9 f +1644(+)X +1 f +1675(1)X +10 s +1729 5024(more)N +1920(address-)X +432 5112(able)N +595(buckets)X +869(in)X +960(the)X +1087(\256le.)X +1258(As)X +1376(a)X +1441(result,)X +1668(the)X +1795(bitmap)X +2045(con-)X +432 5200(tains)N +618(the)X +751(previous)X +1062(2)X +2 f +7 s +5168(n)Y +9 f +1139(+)X +1 f +1170(1)X +2 f +10 s +9 f +5200(-)Y +1 f +1242(1)X +1317(bits)X +1467(\(1)X +2 f +9 f +1534(+)X +1 f +1578(2)X +2 f +9 f +(+)S +1 f +1662(4)X +2 f +9 f +(+)S +1 f +1746(...)X +2 f +9 f +(+)S +1 f +1850(2)X +2 f +7 s +5168(n)Y +10 s +1 f +1931 5200(\))N +1992(which)X +432 5288(trace)N +649(the)X +807(entire)X +2 f +1050(split)X +1247(history)X +1 f +1529(of)X +1656(the)X +1813(addressable)X +16 s +432 5433 MXY +864 0 Dl +2 f +8 s +472 5488(2)N +1 f +9 s +523 5513(This)N +670(bit-randomizing)X +1153(property)X +1416(is)X +1482(important)X +1780(to)X +1854(obtain)X +2052(radi-)X +432 5593(cally)N +599(different)X +874(hash)X +1033(values)X +1244(for)X +1355(nearly)X +1562(identical)X +1836(keys,)X +2012(which)X +432 5673(in)N +506(turn)X +640(avoids)X +846(clustering)X +1148(of)X +1226(such)X +1376(keys)X +1526(in)X +1600(a)X +1650(single)X +1840(bucket.)X +10 s +2418 538(buckets.)N +2590 652(Given)N +2809(a)X +2868(key)X +3007(and)X +3146(the)X +3267(bitmap)X +3512(created)X +3768(by)X +3871(this)X +4009(algo-)X +2418 740(rithm,)N +2638(we)X +2759(\256rst)X +2910(examine)X +3209(bit)X +3320(0)X +3386(of)X +3479(the)X +3603(bitmap)X +3851(\(the)X +4002(bit)X +4112(to)X +2418 828(consult)N +2673(when)X +2871(0)X +2934(bits)X +3072(of)X +3162(the)X +3283(hash)X +3453(value)X +3650(are)X +3772(being)X +3973(exam-)X +2418 916(ined\).)N +2631(If)X +2713(it)X +2785(is)X +2866(set)X +2982(\(indicating)X +3356(that)X +3503(the)X +3628(bucket)X +3869(split\),)X +4080(we)X +2418 1004(begin)N +2617(considering)X +3012(the)X +3131(bits)X +3267(of)X +3355(the)X +3473(32-bit)X +3684(hash)X +3851(value.)X +4085(As)X +2418 1092(bit)N +2525(n)X +2587(is)X +2662(revealed,)X +2977(a)X +3035(mask)X +3226(equal)X +3422(to)X +3506(2)X +2 f +7 s +1060(n)Y +9 f +3583(+)X +1 f +3614(1)X +2 f +10 s +9 f +1092(-)Y +1 f +3686(1)X +3748(will)X +3894(yield)X +4076(the)X +2418 1180(current)N +2675(bucket)X +2918(address.)X +3228(Adding)X +3496(2)X +2 f +7 s +1148(n)Y +9 f +3573(+)X +1 f +3604(1)X +2 f +10 s +9 f +1180(-)Y +1 f +3676(1)X +3744(to)X +3834(the)X +3960(bucket)X +2418 1268(address)N +2701(identi\256es)X +3035(which)X +3272(bit)X +3397(in)X +3500(the)X +3639(bitmap)X +3902(must)X +4098(be)X +2418 1356(checked.)N +2743(We)X +2876(continue)X +3173(revealing)X +3493(bits)X +3628(of)X +3715(the)X +3833(hash)X +4000(value)X +2418 1444(until)N +2591(all)X +2698(set)X +2814(bits)X +2955(in)X +3043(the)X +3167(bitmap)X +3415(are)X +3540(exhausted.)X +3907(The)X +4058(fol-)X +2418 1532(lowing)N +2682(algorithm,)X +3055(a)X +3133(simpli\256cation)X +3614(of)X +3723(the)X +3863(algorithm)X +2418 1620(due)N +2565(to)X +2658(Ken)X +2823(Thompson)X +3196([THOM90,)X +3590(TOR88],)X +3908(uses)X +4076(the)X +2418 1708(hash)N +2625(value)X +2839(and)X +2995(the)X +3133(bitmap)X +3395(to)X +3497(calculate)X +3823(the)X +3960(bucket)X +2418 1796(address)N +2679(as)X +2766(discussed)X +3093(above.)X +0(Courier)xf 0 f +1 f +0 f +8 s +2418 2095(hash)N +2608(=)X +2684 -0.4038(calchash\(key\);)AX +2418 2183(mask)N +2608(=)X +2684(0;)X +2418 2271(while)N +2646 -0.4018(\(isbitset\(\(hash)AX +3254(&)X +3330(mask\))X +3558(+)X +3634(mask\)\))X +2706 2359(mask)N +2896(=)X +2972(\(mask)X +3200(<<)X +3314(1\))X +3428(+)X +3504(1;)X +2418 2447(bucket)N +2684(=)X +2760(hash)X +2950(&)X +3026(mask;)X +2 f +10 s +3211 2812(sdbm)N +1 f +2590 2944(The)N +2 f +2738(sdbm)X +1 f +2930(library)X +3167(is)X +3243(a)X +3302(public-domain)X +3791(clone)X +3987(of)X +4076(the)X +2 f +2418 3032(ndbm)N +1 f +2638(library,)X +2914(developed)X +3286(by)X +3408(Ozan)X +3620(Yigit)X +3826(to)X +3929(provide)X +2 f +2418 3120(ndbm)N +1 f +2596('s)X +2692(functionality)X +3139(under)X +3359(some)X +3565(versions)X +3869(of)X +3973(UNIX)X +2418 3208(that)N +2559(exclude)X +2830(it)X +2894(for)X +3008(licensing)X +3317(reasons)X +3578([YIG89].)X +3895(The)X +4040(pro-)X +2418 3296(grammer)N +2735(interface,)X +3064(and)X +3207(the)X +3332(basic)X +3524(structure)X +3832(of)X +2 f +3926(sdbm)X +1 f +4121(is)X +2418 3384(identical)N +2733(to)X +2 f +2834(ndbm)X +1 f +3051(but)X +3192(internal)X +3476(details)X +3723(of)X +3828(the)X +2 f +3964(access)X +1 f +2418 3472(function,)N +2726(such)X +2894(as)X +2982(the)X +3101(calculation)X +3474(of)X +3561(the)X +3679(bucket)X +3913(address,)X +2418 3560(and)N +2563(the)X +2690(use)X +2825(of)X +2920(different)X +3225(hash)X +3400(functions)X +3726(make)X +3928(the)X +4054(two)X +2418 3648(incompatible)N +2856(at)X +2934(the)X +3052(database)X +3349(level.)X +2590 3762(The)N +2 f +2740(sdbm)X +1 f +2934(library)X +3173(is)X +3251(based)X +3458(on)X +3562(a)X +3622(simpli\256ed)X +3965(imple-)X +2418 3850(mentation)N +2778(of)X +2885(Larson's)X +3206(1978)X +2 f +3406(dynamic)X +3717(hashing)X +1 f +4009(algo-)X +2418 3938(rithm)N +2616(including)X +2943(the)X +2 f +3066(re\256nements)X +3461(and)X +3605(variations)X +1 f +3953(of)X +4044(sec-)X +2418 4026(tion)N +2562(5)X +2622([LAR78].)X +2956(Larson's)X +3257(original)X +3526(algorithm)X +3857(calls)X +4024(for)X +4138(a)X +2418 4114(forest)N +2635(of)X +2736(binary)X +2975(hash)X +3156(trees)X +3341(that)X +3494(are)X +3626(accessed)X +3941(by)X +4054(two)X +2418 4202(hash)N +2586(functions.)X +2925(The)X +3071(\256rst)X +3216(hash)X +3384(function)X +3672(selects)X +3907(a)X +3964(partic-)X +2418 4290(ular)N +2571(tree)X +2720(within)X +2952(the)X +3078(forest.)X +3309(The)X +3462(second)X +3713(hash)X +3887(function,)X +2418 4378(which)N +2659(is)X +2757(required)X +3070(to)X +3177(be)X +3297(a)X +3377(boolean)X +3675(pseudo-random)X +2418 4466(number)N +2687(generator)X +3015(that)X +3159(is)X +3236(seeded)X +3479(by)X +3583(the)X +3705(key,)X +3865(is)X +3942(used)X +4112(to)X +2418 4554(traverse)N +2733(the)X +2890(tree)X +3070(until)X +3275(internal)X +3579(\(split\))X +3829(nodes)X +4075(are)X +2418 4642(exhausted)N +2763(and)X +2903(an)X +3003(external)X +3286(\(non-split\))X +3648(node)X +3827(is)X +3903(reached.)X +2418 4730(The)N +2571(bucket)X +2813(addresses)X +3149(are)X +3276(stored)X +3500(directly)X +3772(in)X +3861(the)X +3986(exter-)X +2418 4818(nal)N +2536(nodes.)X +2590 4932(Larson's)N +2903(re\256nements)X +3309(are)X +3440(based)X +3655(on)X +3767(the)X +3897(observa-)X +2418 5020(tion)N +2570(that)X +2718(the)X +2844(nodes)X +3059(can)X +3199(be)X +3303(represented)X +3702(by)X +3809(a)X +3872(single)X +4090(bit)X +2418 5108(that)N +2569(is)X +2653(set)X +2773(for)X +2898(internal)X +3174(nodes)X +3392(and)X +3539(not)X +3672(set)X +3791(for)X +3915(external)X +2418 5196(nodes,)N +2652(resulting)X +2959(in)X +3048(a)X +3111(radix)X +3303(search)X +3536(trie.)X +3709(Figure)X +3944(1)X +4010(illus-)X +2418 5284(trates)N +2621(this.)X +2804(Nodes)X +3037(A)X +3123(and)X +3267(B)X +3348(are)X +3475(internal)X +3748(\(split\))X +3967(nodes,)X +2418 5372(thus)N +2573(having)X +2813(no)X +2915(bucket)X +3151(addresses)X +3480(associated)X +3831(with)X +3994(them.)X +2418 5460(Instead,)N +2693(the)X +2814(external)X +3096(nodes)X +3306(\(C,)X +3429(D,)X +3530(and)X +3669(E\))X +3768(each)X +3938(need)X +4112(to)X +2418 5548(refer)N +2594(to)X +2679(a)X +2738(bucket)X +2975(address.)X +3279(These)X +3494(bucket)X +3731(addresses)X +4062(can)X +2418 5636(be)N +2529(stored)X +2760(in)X +2857(the)X +2990(trie)X +3132(itself)X +3327(where)X +3559(the)X +3691(subtries)X +3974(would)X +3 f +432 5960(2)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +3 p +%%Page: 3 3 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(live)N +862(if)X +933(they)X +1092(existed)X +1340([KNU68].)X +1709(For)X +1841(example,)X +2154(if)X +2224(nodes)X +2432(F)X +720 626(and)N +858(G)X +938(were)X +1117(the)X +1237(children)X +1522(of)X +1610(node)X +1787(C,)X +1881(the)X +2000(bucket)X +2235(address)X +720 714(L00)N +886(could)X +1101(reside)X +1330(in)X +1429(the)X +1563(bits)X +1714(that)X +1870(will)X +2030(eventually)X +2400(be)X +720 802(used)N +887(to)X +969(store)X +1145(nodes)X +1352(F)X +1416(and)X +1552(G)X +1630(and)X +1766(all)X +1866(their)X +2033(children.)X +10 f +720 890 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1894 2247(L1)N +784 1925(A)N +1431(E)X +1106 2247(D)N +1428 1281(C)N +1109 1603(B)N +1884 1930(L01)N +1879 1286(L00)N +1221 1814(1)N +903 2131(1)N +1221 1402(0)N +903 1714(0)N +1 Dt +1397 1821 MXY +-8 -32 Dl +-5 19 Dl +-20 6 Dl +33 7 Dl +-187 -182 Dl +1397 1322 MXY +-33 7 Dl +20 6 Dl +5 19 Dl +8 -32 Dl +-187 182 Dl +1069 1639 MXY +-32 7 Dl +20 6 Dl +5 19 Dl +7 -32 Dl +-186 182 Dl +1374 1891 MXY +185 Dc +1779 2133 MXY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1811 MY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1166 MY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1052 2213 MXY +185 Dc +1569 MY +185 Dc +720 1881 MXY +185 Dc +1779 2213 MXY +-28 -17 Dl +10 17 Dl +-10 18 Dl +28 -18 Dl +-543 0 Dl +1769 1891 MXY +-28 -18 Dl +10 18 Dl +-10 18 Dl +28 -18 Dl +-201 0 Dl +1364 1247 MXY +185 Dc +1769 MX +-28 -18 Dl +10 18 Dl +-10 18 Dl +28 -18 Dl +-201 0 Dl +1064 2143 MXY +-7 -32 Dl +-5 19 Dl +-20 6 Dl +32 7 Dl +-181 -181 Dl +3 Dt +-1 Ds +8 s +720 2482(Figure)N +925(1:)X +1 f +1002(Radix)X +1179(search)X +1365(trie)X +1474(with)X +1612(internal)X +1831(nodes)X +2004(A)X +2074(and)X +2189(B,)X +2271(external)X +720 2570(nodes)N +891(C,)X +972(D,)X +1056(and)X +1170(E,)X +1247(and)X +1361(bucket)X +1553(addresses)X +1819(stored)X +1997(in)X +2069(the)X +2168(unused)X +2370(por-)X +720 2658(tion)N +836(of)X +905(the)X +999(trie.)X +10 s +10 f +720 2922 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +892 3124(Further)N +1153(simpli\256cations)X +1647(of)X +1738(the)X +1860(above)X +2076([YIG89])X +2377(are)X +720 3212(possible.)N +1038(Using)X +1265(a)X +1337(single)X +1564(radix)X +1765(trie)X +1908(to)X +2006(avoid)X +2219(the)X +2352(\256rst)X +720 3300(hash)N +904(function,)X +1227(replacing)X +1562(the)X +1696(pseudo-random)X +2231(number)X +720 3388(generator)N +1052(with)X +1222(a)X +1286(well)X +1452(designed,)X +1785(bit-randomizing)X +2329(hash)X +720 3476(function,)N +1053(and)X +1215(using)X +1434(the)X +1578(portion)X +1855(of)X +1967(the)X +2110(hash)X +2302(value)X +720 3564(exposed)N +1021(during)X +1268(the)X +1404(trie)X +1549(traversal)X +1864(as)X +1969(a)X +2042(direct)X +2262(bucket)X +720 3652(address)N +990(results)X +1228(in)X +1319(an)X +2 f +1424(access)X +1 f +1663(function)X +1959(that)X +2108(works)X +2333(very)X +720 3740(similar)N +974(to)X +1068(Thompson's)X +1499(algorithm)X +1841(above.)X +2084(The)X +2240(follow-)X +720 3828(ing)N +847(algorithm)X +1183(uses)X +1346(the)X +1469(hash)X +1641(value)X +1840(to)X +1927(traverse)X +2206(a)X +2266(linear-)X +720 3916(ized)N +874(radix)X +1059(trie)X +2 f +8 s +1166 3891(3)N +1 f +10 s +1218 3916(starting)N +1478(at)X +1556(the)X +1674(0)X +2 f +7 s +3884(th)Y +10 s +1 f +1791 3916(bit.)N +0 f +8 s +720 4215(tbit)N +910(=)X +986(0;)X +1296(/*)X +1410(radix)X +1638(trie)X +1828(index)X +2056(*/)X +720 4303(hbit)N +910(=)X +986(0;)X +1296(/*)X +1410(hash)X +1600(bit)X +1752(index)X +2056(*/)X +720 4391(mask)N +910(=)X +986(0;)X +720 4479(hash)N +910(=)X +986 -0.4038(calchash\(key\);)AX +720 4655(for)N +872(\(mask)X +1100(=)X +1176(0;)X +910 4743 -0.4018(isbitset\(tbit\);)AN +910 4831(mask)N +1100(=)X +1176(\(mask)X +1404(<<)X +1518(1\))X +1632(+)X +1708(1\))X +1008 4919(if)N +1122(\(hash)X +1350(&)X +1426(\(1)X +1540(<<)X +1654 -0.4219(hbit++\)\)\))AX +1160 5007(/*)N +1274(right)X +1502(son)X +1692(*/)X +1160 5095(tbit)N +1350(=)X +1426(2)X +1502(*)X +1578(tbit)X +1768(+)X +1844(2;)X +1008 5183(else)N +1 f +16 s +720 5353 MXY +864 0 Dl +2 f +8 s +760 5408(3)N +1 f +9 s +818 5433(A)N +896(linearized)X +1206(radix)X +1380(trie)X +1502(is)X +1576(merely)X +1802(an)X +1895(array)X +2068(representation)X +720 5513(of)N +800(the)X +908(radix)X +1076(search)X +1280(trie)X +1396(described)X +1692(above.)X +1920(The)X +2052(children)X +2308(of)X +2388(the)X +720 5593(node)N +885(with)X +1038(index)X +1223(i)X +1267(can)X +1391(be)X +1483(found)X +1675(at)X +1751(the)X +1863(nodes)X +2055(indexed)X +2307(2*i+1)X +720 5673(and)N +842(2*i+2.)X +0 f +8 s +3146 538(/*)N +3260(left)X +3450(son)X +3678(*/)X +3146 626(tbit)N +3336(=)X +3412(2)X +3488(*)X +3564(tbit)X +3754(+)X +3830(1;)X +2706 802(bucket)N +2972(=)X +3048(hash)X +3238(&)X +3314(mask;)X +2 f +10 s +3495 1167(gdbm)N +1 f +2878 1299(The)N +3027(gdbm)X +3233(\(GNU)X +3458(data)X +3616(base)X +3783(manager\))X +4111(library)X +4349(is)X +4426(a)X +2706 1387(UNIX)N +2933(database)X +3236(manager)X +3539(written)X +3792(by)X +3897(Philip)X +4112(A.)X +4215(Nelson,)X +2706 1475(and)N +2848(made)X +3048(available)X +3364(as)X +3457(a)X +3518(part)X +3668(of)X +3760(the)X +3883(FSF)X +4040(software)X +4342(dis-)X +2706 1563(tribution.)N +3052(The)X +3207(gdbm)X +3419(library)X +3663(provides)X +3969(the)X +4097(same)X +4292(func-)X +2706 1651(tionality)N +3028(of)X +3151(the)X +2 f +3304(dbm)X +1 f +3442(/)X +2 f +3464(ndbm)X +1 f +3697(libraries)X +4015([NEL90])X +4360(but)X +2706 1739(attempts)N +3018(to)X +3121(avoid)X +3340(some)X +3550(of)X +3658(their)X +3846(shortcomings.)X +4337(The)X +2706 1827(gdbm)N +2918(library)X +3162(allows)X +3401(for)X +3525(arbitrary-length)X +4059(data,)X +4242(and)X +4387(its)X +2706 1915(database)N +3027(is)X +3124(a)X +3203(singular,)X +3524(non-sparse)X +2 f +8 s +3872 1890(4)N +1 f +10 s +3947 1915(\256le.)N +4112(The)X +4280(gdbm)X +2706 2003(library)N +2947(also)X +3103(includes)X +2 f +3396(dbm)X +1 f +3560(and)X +2 f +3702(ndbm)X +1 f +3906(compatible)X +4288(inter-)X +2706 2091(faces.)N +2878 2205(The)N +3025(gdbm)X +3229(library)X +3465(is)X +3540(based)X +3745(on)X +2 f +3847(extensible)X +4189(hashing)X +1 f +4442(,)X +2706 2293(a)N +2766(dynamic)X +3066(hashing)X +3339(algorithm)X +3674(by)X +3778(Fagin)X +3984(et)X +4066(al)X +4148([FAG79].)X +2706 2381(This)N +2881(algorithm)X +3225(differs)X +3467(from)X +3655(the)X +3785(previously)X +4155(discussed)X +2706 2469(algorithms)N +3069(in)X +3152(that)X +3293(it)X +3358(uses)X +3517(a)X +2 f +3574(directory)X +1 f +3889(that)X +4030(is)X +4103(a)X +4159(collapsed)X +2706 2557(representation)N +3192([ENB88])X +3517(of)X +3615(the)X +3744(radix)X +3940(search)X +4177(trie)X +4315(used)X +2706 2645(by)N +2 f +2806(sdbm)X +1 f +2975(.)X +10 f +2706 2733 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +7 s +3572 3761(L1)N +1 Dt +3485 3738 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-400 0 Dl +3180 3027 MXY +136 Dc +2706 3494 MXY +136 Dc +2950 3264 MXY +136 Dc +3738 MY +136 Dc +3485 2968 MXY +0 118 Dl +238 0 Dl +0 -118 Dl +-238 0 Dl +3442 MY +0 119 Dl +238 0 Dl +0 -119 Dl +-238 0 Dl +3679 MY +0 119 Dl +238 0 Dl +0 -119 Dl +-238 0 Dl +3187 3501 MXY +136 Dc +2963 3316 MXY +-24 5 Dl +15 4 Dl +4 15 Dl +5 -24 Dl +-137 134 Dl +3204 3083 MXY +-24 5 Dl +15 4 Dl +3 14 Dl +6 -23 Dl +-137 133 Dl +3204 3450 MXY +-6 -24 Dl +-3 14 Dl +-15 5 Dl +24 5 Dl +-137 -134 Dl +2842 3369(0)N +3075 3139(0)N +2842 3676(1)N +3075 3443(1)N +3562 3054(L00)N +3565 3528(L01)N +4197 2968 MXY +0 118 Dl +237 0 Dl +0 -118 Dl +-237 0 Dl +3205 MY +0 119 Dl +237 0 Dl +0 -119 Dl +-237 0 Dl +3561 MY +0 118 Dl +237 0 Dl +0 -118 Dl +-237 0 Dl +3960 2909 MXY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3146 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3383 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3620 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +4197 3027 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-119 0 Dl +4197 3264 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-119 0 Dl +3501 MY +59 0 Dl +0 89 Dl +4078 3738 MXY +59 0 Dl +0 -88 Dl +4197 3590 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-60 0 Dl +4197 3650 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-60 0 Dl +3991 3050(00)N +3991 3287(01)N +3991 3524(10)N +3991 3761(11)N +4269 3050(L00)N +4269 3287(L01)N +4283 3643(L1)N +3485 3501 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-155 0 Dl +3485 3027 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-163 0 Dl +2967 3687 MXY +-5 -24 Dl +-4 14 Dl +-15 4 Dl +24 6 Dl +-141 -141 Dl +3 Dt +-1 Ds +8 s +2706 4033(Figure)N +2903(2:)X +1 f +2972(A)X +3034(radix)X +3181(search)X +3359(trie)X +3460(and)X +3568(a)X +3612(directory)X +3858(representing)X +4189(the)X +4283(trie.)X +10 s +10 f +2706 4209 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +2878 4411(In)N +2968(this)X +3106(algorithm,)X +3460(a)X +3519(directory)X +3832(consists)X +4108(of)X +4198(a)X +4256(search)X +2706 4499(trie)N +2847(of)X +2947(depth)X +2 f +3158(n)X +1 f +3211(,)X +3264(containing)X +3635(2)X +2 f +7 s +4467(n)Y +10 s +1 f +3749 4499(bucket)N +3996(addresses)X +4337(\(i.e.)X +2706 4587(each)N +2897(element)X +3194(of)X +3304(the)X +3445(trie)X +3594(is)X +3689(a)X +3767(bucket)X +4023(address\).)X +4373(To)X +2706 4675(access)N +2935(the)X +3056(hash)X +3226(table,)X +3425(a)X +3483(32-bit)X +3696(hash)X +3865(value)X +4061(is)X +4136(calculated)X +2706 4763(and)N +2 f +2861(n)X +1 f +2953(bits)X +3107(of)X +3213(the)X +3350(value)X +3563(are)X +3701(used)X +3886(to)X +3986(index)X +4202(into)X +4364(the)X +2706 4851(directory)N +3018(to)X +3102(obtain)X +3324(a)X +3382(bucket)X +3618(address.)X +3921(It)X +3992(is)X +4067(important)X +4400(to)X +2706 4939(note)N +2866(that)X +3008(multiple)X +3296(entries)X +3532(of)X +3620(this)X +3756(directory)X +4067(may)X +4226(contain)X +2706 5027(the)N +2833(same)X +3026(bucket)X +3268(address)X +3537(as)X +3632(a)X +3696(result)X +3902(of)X +3997(directory)X +4315(dou-)X +2706 5115(bling)N +2903(during)X +3145(bucket)X +3392(splitting.)X +3706(Figure)X +3948(2)X +4021(illustrates)X +4364(the)X +2706 5203(relationship)N +3126(between)X +3436(a)X +3513(typical)X +3772(\(skewed\))X +4108(search)X +4355(trie)X +2706 5291(and)N +2850(its)X +2953(directory)X +3271(representation.)X +3774(The)X +3927(formation)X +4270(of)X +4364(the)X +2706 5379(directory)N +3016(shown)X +3245(in)X +3327(the)X +3445(\256gure)X +3652(is)X +3725(as)X +3812(follows.)X +16 s +2706 5593 MXY +864 0 Dl +2 f +8 s +2746 5648(4)N +1 f +9 s +2796 5673(It)N +2858(does)X +3008(not)X +3118(contain)X +3348(holes.)X +3 f +10 s +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(3)X + +4 p +%%Page: 4 4 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +604 538(Initially,)N +937(there)X +1158(is)X +1271(one)X +1446(slot)X +1620(in)X +1741(the)X +1898(directory)X +432 626(addressing)N +802(a)X +865(single)X +1083(bucket.)X +1364(The)X +1515(depth)X +1719(of)X +1812(the)X +1936(trie)X +2069(is)X +2148(0)X +432 714(and)N +577(0)X +646(bits)X +790(of)X +886(each)X +1063(hash)X +1239(value)X +1442(are)X +1570(examined)X +1910(to)X +2000(deter-)X +432 802(mine)N +624(in)X +718(which)X +946(bucket)X +1192(to)X +1286(place)X +1488(a)X +1556(key;)X +1726(all)X +1837(keys)X +2015(go)X +2126(in)X +432 890(bucket)N +682(0.)X +797(When)X +1024(this)X +1174(bucket)X +1423(is)X +1511(full,)X +1677(its)X +1787(contents)X +2089(are)X +432 978(divided)N +698(between)X +992(L0)X +1107(and)X +1249(L1)X +1363(as)X +1455(was)X +1605(done)X +1786(in)X +1873(the)X +1996(previ-)X +432 1066(ously)N +664(discussed)X +1030(algorithms.)X +1471(After)X +1700(this)X +1874(split,)X +2090(the)X +432 1154(address)N +710(of)X +814(the)X +948(second)X +1207(bucket)X +1457(must)X +1648(be)X +1760(stored)X +1992(in)X +2090(the)X +432 1242(directory.)N +796(To)X +939(accommodate)X +1438(the)X +1589(new)X +1776(address,)X +2090(the)X +432 1330(directory)N +752(is)X +835(split)X +2 f +8 s +972 1305(5)N +1 f +10 s +1330(,)Y +1054(by)X +1163(doubling)X +1476(it,)X +1569(thus)X +1731(increasing)X +2090(the)X +432 1418(depth)N +630(of)X +717(the)X +835(directory)X +1145(by)X +1245(one.)X +604 1532(After)N +813(this)X +967(split,)X +1163(a)X +1237(single)X +1466(bit)X +1588(of)X +1693(the)X +1829(hash)X +2014(value)X +432 1620(needs)N +663(to)X +773(be)X +896(examined)X +1255(to)X +1364(decide)X +1621(whether)X +1927(the)X +2072(key)X +432 1708(belongs)N +711(to)X +803(L0)X +922(or)X +1019(L1.)X +1158(Once)X +1358(one)X +1504(of)X +1601(these)X +1795(buckets)X +2069(\256lls)X +432 1796(\(L0)N +578(for)X +702(example\),)X +1051(it)X +1125(is)X +1208(split)X +1375(as)X +1472(before,)X +1728(and)X +1873(the)X +2000(direc-)X +432 1884(tory)N +585(is)X +662(split)X +823(again)X +1021(to)X +1107(make)X +1305(room)X +1498(for)X +1615(the)X +1736(address)X +2000(of)X +2090(the)X +432 1972(third)N +618(bucket.)X +927(This)X +1104(splitting)X +1400(causes)X +1645(the)X +1778(addresses)X +2121(of)X +432 2060(the)N +567(non-splitting)X +1012(bucket)X +1263(\(L1\))X +1443(to)X +1541(be)X +1653(duplicated.)X +2063(The)X +432 2148(directory)N +766(now)X +948(has)X +1099(four)X +1277(entries,)X +1555(a)X +1635(depth)X +1857(of)X +1968(2,)X +2072(and)X +432 2236(indexes)N +700(the)X +821(buckets)X +1089(L00,)X +1261(L01)X +1413(and)X +1552(L1,)X +1684(as)X +1774(shown)X +2006(in)X +2090(the)X +432 2324(Figure)N +661(2.)X +604 2438(The)N +756(crucial)X +1002(part)X +1154(of)X +1247(the)X +1371(algorithm)X +1708(is)X +1787(the)X +1911(observa-)X +432 2526(tion)N +580(that)X +724(L1)X +837(is)X +914(addressed)X +1255(twice)X +1453(in)X +1539(the)X +1661(directory.)X +1995(If)X +2073(this)X +432 2614(bucket)N +679(were)X +869(to)X +964(split)X +1134(now,)X +1324(the)X +1454(directory)X +1776(already)X +2045(con-)X +432 2702(tains)N +611(room)X +808(to)X +898(hold)X +1067(the)X +1192(address)X +1460(of)X +1554(the)X +1679(new)X +1840(bucket.)X +2121(In)X +432 2790(general,)N +711(the)X +831(relationship)X +1231(between)X +1521(the)X +1641(directory)X +1953(and)X +2090(the)X +432 2878(number)N +704(of)X +798(bucket)X +1039(addresses)X +1374(contained)X +1713(therein)X +1962(is)X +2041(used)X +432 2966(to)N +517(decide)X +750(when)X +947(to)X +1031(split)X +1190(the)X +1310(directory.)X +1662(Each)X +1845(bucket)X +2081(has)X +432 3054(a)N +505(depth,)X +740(\()X +2 f +767(n)X +7 s +3070(b)Y +10 s +1 f +848 3054(\),)N +932(associated)X +1299(with)X +1478(it)X +1558(and)X +1710(appears)X +1992(in)X +2090(the)X +432 3142(directory)N +744(exactly)X +998(2)X +2 f +7 s +3106(n)Y +9 f +1075(-)X +2 f +1106(n)X +4 s +3110(b)Y +7 s +1 f +10 s +1181 3142(times.)N +1396(When)X +1610(a)X +1668(bucket)X +1904(splits,)X +2113(its)X +432 3230(depth)N +638(increases)X +961(by)X +1069(one.)X +1253(The)X +1406(directory)X +1724(must)X +1907(split)X +2072(any)X +432 3318(time)N +602(a)X +665(bucket's)X +964(depth)X +1169(exceeds)X +1451(the)X +1576(depth)X +1781(of)X +1875(the)X +2000(direc-)X +432 3406(tory.)N +630(The)X +784(following)X +1123(code)X +1303(fragment)X +1621(helps)X +1818(to)X +1908(illustrate)X +432 3494(the)N +554(extendible)X +912(hashing)X +1185(algorithm)X +1520([FAG79])X +1838(for)X +1955(access-)X +432 3582(ing)N +554(individual)X +898(buckets)X +1163(and)X +1299(maintaining)X +1701(the)X +1819(directory.)X +0 f +8 s +432 3881(hash)N +622(=)X +698 -0.4038(calchash\(key\);)AX +432 3969(mask)N +622(=)X +698 -0.4018(maskvec[depth];)AX +432 4145(bucket)N +698(=)X +774 -0.4038(directory[hash)AX +1344(&)X +1420(mask];)X +432 4321(/*)N +546(Key)X +698 -0.4219(Insertion)AX +1078(*/)X +432 4409(if)N +546 -0.4038(\(store\(bucket,)AX +1116(key,)X +1306(data\))X +1534(==)X +1648(FAIL\))X +1876({)X +720 4497(newbl)N +948(=)X +1024 -0.4167(getpage\(\);)AX +720 4585 -0.4000(bucket->depth++;)AN +720 4673 -0.4091(newbl->depth)AN +1214(=)X +1290 -0.4038(bucket->depth;)AX +720 4761(if)N +834 -0.4038(\(bucket->depth)AX +1404(>)X +1480(depth\))X +1746({)X +1008 4849(/*)N +1122(double)X +1388 -0.4219(directory)AX +1768(*/)X +1008 4937(depth++;)N +1 f +16 s +432 5033 MXY +864 0 Dl +2 f +8 s +472 5088(5)N +1 f +9 s +534 5113(This)N +692(decision)X +962(to)X +1048(split)X +1202(the)X +1319(directory)X +1608(is)X +1685(based)X +1878(on)X +1979(a)X +2040(com-)X +432 5193(parison)N +666(of)X +748(the)X +858(depth)X +1040(of)X +1121(the)X +1230(page)X +1387(being)X +1568(split)X +1713(and)X +1838(the)X +1947(depth)X +2128(of)X +432 5273(the)N +543(trie.)X +698(In)X +781(Figure)X +992(2,)X +1069(the)X +1180(depths)X +1390(of)X +1472(both)X +1622(L00)X +1760(and)X +1886(L01)X +2024(are)X +2134(2,)X +432 5353(whereas)N +689(the)X +798(depth)X +979(of)X +1060(L1)X +1161(is)X +1230(1.)X +1323(Therefore,)X +1646(if)X +1710(L1)X +1810(were)X +1970(to)X +2046(split,)X +432 5433(the)N +543(directory)X +826(would)X +1029(not)X +1144(need)X +1303(to)X +1382(split.)X +1565(In)X +1648(reality,)X +1872(a)X +1926(bucket)X +2140(is)X +432 5513(allocated)N +727(for)X +846(the)X +969(directory)X +1264(at)X +1351(the)X +1474(time)X +1637(of)X +1732(\256le)X +1858(creation)X +2124(so)X +432 5593(although)N +707(the)X +818(directory)X +1100(splits)X +1274(logically,)X +1566(physical)X +1828(splits)X +2002(do)X +2096(not)X +432 5673(occur)N +610(until)X +760(the)X +866(\256le)X +976(becomes)X +1246(quite)X +1408(large.)X +0 f +8 s +2994 538 -0.4219(directory)AN +3374(=)X +3450 -0.3971(double\(directory\);)AX +2706 626(})N +2706 714 -0.3958(splitbucket\(bucket,)AN +3466(newbl\))X +2706 802(...)N +2418 890(})N +2 f +10 s +3169 1255(hsearch)N +1 f +2590 1387(Since)N +2 f +2807(hsearch)X +1 f +3100(does)X +3286(not)X +3427(have)X +3617(to)X +3717(translate)X +4027(hash)X +2418 1475(values)N +2659(into)X +2819(disk)X +2988(addresses,)X +3352(it)X +3432(can)X +3579(use)X +3721(much)X +3934(simpler)X +2418 1563(algorithms)N +2808(than)X +2994(those)X +3211(de\256ned)X +3495(above.)X +3775(System)X +4058(V's)X +2 f +2418 1651(hsearch)N +1 f +2708(constructs)X +3069(a)X +3141(\256xed-size)X +3489(hash)X +3671(table)X +3862(\(speci\256ed)X +2418 1739(by)N +2519(the)X +2637(user)X +2791(at)X +2869(table)X +3045(creation\).)X +3391(By)X +3504(default,)X +3767(a)X +3823(multiplica-)X +2418 1827(tive)N +2570(hash)X +2748(function)X +3046(based)X +3260(on)X +3371(that)X +3522(described)X +3861(in)X +3954(Knuth,)X +2418 1915(Volume)N +2710(3,)X +2804(section)X +3065(6.4)X +3199([KNU68])X +3541(is)X +3628(used)X +3809(to)X +3905(obtain)X +4138(a)X +2418 2003(primary)N +2694(bucket)X +2930(address.)X +3233(If)X +3309(this)X +3446(bucket)X +3681(is)X +3755(full,)X +3907(a)X +3964(secon-)X +2418 2091(dary)N +2593(multiplicative)X +3069(hash)X +3248(value)X +3454(is)X +3538(computed)X +3885(to)X +3978(de\256ne)X +2418 2179(the)N +2542(probe)X +2751(interval.)X +3062(The)X +3213(probe)X +3422(interval)X +3693(is)X +3772(added)X +3989(to)X +4076(the)X +2418 2267(original)N +2712(bucket)X +2971(address)X +3257(\(modulo)X +3573(the)X +3716(table)X +3916(size\))X +4112(to)X +2418 2355(obtain)N +2658(a)X +2734(new)X +2908(bucket)X +3162(address.)X +3483(This)X +3665(process)X +3946(repeats)X +2418 2443(until)N +2588(an)X +2688(empty)X +2911(bucket)X +3148(is)X +3224(found.)X +3474(If)X +3551(no)X +3654(bucket)X +3891(is)X +3967(found,)X +2418 2531(an)N +2514(insertion)X +2814(fails)X +2972(with)X +3134(a)X +3190(``table)X +3420(full'')X +3605(condition.)X +2590 2645(The)N +2768(basic)X +2986(algorithm)X +3350(may)X +3541(be)X +3670(modi\256ed)X +4006(by)X +4138(a)X +2418 2733(number)N +2705(of)X +2813(compile)X +3112(time)X +3295(options)X +3571(available)X +3902(to)X +4005(those)X +2418 2821(users)N +2604(with)X +2767(AT&T)X +3006(source)X +3237(code.)X +3450(First,)X +3637(the)X +3756(package)X +4040(pro-)X +2418 2909(vides)N +2638(two)X +2809(options)X +3094(for)X +3238(hash)X +3435(functions.)X +3803(Users)X +4036(may)X +2418 2997(specify)N +2690(their)X +2877(own)X +3055(hash)X +3242(function)X +3549(by)X +3669(compiling)X +4032(with)X +2418 3085(``USCR'')N +2757(de\256ned)X +3016(and)X +3155(declaring)X +3477(and)X +3616(de\256ning)X +3901(the)X +4022(vari-)X +2418 3173(able)N +2 f +2578(hcompar)X +1 f +2863(,)X +2909(a)X +2971(function)X +3263(taking)X +3488(two)X +3633(string)X +3840(arguments)X +2418 3261(and)N +2560(returning)X +2880(an)X +2982(integer.)X +3271(Users)X +3480(may)X +3643(also)X +3797(request)X +4054(that)X +2418 3349(hash)N +2587(values)X +2814(be)X +2912(computed)X +3250(simply)X +3489(by)X +3590(taking)X +3811(the)X +3930(modulo)X +2418 3437(of)N +2521(key)X +2673(\(using)X +2909(division)X +3201(rather)X +3424(than)X +3597(multiplication)X +4080(for)X +2418 3525(hash)N +2589(value)X +2787(calculation\).)X +3230(If)X +3308(this)X +3447(technique)X +3783(is)X +3859(used,)X +4049(col-)X +2418 3613(lisions)N +2651(are)X +2775(resolved)X +3072(by)X +3176(scanning)X +3485(sequentially)X +3896(from)X +4076(the)X +2418 3701(selected)N +2702(bucket)X +2941(\(linear)X +3176(probing\).)X +3517(This)X +3684(option)X +3913(is)X +3991(avail-)X +2418 3789(able)N +2572(by)X +2672(de\256ning)X +2954(the)X +3072(variable)X +3351(``DIV'')X +3622(at)X +3700(compile)X +3978(time.)X +2590 3903(A)N +2720(second)X +3015(option,)X +3311(based)X +3565(on)X +3716(an)X +3863(algorithm)X +2418 3991(discovered)N +2787(by)X +2888(Richard)X +3163(P.)X +3248(Brent,)X +3466(rearranges)X +3822(the)X +3940(table)X +4116(at)X +2418 4079(the)N +2549(time)X +2724(of)X +2824(insertion)X +3137(in)X +3232(order)X +3434(to)X +3528(speed)X +3743(up)X +3855(retrievals.)X +2418 4167(The)N +2571(basic)X +2764(idea)X +2926(is)X +3007(to)X +3097(shorten)X +3361(long)X +3531(probe)X +3741(sequences)X +4094(by)X +2418 4255(lengthening)N +2833(short)X +3030(probe)X +3249(sequences.)X +3651(Once)X +3857(the)X +3991(probe)X +2418 4343(chain)N +2613(has)X +2741(exceeded)X +3062(some)X +3252(threshold)X +3571(\(Brent)X +3796(suggests)X +4087(2\),)X +2418 4431(we)N +2541(attempt)X +2809(to)X +2899(shuf\257e)X +3145(any)X +3289(colliding)X +3601(keys)X +3776(\(keys)X +3978(which)X +2418 4519(appeared)N +2734(in)X +2821(the)X +2944(probe)X +3152(sequence)X +3471(of)X +3562(the)X +3684(new)X +3842(key\).)X +4049(The)X +2418 4607(details)N +2652(of)X +2744(this)X +2884(key)X +3025(shuf\257ing)X +3333(can)X +3469(be)X +3569(found)X +3780(in)X +3866([KNU68])X +2418 4695(and)N +2576([BRE73].)X +2946(This)X +3129(algorithm)X +3481(may)X +3660(be)X +3777(obtained)X +4094(by)X +2418 4783(de\256ning)N +2700(the)X +2818(variable)X +3097(``BRENT'')X +3487(at)X +3565(compile)X +3843(time.)X +2590 4897(A)N +2698(third)X +2899(set)X +3038(of)X +3154(options,)X +3458(obtained)X +3783(by)X +3912(de\256ning)X +2418 4985(``CHAINED'',)N +2943(use)X +3086(linked)X +3321(lists)X +3484(to)X +3581(resolve)X +3848(collisions.)X +2418 5073(Either)N +2647(of)X +2747(the)X +2878(primary)X +3164(hash)X +3343(function)X +3642(described)X +3982(above)X +2418 5161(may)N +2584(be)X +2688(used,)X +2882(but)X +3011(all)X +3118(collisions)X +3451(are)X +3577(resolved)X +3876(by)X +3983(build-)X +2418 5249(ing)N +2554(a)X +2623(linked)X +2856(list)X +2986(of)X +3086(entries)X +3333(from)X +3522(the)X +3653(primary)X +3940(bucket.)X +2418 5337(By)N +2542(default,)X +2816(new)X +2981(entries)X +3226(will)X +3381(be)X +3488(added)X +3711(to)X +3804(a)X +3871(bucket)X +4116(at)X +2418 5425(the)N +2541(beginning)X +2886(of)X +2978(the)X +3101(bucket)X +3339(chain.)X +3577(However,)X +3916(compile)X +2418 5513(options)N +2706(``SORTUP'')X +3173(or)X +3293(``SORTDOWN'')X +3908(may)X +4098(be)X +2418 5601(speci\256ed)N +2723(to)X +2805(order)X +2995(the)X +3113(hash)X +3280(chains)X +3505(within)X +3729(each)X +3897(bucket.)X +3 f +432 5960(4)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +5 p +%%Page: 5 5 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +2 f +1444 538(dynahash)N +1 f +892 670(The)N +2 f +1054(dynahash)X +1 f +1398(library,)X +1669(written)X +1932(by)X +2048(Esmond)X +2346(Pitt,)X +720 758(implements)N +1183(Larson's)X +1554(linear)X +1827(hashing)X +2165(algorithm)X +720 846([LAR88])N +1097(with)X +1302(an)X +2 f +1440(hsearch)X +1 f +1756(compatible)X +2174(interface.)X +720 934(Intuitively,)N +1099(a)X +1161(hash)X +1334(table)X +1516(begins)X +1751(as)X +1844(a)X +1905(single)X +2121(bucket)X +2360(and)X +720 1022(grows)N +941(in)X +1028(generations,)X +1443(where)X +1665(a)X +1725(generation)X +2088(corresponds)X +720 1110(to)N +815(a)X +884(doubling)X +1201(in)X +1296(the)X +1427(size)X +1585(of)X +1685(the)X +1815(hash)X +1994(table.)X +2222(The)X +2379(0)X +2 f +7 s +1078(th)Y +10 s +1 f +720 1198(generation)N +1085(occurs)X +1321(as)X +1414(the)X +1538(table)X +1719(grows)X +1940(from)X +2121(one)X +2262(bucket)X +720 1286(to)N +814(two.)X +1006(In)X +1105(the)X +1235(next)X +1405(generation)X +1776(the)X +1906(table)X +2093(grows)X +2320(from)X +720 1374(two)N +862(to)X +946(four.)X +1122(During)X +1371(each)X +1541(generation,)X +1921(every)X +2121(bucket)X +2356(that)X +720 1462(existed)N +967(at)X +1045(the)X +1163(beginning)X +1503(of)X +1590(the)X +1708(generation)X +2067(is)X +2140(split.)X +892 1576(The)N +1041(table)X +1221(starts)X +1414(as)X +1505(a)X +1565(single)X +1780(bucket)X +2018(\(numbered)X +2389(0\),)X +720 1664(the)N +839(current)X +1088(split)X +1245(bucket)X +1479(is)X +1552(set)X +1661(to)X +1743(bucket)X +1977(0,)X +2057(and)X +2193(the)X +2311(max-)X +720 1752(imum)N +933(split)X +1097(point)X +1288(is)X +1368(set)X +1483(to)X +1571(twice)X +1771(the)X +1895(current)X +2149(split)X +2312(point)X +720 1840(\(0\).)N +863(When)X +1084(it)X +1157(is)X +1239(time)X +1410(for)X +1532(a)X +1596(bucket)X +1838(to)X +1928(split,)X +2113(the)X +2239(keys)X +2414(in)X +720 1928(the)N +872(current)X +1154(split)X +1345(bucket)X +1612(are)X +1764(divided)X +2057(between)X +2378(the)X +720 2016(current)N +981(split)X +1151(bucket)X +1397(and)X +1545(a)X +1613(new)X +1779(bucket)X +2025(whose)X +2262(bucket)X +720 2104(number)N +1000(is)X +1088(equal)X +1297(to)X +1394(1)X +1469(+)X +1549(current)X +1812(split)X +1984(bucket)X +2232(+)X +2311(max-)X +720 2192(imum)N +927(split)X +1085(point.)X +1310(We)X +1442(can)X +1574(determine)X +1915(which)X +2131(keys)X +2298(move)X +720 2280(to)N +807(the)X +929(new)X +1087(bucket)X +1325(by)X +1429(examining)X +1791(the)X +2 f +1913(n)X +7 s +1962 2248(th)N +10 s +1 f +2043 2280(bit)N +2151(of)X +2242(a)X +2302(key's)X +720 2368(hash)N +899(value)X +1105(where)X +1334(n)X +1406(is)X +1491(the)X +1620(generation)X +1990(number.)X +2306(After)X +720 2456(the)N +846(bucket)X +1088(at)X +1174(the)X +1300(maximum)X +1651(split)X +1815(point)X +2006(has)X +2140(been)X +2319(split,)X +720 2544(the)N +839(generation)X +1198(number)X +1463(is)X +1536(incremented,)X +1973(the)X +2091(current)X +2339(split)X +720 2632(point)N +908(is)X +985(set)X +1098(back)X +1274(to)X +1360(zero,)X +1543(and)X +1683(the)X +1805(maximum)X +2152(split)X +2312(point)X +720 2720(is)N +815(set)X +946(to)X +1050(the)X +1190(number)X +1477(of)X +1586(the)X +1725(last)X +1877(bucket)X +2132(in)X +2235(the)X +2374(\256le)X +720 2808(\(which)N +971(is)X +1052(equal)X +1253(to)X +1342(twice)X +1543(the)X +1668(old)X +1797(maximum)X +2148(split)X +2312(point)X +720 2896(plus)N +873(1\).)X +892 3010(To)N +1031(facilitate)X +1361(locating)X +1668(keys,)X +1884(we)X +2027(maintain)X +2356(two)X +720 3098(masks.)N +989(The)X +1143(low)X +1291(mask)X +1488(is)X +1569(equal)X +1771(to)X +1861(the)X +1987(maximum)X +2339(split)X +720 3186(bucket)N +967(and)X +1116(the)X +1247(high)X +1422(mask)X +1624(is)X +1710(equal)X +1917(to)X +2011(the)X +2141(next)X +2311(max-)X +720 3274(imum)N +931(split)X +1093(bucket.)X +1372(To)X +1486(locate)X +1703(a)X +1764(speci\256c)X +2033(key,)X +2193(we)X +2311(com-)X +720 3362(pute)N +881(a)X +940(32-bit)X +1154(hash)X +1324(value)X +1520(using)X +1715(a)X +1773(bit-randomizing)X +2311(algo-)X +720 3450(rithm)N +932(such)X +1118(as)X +1224(the)X +1361(one)X +1516(described)X +1862(in)X +1962([LAR88].)X +2334(This)X +720 3538(hash)N +893(value)X +1093(is)X +1172(then)X +1336(masked)X +1607(with)X +1775(the)X +1898(high)X +2065(mask.)X +2299(If)X +2378(the)X +720 3626(resulting)N +1026(number)X +1297(is)X +1376(greater)X +1626(than)X +1790(the)X +1913(maximum)X +2262(bucket)X +720 3714(in)N +823(the)X +962(table)X +1159(\(current)X +1455(split)X +1633(bucket)X +1888(+)X +1974(maximum)X +2339(split)X +720 3802(point\),)N +962(the)X +1091(hash)X +1269(value)X +1474(is)X +1558(masked)X +1834(with)X +2007(the)X +2136(low)X +2287(mask.)X +720 3890(In)N +825(either)X +1046(case,)X +1242(the)X +1377(result)X +1592(of)X +1696(the)X +1831(mask)X +2037(is)X +2127(the)X +2262(bucket)X +720 3978(number)N +989(for)X +1107(the)X +1229(given)X +1431(key.)X +1611(The)X +1759(algorithm)X +2093(below)X +2312(illus-)X +720 4066(trates)N +914(this)X +1049(process.)X +0 f +8 s +720 4365(h)N +796(=)X +872 -0.4038(calchash\(key\);)AX +720 4453(bucket)N +986(=)X +1062(h)X +1138(&)X +1214 -0.4167(high_mask;)AX +720 4541(if)N +834(\()X +910(bucket)X +1176(>)X +1252 -0.4167(max_bucket)AX +1670(\))X +1008 4629(bucket)N +1274(=)X +1350(h)X +1426(&)X +1502 -0.4219(low_mask;)AX +720 4717 -0.4018(return\(bucket\);)AN +1 f +10 s +892 5042(In)N +1013(order)X +1237(to)X +1353(decide)X +1617(when)X +1845(to)X +1961(split)X +2152(a)X +2242(bucket,)X +2 f +720 5130(dynahash)N +1 f +1050(uses)X +2 f +1210(controlled)X +1561(splitting)X +1 f +1822(.)X +1884(A)X +1964(hash)X +2133(table)X +2311(has)X +2440(a)X +720 5218(\256ll)N +837(factor)X +1054(which)X +1279(is)X +1361(expressed)X +1707(in)X +1798(terms)X +2004(of)X +2099(the)X +2225(average)X +720 5306(number)N +990(of)X +1082(keys)X +1253(in)X +1339(each)X +1511(bucket.)X +1789(Each)X +1974(time)X +2140(the)X +2262(table's)X +720 5394(total)N +885(number)X +1153(of)X +1243(keys)X +1413(divided)X +1676(by)X +1778(its)X +1875(number)X +2142(of)X +2231(buckets)X +720 5482(exceeds)N +995(this)X +1130(\256ll)X +1238(factor,)X +1466(a)X +1522(bucket)X +1756(is)X +1829(split.)X +2878 538(Since)N +3079(the)X +2 f +3200(hsearch)X +1 f +3477(create)X +3693(interface)X +3998(\()X +2 f +4025(hcreate)X +1 f +4266(\))X +4315(calls)X +2706 626(for)N +2842(an)X +2960(estimate)X +3269(of)X +3378(the)X +3518(\256nal)X +3702(size)X +3869(of)X +3978(the)X +4118(hash)X +4306(table)X +2706 714(\()N +2 f +2733(nelem)X +1 f +2925(\),)X +2 f +3007(dynahash)X +1 f +3349(uses)X +3522(this)X +3672(information)X +4085(to)X +4182(initialize)X +2706 802(the)N +2848(table.)X +3088(The)X +3257(initial)X +3486(number)X +3774(of)X +3884(buckets)X +4172(is)X +4268(set)X +4400(to)X +2 f +2706 890(nelem)N +1 f +2926(rounded)X +3217(to)X +3306(the)X +3431(next)X +3596(higher)X +3828(power)X +4056(of)X +4150(two.)X +4337(The)X +2706 978(current)N +2958(split)X +3118(point)X +3305(is)X +3381(set)X +3493(to)X +3578(0)X +3641(and)X +3780(the)X +3901(maximum)X +4248(bucket)X +2706 1066(and)N +2842(maximum)X +3186(split)X +3343(point)X +3527(are)X +3646(set)X +3755(to)X +3837(this)X +3972(rounded)X +4255(value.)X +3 f +3148 1220(The)N +3301(New)X +3473(Implementation)X +1 f +2878 1352(Our)N +3042(implementation)X +3583(is)X +3675(also)X +3842(based)X +4063(on)X +4181(Larson's)X +2706 1440(linear)N +2939(hashing)X +3238([LAR88])X +3582(algorithm)X +3943(as)X +4060(well)X +4248(as)X +4364(the)X +2 f +2706 1528(dynahash)N +1 f +3047(implementation.)X +3623(The)X +2 f +3782(dbm)X +1 f +3954(family)X +4197(of)X +4297(algo-)X +2706 1616(rithms)N +2942(decide)X +3184(dynamically)X +3612(which)X +3840(bucket)X +4085(to)X +4178(split)X +4346(and)X +2706 1704(when)N +2914(to)X +3010(split)X +3180(it)X +3257(\(when)X +3491(it)X +3568(over\257ows\))X +3944(while)X +2 f +4155(dynahash)X +1 f +2706 1792(splits)N +2933(in)X +3054(a)X +3149(prede\256ned)X +3547(order)X +3776(\(linearly\))X +4134(and)X +4309(at)X +4426(a)X +2706 1880(prede\256ned)N +3116(time)X +3328(\(when)X +3599(the)X +3767(table)X +3993(\256ll)X +4151(factor)X +4409(is)X +2706 1968(exceeded\).)N +3121(We)X +3280(use)X +3434(a)X +3517(hybrid)X +3773(of)X +3887(these)X +4099(techniques.)X +2706 2056(Splits)N +2913(occur)X +3118(in)X +3206(the)X +3330(prede\256ned)X +3695(order)X +3891(of)X +3984(linear)X +4193(hashing,)X +2706 2144(but)N +2845(the)X +2980(time)X +3159(at)X +3253(which)X +3485(pages)X +3704(are)X +3839(split)X +4012(is)X +4101(determined)X +2706 2232(both)N +2869(by)X +2970(page)X +3143(over\257ows)X +3480(\()X +2 f +3507(uncontrolled)X +3937(splitting)X +1 f +4198(\))X +4246(and)X +4382(by)X +2706 2320(exceeding)N +3052(the)X +3170(\256ll)X +3278(factor)X +3486(\()X +2 f +3513(controlled)X +3862(splitting)X +1 f +4123(\))X +2878 2434(A)N +2962(hash)X +3135(table)X +3317(is)X +3395(parameterized)X +3876(by)X +3981(both)X +4148(its)X +4248(bucket)X +2706 2522(size)N +2904(\()X +2 f +2931(bsize)X +1 f +(\))S +3191(and)X +3380(\256ll)X +3541(factor)X +3801(\()X +2 f +3828(ffactor)X +1 f +4041(\).)X +4180(Whereas)X +2 f +2706 2610(dynahash's)N +1 f +3095(buckets)X +3364(can)X +3500(be)X +3599(represented)X +3993(as)X +4083(a)X +4142(linked)X +4365(list)X +2706 2698(of)N +2798(elements)X +3108(in)X +3195(memory,)X +3507(our)X +3639(package)X +3928(needs)X +4136(to)X +4222(support)X +2706 2786(disk)N +2874(access,)X +3135(and)X +3286(must)X +3476(represent)X +3806(buckets)X +4086(in)X +4183(terms)X +4395(of)X +2706 2874(pages.)N +2955(The)X +2 f +3106(bsize)X +1 f +3291(is)X +3369(the)X +3492(size)X +3642(\(in)X +3756(bytes\))X +3977(of)X +4069(these)X +4259(pages.)X +2706 2962(As)N +2833(in)X +2933(linear)X +3154(hashing,)X +3461(the)X +3597(number)X +3879(of)X +3983(buckets)X +4265(in)X +4364(the)X +2706 3050(table)N +2906(is)X +3003(equal)X +3221(to)X +3327(the)X +3469(number)X +3758(of)X +3869(keys)X +4060(in)X +4165(the)X +4306(table)X +2706 3138(divided)N +2988(by)X +2 f +3110(ffactor)X +1 f +3323(.)X +2 f +8 s +3113(6)Y +1 f +10 s +3417 3138(The)N +3584(controlled)X +3950(splitting)X +4252(occurs)X +2706 3226(each)N +2878(time)X +3044(the)X +3166(number)X +3435(of)X +3526(keys)X +3697(in)X +3783(the)X +3905(table)X +4085(exceeds)X +4364(the)X +2706 3314(\256ll)N +2814(factor)X +3022(multiplied)X +3370(by)X +3470(the)X +3588(number)X +3853(of)X +3940(buckets.)X +2878 3428(Inserting)N +3187(keys)X +3358(and)X +3498(splitting)X +3783(buckets)X +4051(is)X +4127(performed)X +2706 3516(precisely)N +3018(as)X +3107(described)X +3437(previously)X +3796(for)X +2 f +3911(dynahash)X +1 f +4218(.)X +4279(How-)X +2706 3604(ever,)N +2897(since)X +3094(buckets)X +3371(are)X +3502(now)X +3671(comprised)X +4036(of)X +4134(pages,)X +4368(we)X +2706 3692(must)N +2883(be)X +2981(prepared)X +3284(to)X +3367(handle)X +3602(cases)X +3793(where)X +4011(the)X +4130(size)X +4276(of)X +4364(the)X +2706 3780(keys)N +2873(and)X +3009(data)X +3163(in)X +3245(a)X +3301(bucket)X +3535(exceed)X +3779(the)X +3897(bucket)X +4131(size.)X +3 f +3318 3934(Over\257ow)N +3654(Pages)X +1 f +2878 4066(There)N +3095(are)X +3223(two)X +3372(cases)X +3571(where)X +3797(a)X +3862(key)X +4007(may)X +4174(not)X +4305(\256t)X +4400(in)X +2706 4154(its)N +2802(designated)X +3166(bucket.)X +3441(In)X +3529(the)X +3647(\256rst)X +3791(case,)X +3970(the)X +4088(total)X +4250(size)X +4395(of)X +2706 4242(the)N +2833(key)X +2978(and)X +3123(data)X +3286(may)X +3453(exceed)X +3706(the)X +3833(bucket)X +4076(size.)X +4269(In)X +4364(the)X +2706 4330(second,)N +3008(addition)X +3328(of)X +3453(a)X +3547(new)X +3739(key)X +3913(could)X +4149(cause)X +4386(an)X +2706 4418(over\257ow,)N +3068(but)X +3227(the)X +3382(bucket)X +3652(in)X +3770(question)X +4097(is)X +4206(not)X +4364(yet)X +2706 4506(scheduled)N +3049(to)X +3133(be)X +3230(split.)X +3428(In)X +3516(existing)X +3790(implementations,)X +4364(the)X +2706 4594(second)N +2953(case)X +3115(never)X +3317(arises)X +3523(\(since)X +3738(buckets)X +4006(are)X +4128(split)X +4288(when)X +2706 4682(they)N +2871(over\257ow\))X +3210(and)X +3352(the)X +3476(\256rst)X +3626(case)X +3791(is)X +3870(not)X +3998(handled)X +4278(at)X +4362(all.)X +2706 4770(Although)N +3036(large)X +3225(key/data)X +3525(pair)X +3678(handling)X +3986(is)X +4066(dif\256cult)X +4346(and)X +2706 4858(expensive,)N +3083(it)X +3163(is)X +3252(essential.)X +3604(In)X +3706(a)X +3777(linear)X +3995(hashed)X +4253(imple-)X +2706 4946(mentation,)N +3087(over\257ow)X +3413(pages)X +3636(are)X +3775(required)X +4083(for)X +4217(buckets)X +2706 5034(which)N +2935(over\257ow)X +3253(before)X +3492(they)X +3662(are)X +3793(split,)X +3982(so)X +4085(we)X +4211(can)X +4355(use)X +2706 5122(the)N +2833(same)X +3027(mechanism)X +3421(for)X +3544(large)X +3734(key/data)X +4035(pairs)X +4220(that)X +4368(we)X +2706 5210(use)N +2837(for)X +2955(over\257ow)X +3264(pages.)X +3511(Logically,)X +3862(we)X +3980(chain)X +4177(over\257ow)X +16 s +2706 5353 MXY +864 0 Dl +2 f +8 s +2746 5408(6)N +1 f +9 s +2801 5433(This)N +2952(is)X +3023(not)X +3138(strictly)X +3361(true.)X +3532(The)X +3667(\256le)X +3782(does)X +3937(not)X +4052(contract)X +4306(when)X +2706 5513(keys)N +2861(are)X +2972(deleted,)X +3221(so)X +3308(the)X +3419(number)X +3662(of)X +3744(buckets)X +3986(is)X +4056(actually)X +4306(equal)X +2706 5593(to)N +2782(the)X +2890(maximum)X +3202(number)X +3441(of)X +3520(keys)X +3671(ever)X +3814(present)X +4041(in)X +4116(the)X +4223(table)X +4382(di-)X +2706 5673(vided)N +2884(by)X +2974(the)X +3080(\256ll)X +3178(factor.)X +3 f +10 s +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(5)X + +6 p +%%Page: 6 6 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538(pages)N +639(to)X +725(the)X +847(buckets)X +1116(\(also)X +1296(called)X +1512(primary)X +1789(pages\).)X +2062(In)X +2152(a)X +432 626(memory)N +730(based)X +943(representation,)X +1448(over\257ow)X +1763(pages)X +1976(do)X +2086(not)X +432 714(pose)N +628(any)X +792(special)X +1063(problems)X +1409(because)X +1712(we)X +1854(can)X +2014(chain)X +432 802(over\257ow)N +776(pages)X +1017(to)X +1137(primary)X +1449(pages)X +1690(using)X +1921(memory)X +432 890(pointers.)N +776(However,)X +1137(mapping)X +1463(these)X +1674(over\257ow)X +2005(pages)X +432 978(into)N +584(a)X +648(disk)X +809(\256le)X +939(is)X +1019(more)X +1211(of)X +1305(a)X +1368(challenge,)X +1723(since)X +1915(we)X +2036(need)X +432 1066(to)N +547(be)X +675(able)X +861(to)X +975(address)X +1268(both)X +1462(bucket)X +1728(pages,)X +1983(whose)X +432 1154(numbers)N +729(are)X +849(growing)X +1137(linearly,)X +1422(and)X +1558(some)X +1747(indeterminate)X +432 1242(number)N +715(of)X +820(over\257ow)X +1143(pages)X +1364(without)X +1646(reorganizing)X +2090(the)X +432 1330(\256le.)N +604 1444(One)N +789(simple)X +1053(solution)X +1361(would)X +1612(be)X +1739(to)X +1852(allocate)X +2152(a)X +432 1532(separate)N +737(\256le)X +880(for)X +1015(over\257ow)X +1341(pages.)X +1604(The)X +1769(disadvantage)X +432 1620(with)N +605(such)X +783(a)X +850(technique)X +1193(is)X +1276(that)X +1426(it)X +1500(requires)X +1789(an)X +1895(extra)X +2086(\256le)X +432 1708(descriptor,)N +794(an)X +891(extra)X +1073(system)X +1316(call)X +1453(on)X +1554(open)X +1731(and)X +1867(close,)X +2072(and)X +432 1796(logically)N +739(associating)X +1122(two)X +1269(independent)X +1687(\256les.)X +1886(For)X +2023(these)X +432 1884(reasons,)N +728(we)X +857(wanted)X +1123(to)X +1219(map)X +1391(both)X +1567(primary)X +1855(pages)X +2072(and)X +432 1972(over\257ow)N +737(pages)X +940(into)X +1084(the)X +1202(same)X +1387(\256le)X +1509(space.)X +604 2086(The)N +799(buddy-in-waiting)X +1425(algorithm)X +1806(provides)X +2152(a)X +432 2174(mechanism)N +851(to)X +966(support)X +1259(multiple)X +1578(pages)X +1814(per)X +1970(logical)X +432 2262(bucket)N +685(while)X +902(retaining)X +1226(the)X +1362(simple)X +1613(split)X +1788(sequence)X +2121(of)X +432 2350(linear)N +681(hashing.)X +1015(Over\257ow)X +1383(pages)X +1631(are)X +1795(preallocated)X +432 2438(between)N +781(generations)X +1232(of)X +1379(primary)X +1713(pages.)X +1996(These)X +432 2526(over\257ow)N +759(pages)X +984(are)X +1125(used)X +1314(by)X +1436(any)X +1594(bucket)X +1850(containing)X +432 2614(more)N +646(keys)X +842(than)X +1029(\256t)X +1144(on)X +1273(the)X +1420(primary)X +1723(page)X +1924(and)X +2089(are)X +432 2702(reclaimed,)N +808(if)X +896(possible,)X +1217(when)X +1430(the)X +1567(bucket)X +1819(later)X +2000(splits.)X +432 2790(Figure)N +687(3)X +773(depicts)X +1045(the)X +1188(layout)X +1433(of)X +1545(primary)X +1844(pages)X +2072(and)X +432 2878(over\257ow)N +752(pages)X +970(within)X +1209(the)X +1342(same)X +1542(\256le.)X +1699(Over\257ow)X +2036(page)X +432 2966(use)N +586(information)X +1011(is)X +1111(recorded)X +1440(in)X +1548(bitmaps)X +1847(which)X +2089(are)X +432 3054(themselves)N +819(stored)X +1046(on)X +1157(over\257ow)X +1472(pages.)X +1725(The)X +1880(addresses)X +432 3142(of)N +520(the)X +639(bitmap)X +882(pages)X +1086(and)X +1223(the)X +1342(number)X +1608(of)X +1695(pages)X +1898(allocated)X +432 3230(at)N +515(each)X +688(split)X +850(point)X +1039(are)X +1163(stored)X +1384(in)X +1470(the)X +1592(\256le)X +1718(header.)X +1997(Using)X +432 3318(this)N +577(information,)X +1005(both)X +1177(over\257ow)X +1492(addresses)X +1829(and)X +1974(bucket)X +432 3406(addresses)N +764(can)X +900(be)X +999(mapped)X +1276(to)X +1361(disk)X +1517(addresses)X +1848(by)X +1951(the)X +2072(fol-)X +432 3494(lowing)N +674(calculation:)X +0 f +8 s +432 3793(int)N +736(bucket;)X +1192(/*)X +1306(bucket)X +1572(address)X +1876(*/)X +432 3881(u_short)N +736(oaddr;)X +1192(/*)X +1306(OVERFLOW)X +1648(address)X +1952(*/)X +432 3969(int)N +736 -0.4125(nhdr_pages;)AX +1192(/*)X +1306(npages)X +1572(in)X +1686 -112.4062(\256le)AX +1838(header)X +2104(*/)X +432 4057(int)N +736 -0.4125(spares[32];)AX +1192(/*)X +1306(npages)X +1572(at)X +1686(each)X +1876(split)X +2104(*/)X +432 4145(int)N +736(log2\(\);)X +1198(/*)X +1312(ceil\(log)X +1654(base)X +1844(2\))X +1958(*/)X +432 4321(#DEFINE)N +736 -0.3929(BUCKET_TO_PAGE\(bucket\))AX +1610(\\)X +584 4409(bucket)N +850(+)X +926 -0.4167(nhdr_pages)AX +1344(+)X +1420(\\)X +584 4497 -0.3894(\(bucket?spares[logs2\(bucket)AN +1648(+)X +1724(1\)-1]:0\))X +432 4673(#DEFINE)N +736 -0.3947(OADDR_TO_PAGE\(oaddr\))AX +1534(\\)X +584 4761 -0.3984(BUCKET_TO_PAGE\(\(1)AN +1268(<<)X +1382 -0.4091(\(oaddr>>11\)\))AX +1876(-)X +1952(1\))X +2066(+)X +2142(\\)X +584 4849(oaddr)N +812(&)X +888(0x7ff;)X +1 f +10 s +604 5262(An)N +728(over\257ow)X +1039(page)X +1217(is)X +1295(addressed)X +1637(by)X +1742(its)X +1842(split)X +2004(point,)X +432 5350(identifying)N +858(the)X +1031(generations)X +1476(between)X +1819(which)X +2090(the)X +432 5438(over\257ow)N +740(page)X +915(is)X +991(allocated,)X +1324(and)X +1463(its)X +1561(page)X +1736(number,)X +2023(iden-)X +432 5526(tifying)N +665(the)X +783(particular)X +1111(page)X +1283(within)X +1507(the)X +1625(split)X +1782(point.)X +1986(In)X +2073(this)X +432 5614(implementation,)N +983(offsets)X +1225(within)X +1457(pages)X +1668(are)X +1795(16)X +1903(bits)X +2046(long)X +432 5702(\(limiting)N +732(the)X +851(maximum)X +1196(page)X +1368(size)X +1513(to)X +1595(32K\),)X +1800(so)X +1891(we)X +2005(select)X +2418 538(an)N +2535(over\257ow)X +2860(page)X +3052(addressing)X +3435(algorithm)X +3786(that)X +3946(can)X +4098(be)X +2418 626(expressed)N +2760(in)X +2847(16)X +2952(bits)X +3091(and)X +3231(which)X +3451(allows)X +3684(quick)X +3886(retrieval.)X +2418 714(The)N +2568(top)X +2695(\256ve)X +2840(bits)X +2980(indicate)X +3258(the)X +3380(split)X +3541(point)X +3729(and)X +3869(the)X +3991(lower)X +2418 802(eleven)N +2650(indicate)X +2926(the)X +3046(page)X +3220(number)X +3487(within)X +3713(the)X +3832(split)X +3990(point.)X +2418 890(Since)N +2633(\256ve)X +2789(bits)X +2940(are)X +3075(reserved)X +3384(for)X +3514(the)X +3648(split)X +3821(point,)X +4041(\256les)X +2418 978(may)N +2578(split)X +2737(32)X +2839(times)X +3034(yielding)X +3318(a)X +3376(maximum)X +3721(\256le)X +3844(size)X +3990(of)X +4078(2)X +7 s +946(32)Y +10 s +2418 1066(buckets)N +2698(and)X +2849(32)X +2 f +(*)S +1 f +2982(2)X +7 s +1034(11)Y +10 s +3113 1066(over\257ow)N +3433(pages.)X +3691(The)X +3850(maximum)X +2418 1154(page)N +2597(size)X +2749(is)X +2829(2)X +7 s +1122(15)Y +10 s +1154(,)Y +2971(yielding)X +3259(a)X +3321(maximum)X +3671(\256le)X +3799(size)X +3950(greater)X +2418 1242(than)N +2601(131,000)X +2906(GB)X +3061(\(on)X +3212(\256le)X +3358(systems)X +3655(supporting)X +4041(\256les)X +2418 1330(larger)N +2626(than)X +2784(4GB\).)X +10 f +2418 1418 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 Dt +4014 2275 MXY +0 133 Dl +3881 2275 MXY +0 133 Dl +3748 2275 MXY +0 133 Dl +3083 2275 MXY +0 133 Dl +5 s +1 f +3523 2475(2/3)N +3390(2/2)X +3257(2/1)X +2859(1/2)X +2726(1/1)X +5 Dt +3814 1743 MXY +0 133 Dl +3282 1743 MXY +0 133 Dl +3017 1743 MXY +0 133 Dl +2884 1743 MXY +0 133 Dl +1 Dt +3681 1743 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3548 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3415 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3282 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3150 MX +0 133 Dl +132 0 Dl +0 -133 Dl +-132 0 Dl +3017 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +2884 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3 f +8 s +3017 2601(Over\257ow)N +3285(Addresses)X +3515 2833(Over\257ow)N +3783(Pages)X +2850(Buckets)X +1 Di +3349 2740 MXY + 3349 2740 lineto + 3482 2740 lineto + 3482 2873 lineto + 3349 2873 lineto + 3349 2740 lineto +closepath 3 3349 2740 3482 2873 Dp +2684 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +5 Dt +4146 2275 MXY +0 133 Dl +3216 2275 MXY +0 133 Dl +2684 2275 MXY +0 133 Dl +2551 2275 MXY +0 133 Dl +1 f +3798 1963(3)N +3266 1980(2)N +3001(1)X +2868(0)X +1 Dt +2751 1743 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3548 2275 MXY +-15 -22 Dl +2 16 Dl +-13 11 Dl +26 -5 Dl +-282 -117 Dl +3432 2275 MXY +-10 -25 Dl +-2 16 Dl +-15 8 Dl +27 1 Dl +-166 -117 Dl +3282 2275 MXY +12 -25 Dl +-14 10 Dl +-15 -6 Dl +17 21 Dl +-16 -117 Dl +2884 2275 MXY +26 7 Dl +-12 -12 Dl +3 -16 Dl +-17 21 Dl +382 -117 Dl +2751 2275 MXY +25 9 Dl +-11 -12 Dl +5 -17 Dl +-19 20 Dl +515 -117 Dl +3 f +3070 2152(Over\257ow)N +3338(Pages)X +3482 2275 MXY + 3482 2275 lineto + 3615 2275 lineto + 3615 2408 lineto + 3482 2408 lineto + 3482 2275 lineto +closepath 3 3482 2275 3615 2408 Dp +3349 MX + 3349 2275 lineto + 3482 2275 lineto + 3482 2408 lineto + 3349 2408 lineto + 3349 2275 lineto +closepath 3 3349 2275 3482 2408 Dp +3216 MX + 3216 2275 lineto + 3349 2275 lineto + 3349 2408 lineto + 3216 2408 lineto + 3216 2275 lineto +closepath 3 3216 2275 3349 2408 Dp +2817 MX + 2817 2275 lineto + 2950 2275 lineto + 2950 2408 lineto + 2817 2408 lineto + 2817 2275 lineto +closepath 3 2817 2275 2950 2408 Dp +2684 MX + 2684 2275 lineto + 2817 2275 lineto + 2817 2408 lineto + 2684 2408 lineto + 2684 2275 lineto +closepath 3 2684 2275 2817 2408 Dp +3615 MX +0 133 Dl +531 0 Dl +0 -133 Dl +-531 0 Dl +2950 MX +0 133 Dl +266 0 Dl +0 -133 Dl +-266 0 Dl +2551 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3798 1726 MXY +-21 -18 Dl +6 16 Dl +-10 13 Dl +25 -11 Dl +-599 -99 Dl +3266 1726 MXY +-1 -27 Dl +-7 15 Dl +-17 1 Dl +25 11 Dl +-67 -99 Dl +3033 1726 MXY +27 1 Dl +-14 -8 Dl +-1 -17 Dl +-12 24 Dl +166 -99 Dl +2900 1726 MXY +27 7 Dl +-13 -11 Dl +3 -17 Dl +-17 21 Dl +299 -99 Dl +3058 1621(Split)N +3203(Points)X +2418 2275 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3 Dt +-1 Ds +3137(Figure)Y +2619(3:)X +1 f +2691(Split)X +2832(points)X +3008(occur)X +3168(between)X +3399(generations)X +3712(and)X +3823(are)X +3919(numbered)X +2418 3225(from)N +2560(0.)X +2642(In)X +2713(this)X +2824(\256gure)X +2991(there)X +3136(are)X +3231(two)X +3345(over\257ow)X +3590(pages)X +3753(allocated)X +4000(at)X +4063(split)X +2418 3313(point)N +2566(1)X +2614(and)X +2722(three)X +2865(allocated)X +3111(at)X +3173(split)X +3300(point)X +3448(2.)X +10 s +10 f +2418 3489 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +2949 3731(Buffer)N +3192(Management)X +1 f +2590 3863(The)N +2744(hash)X +2920(table)X +3105(is)X +3187(stored)X +3412(in)X +3502(memory)X +3797(as)X +3892(a)X +3956(logical)X +2418 3951(array)N +2633(of)X +2749(bucket)X +3012(pointers.)X +3359(Physically,)X +3761(the)X +3907(array)X +4121(is)X +2418 4039(arranged)N +2728(in)X +2818(segments)X +3144(of)X +3239(256)X +3387(pointers.)X +3713(Initially,)X +4013(there)X +2418 4127(is)N +2530(space)X +2767(to)X +2887(allocate)X +3195(256)X +3373(segments.)X +3769(Reallocation)X +2418 4215(occurs)N +2651(when)X +2847(the)X +2967(number)X +3234(of)X +3323(buckets)X +3590(exceeds)X +3867(32K)X +4027(\(256)X +2418 4303(*)N +2508(256\).)X +2745(Primary)X +3053(pages)X +3286(may)X +3473(be)X +3598(accessed)X +3929(directly)X +2418 4391(through)N +2711(the)X +2853(array)X +3062(by)X +3185(bucket)X +3442(number)X +3730(and)X +3889(over\257ow)X +2418 4479(pages)N +2628(are)X +2754 0.4028(referenced)AX +3122(logically)X +3429(by)X +3536(their)X +3710(over\257ow)X +4022(page)X +2418 4567(address.)N +2726(For)X +2864(small)X +3063(hash)X +3236(tables,)X +3469(it)X +3539(is)X +3618(desirable)X +3934(to)X +4022(keep)X +2418 4655(all)N +2525(pages)X +2735(in)X +2823(main)X +3009(memory)X +3302(while)X +3506(on)X +3612(larger)X +3826(tables,)X +4059(this)X +2418 4743(is)N +2523(probably)X +2860(impossible.)X +3298(To)X +3438(satisfy)X +3698(both)X +3891(of)X +4009(these)X +2418 4831(requirements,)N +2900(the)X +3041(package)X +3348(includes)X +3658(buffer)X +3897(manage-)X +2418 4919(ment)N +2598(with)X +2760(LRU)X +2940(\(least)X +3134(recently)X +3413(used\))X +3607(replacement.)X +2590 5033(By)N +2730(default,)X +3020(the)X +3165(package)X +3475(allocates)X +3802(up)X +3928(to)X +4036(64K)X +2418 5121(bytes)N +2616(of)X +2712(buffered)X +3014(pages.)X +3246(All)X +3377(pages)X +3589(in)X +3680(the)X +3807(buffer)X +4032(pool)X +2418 5209(are)N +2542(linked)X +2766(in)X +2852(LRU)X +3036(order)X +3230(to)X +3316(facilitate)X +3621(fast)X +3761(replacement.)X +2418 5297(Whereas)N +2724(ef\256cient)X +3011(access)X +3241(to)X +3327(primary)X +3605(pages)X +3812(is)X +3889(provided)X +2418 5385(by)N +2521(the)X +2642(bucket)X +2879(array,)X +3087(ef\256cient)X +3372(access)X +3600(to)X +3684(over\257ow)X +3991(pages)X +2418 5473(is)N +2501(provided)X +2816(by)X +2926(linking)X +3182(over\257ow)X +3497(page)X +3679(buffers)X +3936(to)X +4027(their)X +2418 5561(predecessor)N +2827(page)X +3008(\(either)X +3247(the)X +3374(primary)X +3657(page)X +3838(or)X +3933(another)X +2418 5649(over\257ow)N +2742(page\).)X +3000(This)X +3181(means)X +3425(that)X +3584(an)X +3699(over\257ow)X +4022(page)X +3 f +432 5960(6)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +7 p +%%Page: 7 7 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(cannot)N +955(be)X +1052(present)X +1305(in)X +1388(the)X +1507(buffer)X +1724(pool)X +1886(if)X +1955(its)X +2050(primary)X +2324(page)X +720 626(is)N +804(not)X +937(present.)X +1240(This)X +1413(does)X +1591(not)X +1724(impact)X +1972(performance)X +2409(or)X +720 714(functionality,)N +1209(because)X +1524(an)X +1660(over\257ow)X +2005(page)X +2217(will)X +2400(be)X +720 802(accessed)N +1048(only)X +1236(after)X +1430(its)X +1550(predecessor)X +1975(page)X +2172(has)X +2324(been)X +720 890(accessed.)N +1068(Figure)X +1303(4)X +1369(depicts)X +1622(the)X +1746(data)X +1905(structures)X +2242(used)X +2414(to)X +720 978(manage)N +990(the)X +1108(buffer)X +1325(pool.)X +892 1092(The)N +1040(in-memory)X +1419(bucket)X +1656(array)X +1845(contains)X +2134(pointers)X +2414(to)X +720 1180(buffer)N +975(header)X +1248(structures)X +1617(which)X +1870(represent)X +2222(primary)X +720 1268(pages.)N +968(Buffer)X +1203(headers)X +1474(contain)X +1735(modi\256ed)X +2043(bits,)X +2202(the)X +2324(page)X +720 1356(address)N +995(of)X +1096(the)X +1228(buffer,)X +1479(a)X +1548(pointer)X +1808(to)X +1903(the)X +2034(actual)X +2259(buffer,)X +720 1444(and)N +875(a)X +950(pointer)X +1216(to)X +1317(the)X +1454(buffer)X +1690(header)X +1944(for)X +2077(an)X +2191(over\257ow)X +720 1532(page)N +901(if)X +979(it)X +1052(exists,)X +1283(in)X +1374(addition)X +1665(to)X +1756(the)X +1883(LRU)X +2072(links.)X +2296(If)X +2378(the)X +720 1620(buffer)N +950(corresponding)X +1442(to)X +1537(a)X +1606(particular)X +1947(bucket)X +2194(is)X +2280(not)X +2414(in)X +720 1708(memory,)N +1048(its)X +1164(pointer)X +1432(is)X +1526(NULL.)X +1801(In)X +1909(effect,)X +2154(pages)X +2377(are)X +720 1796(linked)N +950(in)X +1042(three)X +1233(ways.)X +1468(Using)X +1689(the)X +1817(buffer)X +2043(headers,)X +2338(they)X +720 1884(are)N +851(linked)X +1083(physically)X +1444(through)X +1725(the)X +1854(LRU)X +2045(links)X +2231(and)X +2378(the)X +720 1972(over\257ow)N +1036(links.)X +1241(Using)X +1462(the)X +1590(pages)X +1803(themselves,)X +2209(they)X +2377(are)X +720 2060(linked)N +943(logically)X +1246(through)X +1518(the)X +1639(over\257ow)X +1946(addresses)X +2276(on)X +2378(the)X +720 2148(page.)N +948(Since)X +1162(over\257ow)X +1482(pages)X +1700(are)X +1834(accessed)X +2151(only)X +2328(after)X +720 2236(their)N +904(predecessor)X +1321(pages,)X +1560(they)X +1734(are)X +1869(removed)X +2186(from)X +2378(the)X +720 2324(buffer)N +937(pool)X +1099(when)X +1293(their)X +1460(primary)X +1734(is)X +1807(removed.)X +10 f +720 2412 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 Dt +2309 3177 MXY +24 15 Dl +-8 -15 Dl +8 -15 Dl +-24 15 Dl +52 0 Dl +789 3160 MXY +-35 0 Dl +0 -156 Dl +1607 0 Dl +0 173 Dl +789 3091 MXY +-24 -15 Dl +9 15 Dl +-9 15 Dl +24 -15 Dl +-69 0 Dl +2309 3125 MXY +104 0 Dl +0 -155 Dl +-1693 0 Dl +0 121 Dl +927 3160 MXY +24 15 Dl +-9 -15 Dl +9 -15 Dl +-24 15 Dl +553 0 Dl +1618 3177 MXY +8 27 Dl +4 -17 Dl +16 -6 Dl +-28 -4 Dl +138 121 Dl +1895 3315 MXY +28 3 Dl +-15 -9 Dl +1 -18 Dl +-14 24 Dl +276 -138 Dl +3108 MY +-28 -3 Dl +15 10 Dl +-1 17 Dl +14 -24 Dl +-276 138 Dl +1756 3229 MXY +-8 -27 Dl +-3 17 Dl +-16 6 Dl +27 4 Dl +-138 -121 Dl +1480 MX +-24 -15 Dl +9 15 Dl +-9 15 Dl +24 -15 Dl +-553 0 Dl +3 f +5 s +1083 3073(LRU)N +1178(chain)X +4 Ds +1402 3851 MXY + 1402 3851 lineto + 1471 3851 lineto + 1471 3920 lineto + 1402 3920 lineto + 1402 3851 lineto +closepath 19 1402 3851 1471 3920 Dp +1445 3747(Over\257ow)N +1613(Address)X +1549 3609 MXY +0 69 Dl +1756 MX +-23 -15 Dl +8 15 Dl +-8 15 Dl +23 -15 Dl +-207 0 Dl +-1 Ds +3 Dt +1756 3419 MXY +-6 -28 Dl +-4 17 Dl +-17 5 Dl +27 6 Dl +-138 -138 Dl +2240 3471 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1826 3609 MXY +15 -24 Dl +-15 9 Dl +-16 -9 Dl +16 24 Dl +0 -138 Dl +1549 MX +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +858 3471 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +2240 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1549 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +858 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1 Dt +2171 3471 MXY + 2171 3471 lineto + 2448 3471 lineto + 2448 3609 lineto + 2171 3609 lineto + 2171 3471 lineto +closepath 19 2171 3471 2448 3609 Dp +1756 3609 MXY + 1756 3609 lineto + 2033 3609 lineto + 2033 3747 lineto + 1756 3747 lineto + 1756 3609 lineto +closepath 3 1756 3609 2033 3747 Dp +1480 3471 MXY + 1480 3471 lineto + 1756 3471 lineto + 1756 3609 lineto + 1480 3609 lineto + 1480 3471 lineto +closepath 19 1480 3471 1756 3609 Dp +789 MX + 789 3471 lineto + 1065 3471 lineto + 1065 3609 lineto + 789 3609 lineto + 789 3471 lineto +closepath 19 789 3471 1065 3609 Dp +962 3903(Buffer)N +1083(Header)X +849 3851 MXY + 849 3851 lineto + 918 3851 lineto + 918 3920 lineto + 849 3920 lineto + 849 3851 lineto +closepath 14 849 3851 918 3920 Dp +1756 3194 MXY + 1756 3194 lineto + 1895 3194 lineto + 1895 3471 lineto + 1756 3471 lineto + 1756 3194 lineto +closepath 14 1756 3194 1895 3471 Dp +2171 3056 MXY + 2171 3056 lineto + 2309 3056 lineto + 2309 3333 lineto + 2171 3333 lineto + 2171 3056 lineto +closepath 14 2171 3056 2309 3333 Dp +1480 MX + 1480 3056 lineto + 1618 3056 lineto + 1618 3333 lineto + 1480 3333 lineto + 1480 3056 lineto +closepath 14 1480 3056 1618 3333 Dp +789 MX + 789 3056 lineto + 927 3056 lineto + 927 3333 lineto + 789 3333 lineto + 789 3056 lineto +closepath 14 789 3056 927 3333 Dp +2780 MY +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +927 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1065 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1203 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +1342 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1480 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1618 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1756 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +1895 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2033 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2171 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2309 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +13 s +1048 2720(In)N +1173(Memory)X +1580(Bucket)X +1918(Array)X +867 3584(B0)N +1558(B5)X +2223(B10)X +1788 3722(O1/1)N +5 s +1515 3903(Primay)N +1651(Buffer)X +4 Ds +1990 3851 MXY + 1990 3851 lineto + 2059 3851 lineto + 2059 3920 lineto + 1990 3920 lineto + 1990 3851 lineto +closepath 3 1990 3851 2059 3920 Dp +2102 3903(Over\257ow)N +2270(Buffer)X +3 Dt +-1 Ds +8 s +720 4184(Figure)N +922(4:)X +1 f +996(Three)X +1164(primary)X +1386(pages)X +1551(\(B0,)X +1683(B5,)X +1794(B10\))X +1942(are)X +2039(accessed)X +2281(directly)X +720 4272(from)N +862(the)X +958(bucket)X +1146(array.)X +1326(The)X +1443(one)X +1553(over\257ow)X +1798(page)X +1935(\(O1/1\))X +2122(is)X +2182(linked)X +2359(phy-)X +720 4360(sically)N +915(from)X +1067(its)X +1155(primary)X +1384(page's)X +1577(buffer)X +1759(header)X +1955(as)X +2035(well)X +2172(as)X +2252(logically)X +720 4448(from)N +860(its)X +937(predecessor)X +1253(page)X +1389(buffer)X +1560(\(B5\).)X +10 s +10 f +720 4624 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1191 4954(Table)N +1406(Parameterization)X +1 f +892 5086(When)N +1107(a)X +1166(hash)X +1336(table)X +1515(is)X +1590(created,)X +1865(the)X +1985(bucket)X +2221(size,)X +2388(\256ll)X +720 5174(factor,)N +953(initial)X +1164(number)X +1434(of)X +1526(elements,)X +1856(number)X +2125(of)X +2216(bytes)X +2409(of)X +720 5262(main)N +919(memory)X +1225(used)X +1411(for)X +1543(caching,)X +1851(and)X +2005(a)X +2079(user-de\256ned)X +720 5350(hash)N +892(function)X +1184(may)X +1347(be)X +1448(speci\256ed.)X +1797(The)X +1946(bucket)X +2184(size)X +2333(\(and)X +720 5438(page)N +906(size)X +1064(for)X +1191(over\257ow)X +1509(pages\))X +1752(defaults)X +2039(to)X +2134(256)X +2287(bytes.)X +720 5526(For)N +858(tables)X +1072(with)X +1241(large)X +1429(data)X +1590(items,)X +1810(it)X +1881(may)X +2046(be)X +2149(preferable)X +720 5614(to)N +803(increase)X +1088(the)X +1207(page)X +1380(size,)X +1545(and,)X +1701(conversely,)X +2089(applications)X +720 5702(storing)N +1002(small)X +1235(items)X +1467(exclusively)X +1891(in)X +2012(memory)X +2338(may)X +2706 538(bene\256t)N +2966(from)X +3164(a)X +3242(smaller)X +3520(bucket)X +3776(size.)X +3983(A)X +4082(bucket)X +4337(size)X +2706 626(smaller)N +2962(than)X +3120(64)X +3220(bytes)X +3409(is)X +3482(not)X +3604(recommended.)X +2878 740(The)N +3031(\256ll)X +3147(factor)X +3363(indicates)X +3676(a)X +3740(desired)X +4000(density)X +4258(within)X +2706 828(the)N +2833(hash)X +3009(table.)X +3234(It)X +3312(is)X +3394(an)X +3499(approximation)X +3995(of)X +4091(the)X +4217(number)X +2706 916(of)N +2815(keys)X +3004(allowed)X +3300(to)X +3404(accumulate)X +3811(in)X +3914(any)X +4071(one)X +4228(bucket,)X +2706 1004(determining)N +3119(when)X +3319(the)X +3442(hash)X +3614(table)X +3795(grows.)X +4056(Its)X +4161(default)X +4409(is)X +2706 1092(eight.)N +2953(If)X +3054(the)X +3199(user)X +3380(knows)X +3636(the)X +3781(average)X +4079(size)X +4251(of)X +4364(the)X +2706 1180(key/data)N +3008(pairs)X +3194(being)X +3402(stored)X +3627(in)X +3718(the)X +3845(table,)X +4050(near)X +4218(optimal)X +2706 1268(bucket)N +2943(sizes)X +3122(and)X +3261(\256ll)X +3372(factors)X +3614(may)X +3775(be)X +3874(selected)X +4155(by)X +4257(apply-)X +2706 1356(ing)N +2828(the)X +2946(equation:)X +0 f +8 s +2706 1655(\(1\))N +2994 -0.3938(\(\(average_pair_length)AX +3830(+)X +3906(4\))X +4020(*)X +3032 1743(ffactor\))N +3374(>=)X +3488(bsize)X +1 f +10 s +2706 2042(For)N +2859(highly)X +3104(time)X +3287(critical)X +3551(applications,)X +3999(experimenting)X +2706 2130(with)N +2919(different)X +3266(bucket)X +3550(sizes)X +3776(and)X +3962(\256ll)X +4120(factors)X +4409(is)X +2706 2218(encouraged.)N +2878 2332(Figures)N +3144(5a,b,)X +3326(and)X +3468(c)X +3530(illustrate)X +3836(the)X +3960(effects)X +4200(of)X +4292(vary-)X +2706 2420(ing)N +2841(page)X +3026(sizes)X +3215(and)X +3363(\256ll)X +3483(factors)X +3734(for)X +3860(the)X +3990(same)X +4187(data)X +4353(set.)X +2706 2508(The)N +2864(data)X +3031(set)X +3152(consisted)X +3482(of)X +3581(24474)X +3813(keys)X +3992(taken)X +4198(from)X +4386(an)X +2706 2596(online)N +2931(dictionary.)X +3301(The)X +3451(data)X +3609(value)X +3807(for)X +3925(each)X +4097(key)X +4237(was)X +4386(an)X +2706 2684(ASCII)N +2938(string)X +3143(for)X +3260(an)X +3359(integer)X +3605(from)X +3784(1)X +3847(to)X +3931(24474)X +4153(inclusive.)X +2706 2772(The)N +2867(test)X +3013(run)X +3155(consisted)X +3488(of)X +3590(creating)X +3884(a)X +3955(new)X +4124(hash)X +4306(table)X +2706 2860(\(where)N +2966(the)X +3100(ultimate)X +3398(size)X +3559(of)X +3662(the)X +3796(table)X +3987(was)X +4147(known)X +4400(in)X +2706 2948(advance\),)N +3054(entering)X +3354(each)X +3539(key/data)X +3848(pair)X +4010(into)X +4171(the)X +4306(table)X +2706 3036(and)N +2849(then)X +3014(retrieving)X +3353(each)X +3528(key/data)X +3827(pair)X +3979(from)X +4162(the)X +4286(table.)X +2706 3124(Each)N +2898(of)X +2996(the)X +3125(graphs)X +3369(shows)X +3599(the)X +3727(timings)X +3996(resulting)X +4306(from)X +2706 3212(varying)N +2973(the)X +3093(pagesize)X +3392(from)X +3570(128)X +3712(bytes)X +3903(to)X +3986(1M)X +4118(and)X +4255(the)X +4374(\256ll)X +2706 3300(factor)N +2929(from)X +3120(1)X +3195(to)X +3292(128.)X +3486(For)X +3631(each)X +3813(run,)X +3974(the)X +4106(buffer)X +4337(size)X +2706 3388(was)N +2874(set)X +3006(at)X +3106(1M.)X +3299(The)X +3466(tests)X +3650(were)X +3849(all)X +3971(run)X +4120(on)X +4242(an)X +4360(HP)X +2706 3476(9000/370)N +3077(\(33.3)X +3312(Mhz)X +3527(MC68030\),)X +3966(with)X +4176(16M)X +4395(of)X +2706 3564(memory,)N +3042(64K)X +3228(physically)X +3605(addressed)X +3970(cache,)X +4222(and)X +4386(an)X +2706 3652(HP7959S)N +3055(disk)X +3231(drive,)X +3459(running)X +3751(4.3BSD-Reno)X +4244(single-)X +2706 3740(user.)N +2878 3854(Both)N +3066(system)X +3321(time)X +3496(\(Figure)X +3764(5a\))X +3899(and)X +4047(elapsed)X +4320(time)X +2706 3942(\(Figure)N +2966(5b\))X +3097(show)X +3290(that)X +3434(for)X +3552(all)X +3655(bucket)X +3892(sizes,)X +4091(the)X +4212(greatest)X +2706 4030(performance)N +3137(gains)X +3329(are)X +3451(made)X +3648(by)X +3751(increasing)X +4104(the)X +4225(\256ll)X +4336(fac-)X +2706 4118(tor)N +2822(until)X +2995(equation)X +3298(1)X +3365(is)X +3445(satis\256ed.)X +3774(The)X +3925(user)X +4085(time)X +4253(shown)X +2706 4206(in)N +2791(Figure)X +3023(5c)X +3122(gives)X +3314(a)X +3373(more)X +3561(detailed)X +3838(picture)X +4083(of)X +4172(how)X +4332(per-)X +2706 4294(formance)N +3054(varies.)X +3330(The)X +3499(smaller)X +3778(bucket)X +4035(sizes)X +4234(require)X +2706 4382(fewer)N +2921(keys)X +3099(per)X +3233(page)X +3416(to)X +3509(satisfy)X +3749(equation)X +4056(1)X +4127(and)X +4274(there-)X +2706 4470(fore)N +2860(incur)X +3049(fewer)X +3257(collisions.)X +3607(However,)X +3946(when)X +4144(the)X +4265(buffer)X +2706 4558(pool)N +2884(size)X +3045(is)X +3134(\256xed,)X +3349(smaller)X +3620(pages)X +3838(imply)X +4059(more)X +4259(pages.)X +2706 4646(An)N +2830(increased)X +3160(number)X +3430(of)X +3522(pages)X +3730(means)X +3960(more)X +2 f +4150(malloc\(3\))X +1 f +2706 4734(calls)N +2879(and)X +3021(more)X +3212(overhead)X +3533(in)X +3621(the)X +3745(hash)X +3918(package's)X +4265(buffer)X +2706 4822(manager)N +3003(to)X +3085(manage)X +3355(the)X +3473(additional)X +3813(pages.)X +2878 4936(The)N +3028(tradeoff)X +3308(works)X +3529(out)X +3655(most)X +3834(favorably)X +4166(when)X +4364(the)X +2706 5024(page)N +2886(size)X +3039(is)X +3120(256)X +3268(and)X +3412(the)X +3538(\256ll)X +3654(factor)X +3870(is)X +3950(8.)X +4057(Similar)X +4319(con-)X +2706 5112(clusions)N +3009(were)X +3207(obtained)X +3524(if)X +3614(the)X +3753(test)X +3905(was)X +4071(run)X +4218(without)X +2706 5200(knowing)N +3007(the)X +3126(\256nal)X +3289(table)X +3466(size)X +3612(in)X +3695(advance.)X +4020(If)X +4095(the)X +4214(\256le)X +4337(was)X +2706 5288(closed)N +2942(and)X +3088(written)X +3345(to)X +3437(disk,)X +3620(the)X +3748(conclusions)X +4156(were)X +4343(still)X +2706 5376(the)N +2832(same.)X +3065(However,)X +3408(rereading)X +3740(the)X +3865(\256le)X +3994(from)X +4177(disk)X +4337(was)X +2706 5464(slightly)N +2983(faster)X +3199(if)X +3285(a)X +3358(larger)X +3583(bucket)X +3834(size)X +3996(and)X +4149(\256ll)X +4274(factor)X +2706 5552(were)N +2898(used)X +3079(\(1K)X +3238(bucket)X +3486(size)X +3645(and)X +3795(32)X +3909(\256ll)X +4031(factor\).)X +4320(This)X +2706 5640(follows)N +2987(intuitively)X +3356(from)X +3553(the)X +3691(improved)X +4038(ef\256ciency)X +4395(of)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(7)X + +8 p +%%Page: 8 8 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538(performing)N +830(1K)X +965(reads)X +1172(from)X +1365(the)X +1500(disk)X +1670(rather)X +1894(than)X +2068(256)X +432 626(byte)N +609(reads.)X +857(In)X +962(general,)X +1257(performance)X +1702(for)X +1834(disk)X +2005(based)X +432 714(tables)N +639(is)X +712(best)X +861(when)X +1055(the)X +1173(page)X +1345(size)X +1490(is)X +1563(approximately)X +2046(1K.)X +10 f +432 802 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +619 2380 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +629 2437 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +648 2504 MXY +-12 25 Dl +24 0 Dl +-12 -25 Dl +686 2515 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +762 2516 MXY +-12 24 Dl +25 0 Dl +-13 -24 Dl +916 2515 MXY +-13 24 Dl +25 0 Dl +-12 -24 Dl +1222 2516 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1834 2515 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1 Dt +619 2392 MXY +10 57 Dl +19 67 Dl +38 11 Dl +76 1 Dl +154 -1 Dl +306 1 Dl +612 -1 Dl +8 s +1 f +1628 2522(128)N +3 Dt +607 2245 MXY +24 Dc +617 2375 MXY +23 Dc +635 2442 MXY +24 Dc +674 2525 MXY +23 Dc +750 2529 MXY +24 Dc +904 2527 MXY +23 Dc +1210 MX +23 Dc +1822 2528 MXY +23 Dc +20 Ds +1 Dt +619 2245 MXY +10 130 Dl +19 67 Dl +38 83 Dl +76 4 Dl +154 -2 Dl +306 0 Dl +612 1 Dl +678 2482(256)N +-1 Ds +3 Dt +619 2127 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +629 2191 MXY +0 25 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +648 2334 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +686 2409 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +762 2516 MXY +0 25 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +916 2516 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-25 0 Dl +1222 2515 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1834 2515 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +5 Dt +619 2139 MXY +10 65 Dl +19 142 Dl +38 75 Dl +76 108 Dl +154 -1 Dl +306 -1 Dl +612 0 Dl +694 2401(512)N +3 Dt +631 2064 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +641 2077 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +660 2132 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +698 2292 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +775 2382 MXY +-25 24 Dl +12 -12 Dl +-12 -12 Dl +25 24 Dl +928 2516 MXY +-25 24 Dl +13 -12 Dl +-13 -12 Dl +25 24 Dl +1234 2516 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +1846 2516 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +16 Ds +1 Dt +619 2076 MXY +10 14 Dl +19 54 Dl +38 160 Dl +76 90 Dl +154 134 Dl +306 1 Dl +612 -1 Dl +694 2257(1024)N +-1 Ds +3 Dt +619 1877 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +629 1855 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +648 1838 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +686 1860 MXY +12 -25 Dl +-24 0 Dl +12 25 Dl +762 1923 MXY +13 -24 Dl +-25 0 Dl +12 24 Dl +916 2087 MXY +12 -24 Dl +-25 0 Dl +13 24 Dl +1222 2256 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +1834 2541 MXY +12 -25 Dl +-24 0 Dl +12 25 Dl +619 1865 MXY +10 -22 Dl +19 -17 Dl +38 21 Dl +76 64 Dl +154 164 Dl +306 169 Dl +612 285 Dl +1645 2427(4096)N +619 1243 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +629 1196 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +648 1146 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +686 1174 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +762 1249 MXY +0 24 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +916 1371 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-25 0 Dl +1222 1680 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1834 1999 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +619 1255 MXY +10 -47 Dl +19 -50 Dl +38 28 Dl +76 75 Dl +154 122 Dl +306 309 Dl +612 319 Dl +1741 1934(8192)N +5 Dt +609 2531 MXY +1225 0 Dl +609 MX +0 -1553 Dl +2531 MY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +593 2625(0)N +-1 Ds +5 Dt +916 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +884 2625(32)N +-1 Ds +5 Dt +1222 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1190 2625(64)N +-1 Ds +5 Dt +1528 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1496 2625(96)N +-1 Ds +5 Dt +1834 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1786 2625(128)N +-1 Ds +5 Dt +609 2531 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +545 2558(0)N +-1 Ds +5 Dt +609 2013 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 2040(100)N +-1 Ds +5 Dt +609 1496 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 1523(200)N +-1 Ds +5 Dt +609 978 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 1005(300)N +1088 2724(Fill)N +1194(Factor)X +422 1611(S)N +426 1667(e)N +426 1724(c)N +424 1780(o)N +424 1837(n)N +424 1893(d)N +428 1949(s)N +3 Dt +-1 Ds +3 f +432 2882(Figure)N +636(5a:)X +1 f +744(System)X +956(Time)X +1113(for)X +1209(dictionary)X +1490(data)X +1618(set)X +1711(with)X +1847(1M)X +1958(of)X +2033(buffer)X +432 2970(space)N +594(and)X +707(varying)X +923(bucket)X +1114(sizes)X +1259(and)X +1372(\256ll)X +1465(factors.)X +1675(Each)X +1823(line)X +1940(is)X +2004(labeled)X +432 3058(with)N +562(its)X +639(bucket)X +825(size.)X +10 s +10 f +432 3234 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +8 s +1 f +428 4381(s)N +424 4325(d)N +424 4269(n)N +424 4212(o)N +426 4156(c)N +426 4099(e)N +422 4043(S)N +1116 5156(Fill)N +1222(Factor)X +506 3437(3200)N +4 Ds +1 Dt +666 3410 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +506 3825(2400)N +4 Ds +1 Dt +666 3799 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +506 4214(1600)N +4 Ds +1 Dt +666 4186 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +538 4602(800)N +4 Ds +1 Dt +666 4575 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +602 4990(0)N +4 Ds +1 Dt +666 4963 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +1786 5057(128)N +4 Ds +1 Dt +1834 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +1510 5057(96)N +4 Ds +1 Dt +1542 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +1218 5057(64)N +4 Ds +1 Dt +1250 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +926 5057(32)N +4 Ds +1 Dt +958 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +650 5057(0)N +4 Ds +1 Dt +666 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +4963 MY +0 -1553 Dl +4963 MY +1168 0 Dl +1741 4752(8192)N +3 Dt +675 3732 MXY +9 -172 Dl +18 -118 Dl +37 128 Dl +73 -121 Dl +146 623 Dl +292 497 Dl +584 245 Dl +4802 MY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1250 4557 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +958 4060 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +812 3437 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +739 3558 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +702 3430 MXY +0 25 Dl +0 -13 Dl +13 0 Dl +-25 0 Dl +684 3548 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +675 3720 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1637 4912(4096)N +675 4307 MXY +9 -58 Dl +18 30 Dl +37 89 Dl +73 144 Dl +146 235 Dl +292 122 Dl +584 89 Dl +4970 MY +12 -24 Dl +-24 0 Dl +12 24 Dl +1250 4881 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +958 4759 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +812 4524 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +739 4380 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +702 4291 MXY +13 -24 Dl +-25 0 Dl +12 24 Dl +684 4261 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +675 4319 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +734 4662(1024)N +16 Ds +1 Dt +675 4352 MXY +9 60 Dl +18 134 Dl +37 266 Dl +73 117 Dl +146 30 Dl +292 0 Dl +584 -1 Dl +-1 Ds +3 Dt +1846 4946 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +1262 4946 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +970 4947 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +824 4917 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +751 4800 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +715 4534 MXY +-25 25 Dl +12 -13 Dl +-12 -12 Dl +25 25 Dl +696 4400 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +687 4339 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +718 4792(512)N +5 Dt +675 4422 MXY +9 137 Dl +18 278 Dl +37 105 Dl +73 18 Dl +146 -1 Dl +292 0 Dl +584 -1 Dl +3 Dt +4946 MY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1250 4946 MXY +0 25 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +958 4947 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +812 4948 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +739 4930 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +702 4824 MXY +0 25 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +684 4547 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +675 4410 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +750 4921(256)N +20 Ds +1 Dt +675 4597 MXY +9 246 Dl +18 106 Dl +37 10 Dl +73 0 Dl +146 0 Dl +292 0 Dl +584 -1 Dl +-1 Ds +3 Dt +1822 MX +23 Dc +1238 4959 MXY +23 Dc +946 MX +23 Dc +800 MX +23 Dc +727 MX +23 Dc +691 4949 MXY +23 Dc +672 4843 MXY +24 Dc +663 4597 MXY +24 Dc +1395 4961(128)N +1 Dt +675 4855 MXY +9 93 Dl +18 10 Dl +37 1 Dl +73 0 Dl +146 -1 Dl +292 0 Dl +584 0 Dl +3 Dt +4946 MY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1250 MX +-12 24 Dl +24 0 Dl +-12 -24 Dl +958 MX +-12 24 Dl +24 0 Dl +-12 -24 Dl +812 MX +-12 25 Dl +24 0 Dl +-12 -25 Dl +739 4947 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +702 4946 MXY +-12 24 Dl +25 0 Dl +-13 -24 Dl +684 4936 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +675 4843 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +3 Dt +-1 Ds +3 f +432 5314(Figure)N +634(5b:)X +1 f +744(Elapsed)X +967(Time)X +1123(for)X +1218(dictionary)X +1498(data)X +1625(set)X +1717(with)X +1851(1M)X +1960(of)X +2033(buffer)X +432 5402(space)N +593(and)X +705(varying)X +920(bucket)X +1110(sizes)X +1254(and)X +1366(\256ll)X +1457(factors.)X +1681(Each)X +1827(line)X +1942(is)X +2004(labeled)X +432 5490(with)N +562(its)X +639(bucket)X +825(size.)X +10 s +2590 538(If)N +2677(an)X +2785(approximation)X +3284(of)X +3383(the)X +3513(number)X +3790(of)X +3889(elements)X +2418 626(ultimately)N +2773(to)X +2866(be)X +2973(stored)X +3200(in)X +3293(the)X +3422(hash)X +3599(table)X +3785(is)X +3868(known)X +4116(at)X +2418 714(the)N +2564(time)X +2754(of)X +2869(creation,)X +3196(the)X +3342(hash)X +3536(package)X +3847(takes)X +4059(this)X +2418 802(number)N +2688(as)X +2779(a)X +2839(parameter)X +3185(and)X +3325(uses)X +3487(it)X +3555(to)X +3641(hash)X +3812(entries)X +4050(into)X +2418 890(the)N +2541(full)X +2677(sized)X +2867(table)X +3048(rather)X +3261(than)X +3424(growing)X +3716(the)X +3838(table)X +4018(from)X +2418 978(a)N +2477(single)X +2691(bucket.)X +2968(If)X +3044(this)X +3181(number)X +3448(is)X +3523(not)X +3647(known,)X +3907(the)X +4027(hash)X +2418 1066(table)N +2632(starts)X +2859(with)X +3059(a)X +3153(single)X +3402(bucket)X +3674(and)X +3848(gracefully)X +2418 1154(expands)N +2707(as)X +2800(elements)X +3111(are)X +3236(added,)X +3474(although)X +3780(a)X +3842(slight)X +4044(per-)X +2418 1242(formance)N +2747(degradation)X +3151(may)X +3313(be)X +3413(noticed.)X +3713(Figure)X +3946(6)X +4010(illus-)X +2418 1330(trates)N +2625(the)X +2756(difference)X +3116(in)X +3211(performance)X +3651(between)X +3952(storing)X +2418 1418(keys)N +2588(in)X +2673(a)X +2732(\256le)X +2857(when)X +3054(the)X +3174(ultimate)X +3458(size)X +3605(is)X +3680(known)X +3920(\(the)X +4067(left)X +2418 1506(bars)N +2581(in)X +2672(each)X +2849(set\),)X +3014(compared)X +3360(to)X +3450(building)X +3744(the)X +3870(\256le)X +4000(when)X +2418 1594(the)N +2550(ultimate)X +2846(size)X +3005(is)X +3091(unknown)X +3422(\(the)X +3580(right)X +3764(bars)X +3931(in)X +4026(each)X +2418 1682(set\).)N +2609(Once)X +2814(the)X +2947(\256ll)X +3069(factor)X +3291(is)X +3378(suf\256ciently)X +3772(high)X +3948(for)X +4076(the)X +2418 1770(page)N +2596(size)X +2747(\(8\),)X +2887(growing)X +3180(the)X +3304(table)X +3486(dynamically)X +3908(does)X +4081(lit-)X +2418 1858(tle)N +2518(to)X +2600(degrade)X +2875(performance.)X +10 f +2418 1946 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +9 s +1 f +2413 3238(s)N +2409 3173(d)N +2409 3108(n)N +2409 3043(o)N +2411 2979(c)N +2411 2914(e)N +2407 2849(S)N +3143 4129(Fill)N +3261(Factor)X +2448 2152(15)N +4 Ds +1 Dt +2557 2122 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2448 2747(10)N +4 Ds +1 Dt +2557 2717 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2484 3343(5)N +4 Ds +1 Dt +2557 3313 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2484 3938(0)N +4 Ds +1 Dt +2557 3908 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +3976 4015(128)N +4 Ds +1 Dt +4030 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3626 4015(96)N +4 Ds +1 Dt +3662 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3258 4015(64)N +4 Ds +1 Dt +3294 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +2889 4015(32)N +4 Ds +1 Dt +2925 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +2539 4015(0)N +4 Ds +1 Dt +2557 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3908 MY +0 -1786 Dl +3908 MY +1473 0 Dl +4053 2378(8192)N +3 Dt +2569 2277 MXY +11 0 Dl +23 48 Dl +46 -167 Dl +92 35 Dl +184 12 Dl +369 143 Dl +736 0 Dl +2334 MY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3294 2334 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2925 2192 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2741 2180 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2649 2144 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2603 2311 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +2580 2263 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2569 2263 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +4053 2591(4096)N +2569 2348 MXY +11 -11 Dl +23 -96 Dl +46 71 Dl +92 72 Dl +184 226 Dl +369 48 Dl +736 -60 Dl +2612 MY +14 -28 Dl +-28 0 Dl +14 28 Dl +3294 2672 MXY +13 -28 Dl +-27 0 Dl +14 28 Dl +2925 2624 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2741 2398 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2649 2326 MXY +14 -27 Dl +-28 0 Dl +14 27 Dl +2603 2255 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2580 2350 MXY +14 -27 Dl +-28 0 Dl +14 27 Dl +2569 2362 MXY +13 -28 Dl +-27 0 Dl +14 28 Dl +4053 2681(1024)N +16 Ds +1 Dt +2569 2300 MXY +11 48 Dl +23 96 Dl +46 95 Dl +92 274 Dl +184 202 Dl +369 -155 Dl +736 -190 Dl +-1 Ds +3 Dt +4044 2656 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +3307 2846 MXY +-27 28 Dl +14 -14 Dl +-14 -14 Dl +27 28 Dl +2939 3001 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2755 2799 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2663 2525 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2617 2430 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2594 2334 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2582 2287 MXY +-27 27 Dl +14 -14 Dl +-14 -13 Dl +27 27 Dl +4053 2851(512)N +5 Dt +2569 2372 MXY +11 -24 Dl +23 405 Dl +46 83 Dl +92 227 Dl +184 -72 Dl +369 -119 Dl +736 -107 Dl +3 Dt +2751 MY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3294 2858 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2925 2977 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2741 3049 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +2649 2823 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2603 2739 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2580 2334 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2569 2358 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +4053 2795(256)N +20 Ds +1 Dt +2569 2456 MXY +11 285 Dl +23 95 Dl +46 251 Dl +92 -60 Dl +184 -84 Dl +369 -107 Dl +736 -71 Dl +-1 Ds +3 Dt +4016 MX +27 Dc +3280 2836 MXY +27 Dc +2912 2943 MXY +27 Dc +2728 3027 MXY +27 Dc +2635 3087 MXY +28 Dc +2589 2836 MXY +28 Dc +2566 2741 MXY +27 Dc +2554 2456 MXY +28 Dc +4053 2741(128)N +1 Dt +2569 2729 MXY +11 203 Dl +23 131 Dl +46 -60 Dl +92 -119 Dl +184 -60 Dl +369 -83 Dl +736 -12 Dl +3 Dt +2716 MY +-14 27 Dl +28 0 Dl +-14 -27 Dl +3294 2727 MXY +-14 28 Dl +27 0 Dl +-13 -28 Dl +2925 2811 MXY +-14 27 Dl +28 0 Dl +-14 -27 Dl +2741 2870 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2649 2989 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2603 3049 MXY +-14 27 Dl +28 0 Dl +-14 -27 Dl +2580 2918 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2569 2716 MXY +-14 27 Dl +27 0 Dl +-13 -27 Dl +3 Dt +-1 Ds +3 f +8 s +2418 4286(Figure)N +2628(5c:)X +1 f +2738(User)X +2887(Time)X +3051(for)X +3154(dictionary)X +3442(data)X +3577(set)X +3677(with)X +3820(1M)X +3938(of)X +4019(buffer)X +2418 4374(space)N +2579(and)X +2691(varying)X +2906(bucket)X +3096(sizes)X +3240(and)X +3352(\256ll)X +3443(factors.)X +3667(Each)X +3813(line)X +3928(is)X +3990(labeled)X +2418 4462(with)N +2548(its)X +2625(bucket)X +2811(size.)X +10 s +10 f +2418 4638 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +2590 4840(Since)N +2796(no)X +2904(known)X +3150(hash)X +3325(function)X +3620(performs)X +3938(equally)X +2418 4928(well)N +2589(on)X +2702(all)X +2815(possible)X +3110(data,)X +3297(the)X +3428(user)X +3595(may)X +3766(\256nd)X +3923(that)X +4076(the)X +2418 5016(built-in)N +2678(hash)X +2849(function)X +3140(does)X +3311(poorly)X +3544(on)X +3648(a)X +3708(particular)X +4040(data)X +2418 5104(set.)N +2548(In)X +2636(this)X +2771(case,)X +2950(a)X +3006(hash)X +3173(function,)X +3480(taking)X +3700(two)X +3840(arguments)X +2418 5192(\(a)N +2507(pointer)X +2760(to)X +2848(a)X +2910(byte)X +3074(string)X +3282(and)X +3424(a)X +3486(length\))X +3739(and)X +3880(returning)X +2418 5280(an)N +2517(unsigned)X +2829(long)X +2993(to)X +3077(be)X +3175(used)X +3344(as)X +3433(the)X +3553(hash)X +3722(value,)X +3938(may)X +4098(be)X +2418 5368(speci\256ed)N +2731(at)X +2817(hash)X +2992(table)X +3176(creation)X +3463(time.)X +3673(When)X +3893(an)X +3996(exist-)X +2418 5456(ing)N +2570(hash)X +2767(table)X +2973(is)X +3076(opened)X +3358(and)X +3524(a)X +3609(hash)X +3805(function)X +4121(is)X +2418 5544(speci\256ed,)N +2752(the)X +2879(hash)X +3054(package)X +3346(will)X +3498(try)X +3615(to)X +3705(determine)X +4054(that)X +2418 5632(the)N +2546(hash)X +2723(function)X +3020(supplied)X +3321(is)X +3404(the)X +3532(one)X +3678(with)X +3850(which)X +4076(the)X +2418 5720(table)N +2630(was)X +2811(created.)X +3139(There)X +3382(are)X +3536(a)X +3627(variety)X +3905(of)X +4027(hash)X +3 f +432 5960(8)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +9 p +%%Page: 9 9 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(functions)N +1065(provided)X +1397(with)X +1586(the)X +1731(package.)X +2082(The)X +2253(default)X +720 626(function)N +1014(for)X +1135(the)X +1260(package)X +1551(is)X +1631(the)X +1755(one)X +1897(which)X +2119(offered)X +2378(the)X +720 714(best)N +875(performance)X +1308(in)X +1396(terms)X +1600(of)X +1693(cycles)X +1920(executed)X +2232(per)X +2360(call)X +720 802(\(it)N +827(did)X +965(not)X +1103(produce)X +1398(the)X +1531(fewest)X +1776(collisions)X +2117(although)X +2432(it)X +720 890(was)N +866(within)X +1091(a)X +1148(small)X +1341(percentage)X +1710(of)X +1797(the)X +1915(function)X +2202(that)X +2342(pro-)X +720 978(duced)N +947(the)X +1080(fewest)X +1324(collisions\).)X +1731(Again,)X +1981(in)X +2077(time)X +2253(critical)X +720 1066(applications,)N +1152(users)X +1342(are)X +1466(encouraged)X +1862(to)X +1949(experiment)X +2334(with)X +720 1154(a)N +783(variety)X +1032(of)X +1125(hash)X +1298(functions)X +1622(to)X +1710(achieve)X +1982(optimal)X +2252(perfor-)X +720 1242(mance.)N +10 f +720 1330 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +7 s +1038 2925(Full)N +1149(size)X +1251(table)X +1384(\(left\))X +1547 2718(Fill)N +1643(Factor)X +2268 2662(64)N +1964(32)X +1674(16)X +1384(8)X +1093(4)X +4 Ds +1 Dt +900 2280 MXY +1548 0 Dl +900 1879 MXY +1548 0 Dl +900 1506 MXY +1548 0 Dl +1563 2902 MXY +111 0 Dl +-1 Ds +900 MX +110 0 Dl +1425 2828(System)N +983(User)X +1895 2778 MXY + 1895 2778 lineto + 1950 2778 lineto + 1950 2833 lineto + 1895 2833 lineto + 1895 2778 lineto +closepath 21 1895 2778 1950 2833 Dp +1342 MX + 1342 2778 lineto + 1397 2778 lineto + 1397 2833 lineto + 1342 2833 lineto + 1342 2778 lineto +closepath 14 1342 2778 1397 2833 Dp +900 MX + 900 2778 lineto + 955 2778 lineto + 955 2833 lineto + 900 2833 lineto + 900 2778 lineto +closepath 3 900 2778 955 2833 Dp +5 Dt +2283 2211 MXY +96 0 Dl +1992 MX +97 0 Dl +1702 MX +97 0 Dl +1411 2252 MXY +97 0 Dl +4 Ds +1 Dt +2283 2211 MXY + 2283 2211 lineto + 2379 2211 lineto + 2379 2252 lineto + 2283 2252 lineto + 2283 2211 lineto +closepath 14 2283 2211 2379 2252 Dp +1992 MX + 1992 2211 lineto + 2089 2211 lineto + 2089 2252 lineto + 1992 2252 lineto + 1992 2211 lineto +closepath 14 1992 2211 2089 2252 Dp +1702 MX + 1702 2211 lineto + 1799 2211 lineto + 1799 2252 lineto + 1702 2252 lineto + 1702 2211 lineto +closepath 14 1702 2211 1799 2252 Dp +1411 2252 MXY + 1411 2252 lineto + 1508 2252 lineto + 1508 2294 lineto + 1411 2294 lineto + 1411 2252 lineto +closepath 14 1411 2252 1508 2294 Dp +2283 MX + 2283 2252 lineto + 2379 2252 lineto + 2379 2612 lineto + 2283 2612 lineto + 2283 2252 lineto +closepath 3 2283 2252 2379 2612 Dp +1992 MX + 1992 2252 lineto + 2089 2252 lineto + 2089 2612 lineto + 1992 2612 lineto + 1992 2252 lineto +closepath 3 1992 2252 2089 2612 Dp +1702 MX + 1702 2252 lineto + 1799 2252 lineto + 1799 2612 lineto + 1702 2612 lineto + 1702 2252 lineto +closepath 3 1702 2252 1799 2612 Dp +1411 2294 MXY + 1411 2294 lineto + 1508 2294 lineto + 1508 2612 lineto + 1411 2612 lineto + 1411 2294 lineto +closepath 3 1411 2294 1508 2612 Dp +-1 Ds +2158 2238 MXY + 2158 2238 lineto + 2255 2238 lineto + 2255 2252 lineto + 2158 2252 lineto + 2158 2238 lineto +closepath 21 2158 2238 2255 2252 Dp +1868 MX + 1868 2238 lineto + 1965 2238 lineto + 1965 2280 lineto + 1868 2280 lineto + 1868 2238 lineto +closepath 21 1868 2238 1965 2280 Dp +1577 MX + 1577 2238 lineto + 1674 2238 lineto + 1674 2308 lineto + 1577 2308 lineto + 1577 2238 lineto +closepath 21 1577 2238 1674 2308 Dp +1287 2308 MXY + 1287 2308 lineto + 1287 2280 lineto + 1384 2280 lineto + 1384 2308 lineto + 1287 2308 lineto +closepath 21 1287 2280 1384 2308 Dp +2158 2280 MXY + 2158 2280 lineto + 2158 2252 lineto + 2255 2252 lineto + 2255 2280 lineto + 2158 2280 lineto +closepath 14 2158 2252 2255 2280 Dp +1868 2308 MXY + 1868 2308 lineto + 1868 2280 lineto + 1965 2280 lineto + 1965 2308 lineto + 1868 2308 lineto +closepath 14 1868 2280 1965 2308 Dp +1577 2335 MXY + 1577 2335 lineto + 1577 2308 lineto + 1674 2308 lineto + 1674 2335 lineto + 1577 2335 lineto +closepath 14 1577 2308 1674 2335 Dp +1287 2363 MXY + 1287 2363 lineto + 1287 2308 lineto + 1384 2308 lineto + 1384 2363 lineto + 1287 2363 lineto +closepath 14 1287 2308 1384 2363 Dp +2158 2280 MXY + 2158 2280 lineto + 2255 2280 lineto + 2255 2612 lineto + 2158 2612 lineto + 2158 2280 lineto +closepath 3 2158 2280 2255 2612 Dp +1868 2308 MXY + 1868 2308 lineto + 1965 2308 lineto + 1965 2612 lineto + 1868 2612 lineto + 1868 2308 lineto +closepath 3 1868 2308 1965 2612 Dp +1577 2335 MXY + 1577 2335 lineto + 1674 2335 lineto + 1674 2612 lineto + 1577 2612 lineto + 1577 2335 lineto +closepath 3 1577 2335 1674 2612 Dp +1287 2363 MXY + 1287 2363 lineto + 1384 2363 lineto + 1384 2612 lineto + 1287 2612 lineto + 1287 2363 lineto +closepath 3 1287 2363 1384 2612 Dp +4 Ds +1121 2066 MXY + 1121 2066 lineto + 1218 2066 lineto + 1224 2080 lineto + 1127 2080 lineto + 1121 2066 lineto +closepath 21 1121 2066 1224 2080 Dp +2080 MY + 1121 2080 lineto + 1218 2080 lineto + 1218 2273 lineto + 1121 2273 lineto + 1121 2080 lineto +closepath 14 1121 2080 1218 2273 Dp +2273 MY + 1121 2273 lineto + 1218 2273 lineto + 1218 2612 lineto + 1121 2612 lineto + 1121 2273 lineto +closepath 3 1121 2273 1218 2612 Dp +-1 Ds +997 1589 MXY + 997 1589 lineto + 1093 1589 lineto + 1093 1644 lineto + 997 1644 lineto + 997 1589 lineto +closepath 21 997 1589 1093 1644 Dp +1644 MY + 997 1644 lineto + 1093 1644 lineto + 1093 2280 lineto + 997 2280 lineto + 997 1644 lineto +closepath 14 997 1644 1093 2280 Dp +2280 MY + 997 2280 lineto + 1093 2280 lineto + 1093 2612 lineto + 997 2612 lineto + 997 2280 lineto +closepath 3 997 2280 1093 2612 Dp +10 s +719 2093(s)N +712 2037(d)N +712 1982(n)N +714 1927(o)N +716 1872(c)N +716 1816(e)N +712 1761(S)N +804 2286(10)N +804 1899(20)N +804 1540(30)N +3 Dt +900 1506 MXY +0 1106 Dl +1548 0 Dl +7 s +1978 2828(Elapsed)N +1701 2925(Dynamically)N +2018(grown)X +2184(table)X +2317(\(right\))X +3 Dt +-1 Ds +8 s +720 3180(Figure)N +934(6:)X +1 f +1020(The)X +1152(total)X +1299(regions)X +1520(indicate)X +1755(the)X +1865(difference)X +2154(between)X +2398(the)X +720 3268(elapsed)N +931(time)X +1065(and)X +1177(the)X +1275(sum)X +1402(of)X +1475(the)X +1573(system)X +1771(and)X +1883(user)X +2008(time.)X +2173(The)X +2291(left)X +2395(bar)X +720 3356(of)N +798(each)X +939(set)X +1035(depicts)X +1241(the)X +1344(timing)X +1537(of)X +1615(the)X +1718(test)X +1831(run)X +1940(when)X +2102(the)X +2204(number)X +2423(of)X +720 3444(entries)N +910(is)X +973(known)X +1167(in)X +1237(advance.)X +1496(The)X +1614(right)X +1754(bars)X +1879(depict)X +2054(the)X +2151(timing)X +2338(when)X +720 3532(the)N +814(\256le)X +912(is)X +971(grown)X +1150(from)X +1290(a)X +1334(single)X +1503(bucket.)X +10 s +10 f +720 3708 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +892 3910(Since)N +1131(this)X +1307(hashing)X +1617(package)X +1942(provides)X +2279(buffer)X +720 3998(management,)N +1188(the)X +1323(amount)X +1600(of)X +1704(space)X +1920(allocated)X +2247(for)X +2378(the)X +720 4086(buffer)N +948(pool)X +1121(may)X +1290(be)X +1397(speci\256ed)X +1713(by)X +1824(the)X +1953(user.)X +2157(Using)X +2378(the)X +720 4174(same)N +910(data)X +1069(set)X +1183(and)X +1324(test)X +1459(procedure)X +1805(as)X +1896(used)X +2067(to)X +2153(derive)X +2378(the)X +720 4262(graphs)N +962(in)X +1052(Figures)X +1320(5a-c,)X +1507(Figure)X +1744(7)X +1812(shows)X +2039(the)X +2164(impact)X +2409(of)X +720 4350(varying)N +997(the)X +1126(size)X +1282(of)X +1380(the)X +1509(buffer)X +1737(pool.)X +1950(The)X +2106(bucket)X +2351(size)X +720 4438(was)N +873(set)X +989(to)X +1078(256)X +1225(bytes)X +1421(and)X +1564(the)X +1689(\256ll)X +1804(factor)X +2019(was)X +2171(set)X +2287(to)X +2376(16.)X +720 4526(The)N +869(buffer)X +1090(pool)X +1256(size)X +1404(was)X +1552(varied)X +1776(from)X +1955(0)X +2018(\(the)X +2166(minimum)X +720 4614(number)N +986(of)X +1074(pages)X +1277(required)X +1565(to)X +1647(be)X +1743(buffered\))X +2063(to)X +2145(1M.)X +2316(With)X +720 4702(1M)N +854(of)X +944(buffer)X +1164(space,)X +1386(the)X +1507(package)X +1794(performed)X +2151(no)X +2253(I/O)X +2382(for)X +720 4790(this)N +871(data)X +1040(set.)X +1204(As)X +1328(Figure)X +1572(7)X +1647(illustrates,)X +2013(increasing)X +2378(the)X +720 4878(buffer)N +944(pool)X +1113(size)X +1265(can)X +1404(have)X +1583(a)X +1646(dramatic)X +1954(affect)X +2165(on)X +2271(result-)X +720 4966(ing)N +842(performance.)X +2 f +8 s +1269 4941(7)N +1 f +16 s +720 5353 MXY +864 0 Dl +2 f +8 s +760 5408(7)N +1 f +9 s +826 5433(Some)N +1024(allocators)X +1338(are)X +1460(extremely)X +1782(inef\256cient)X +2107(at)X +2192(allocating)X +720 5513(memory.)N +1029(If)X +1110(you)X +1251(\256nd)X +1396(that)X +1536(applications)X +1916(are)X +2036(running)X +2292(out)X +2416(of)X +720 5593(memory)N +1005(before)X +1234(you)X +1386(think)X +1578(they)X +1746(should,)X +2000(try)X +2124(varying)X +2388(the)X +720 5673(pagesize)N +986(to)X +1060(get)X +1166(better)X +1348(utilization)X +1658(from)X +1816(the)X +1922(memory)X +2180(allocator.)X +10 s +2830 1975 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +2853 2004 MXY +0 -27 Dl +28 0 Dl +0 27 Dl +-28 0 Dl +2876 2016 MXY +0 -27 Dl +27 0 Dl +0 27 Dl +-27 0 Dl +2922 1998 MXY +0 -27 Dl +27 0 Dl +0 27 Dl +-27 0 Dl +2967 2025 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +3013 2031 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +3059 MX +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +3196 2052 MXY +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +3561 2102 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +4292 2105 MXY +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +4 Ds +1 Dt +2844 1961 MXY +23 30 Dl +23 12 Dl +45 -18 Dl +46 26 Dl +46 6 Dl +45 0 Dl +137 21 Dl +366 50 Dl +730 3 Dl +9 s +4227 2158(User)N +-1 Ds +3 Dt +2830 1211 MXY +27 Dc +2853 1261 MXY +27 Dc +2876 1267 MXY +27 Dc +2921 1341 MXY +27 Dc +2967 1385 MXY +27 Dc +3013 1450 MXY +27 Dc +3059 1497 MXY +27 Dc +3196 1686 MXY +27 Dc +3561 2109 MXY +27 Dc +4292 2295 MXY +27 Dc +20 Ds +1 Dt +2844 1211 MXY +23 50 Dl +23 6 Dl +45 74 Dl +46 44 Dl +46 65 Dl +45 47 Dl +137 189 Dl +366 423 Dl +730 186 Dl +4181 2270(System)N +-1 Ds +3 Dt +2844 583 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2867 672 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2890 701 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2935 819 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +2981 849 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3027 908 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +3072 1026 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-27 0 Dl +3209 1292 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +3575 1823 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +4305 2059 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +5 Dt +2844 597 MXY +23 88 Dl +23 30 Dl +45 118 Dl +46 30 Dl +46 59 Dl +45 118 Dl +137 265 Dl +366 532 Dl +730 236 Dl +4328 2103(Total)N +2844 2310 MXY +1461 0 Dl +2844 MX +0 -1772 Dl +2310 MY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +2826 2416(0)N +-1 Ds +5 Dt +3209 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3155 2416(256)N +-1 Ds +5 Dt +3575 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3521 2416(512)N +-1 Ds +5 Dt +3940 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3886 2416(768)N +-1 Ds +5 Dt +4305 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +4233 2416(1024)N +-1 Ds +5 Dt +2844 2310 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2771 2340(0)N +-1 Ds +5 Dt +2844 2014 MXY +-18 0 Dl +2844 1719 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 1749(20)N +-1 Ds +5 Dt +2844 1423 MXY +-18 0 Dl +2844 1128 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 1158(40)N +-1 Ds +5 Dt +2844 833 MXY +-18 0 Dl +2844 538 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 568(60)N +3239 2529(Buffer)N +3445(Pool)X +3595(Size)X +3737(\(in)X +3835(K\))X +2695 1259(S)N +2699 1324(e)N +2699 1388(c)N +2697 1452(o)N +2697 1517(n)N +2697 1581(d)N +2701 1645(s)N +3 Dt +-1 Ds +3 f +8 s +2706 2773(Figure)N +2908(7:)X +1 f +2982(User)X +3123(time)X +3258(is)X +3322(virtually)X +3560(insensitive)X +3854(to)X +3924(the)X +4022(amount)X +4234(of)X +4307(buffer)X +2706 2861(pool)N +2852(available,)X +3130(however,)X +3396(both)X +3541(system)X +3750(time)X +3895(and)X +4018(elapsed)X +4240(time)X +4385(are)X +2706 2949(inversely)N +2960(proportional)X +3296(to)X +3366(the)X +3464(size)X +3583(of)X +3656(the)X +3753(buffer)X +3927(pool.)X +4092(Even)X +4242(for)X +4335(large)X +2706 3037(data)N +2831(sets)X +2946(where)X +3120(one)X +3230(expects)X +3439(few)X +3552(collisions,)X +3832(specifying)X +4116(a)X +4162(large)X +4307(buffer)X +2706 3125(pool)N +2836(dramatically)X +3171(improves)X +3425(performance.)X +10 s +10 f +2706 3301 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +3175 3543(Enhanced)N +3536(Functionality)X +1 f +2878 3675(This)N +3046(hashing)X +3320(package)X +3609(provides)X +3910(a)X +3971(set)X +4085(of)X +4177(compati-)X +2706 3763(bility)N +2895(routines)X +3174(to)X +3257(implement)X +3620(the)X +2 f +3739(ndbm)X +1 f +3937(interface.)X +4279(How-)X +2706 3851(ever,)N +2893(when)X +3095(the)X +3220(native)X +3443(interface)X +3752(is)X +3832(used,)X +4026(the)X +4151(following)X +2706 3939(additional)N +3046(functionality)X +3475(is)X +3548(provided:)X +10 f +2798 4071(g)N +1 f +2946(Inserts)X +3197(never)X +3413(fail)X +3556(because)X +3847(too)X +3985(many)X +4199(keys)X +2946 4159(hash)N +3113(to)X +3195(the)X +3313(same)X +3498(value.)X +10 f +2798 4247(g)N +1 f +2946(Inserts)X +3187(never)X +3393(fail)X +3527(because)X +3808(key)X +3950(and/or)X +4181(asso-)X +2946 4335(ciated)N +3158(data)X +3312(is)X +3385(too)X +3507(large)X +10 f +2798 4423(g)N +1 f +2946(Hash)X +3131(functions)X +3449(may)X +3607(be)X +3703(user-speci\256ed.)X +10 f +2798 4511(g)N +1 f +2946(Multiple)X +3268(pages)X +3498(may)X +3683(be)X +3806(cached)X +4077(in)X +4186(main)X +2946 4599(memory.)N +2706 4731(It)N +2801(also)X +2976(provides)X +3298(a)X +3380(set)X +3514(of)X +3626(compatibility)X +4097(routines)X +4400(to)X +2706 4819(implement)N +3087(the)X +2 f +3224(hsearch)X +1 f +3516(interface.)X +3876(Again,)X +4130(the)X +4266(native)X +2706 4907(interface)N +3008(offers)X +3216(enhanced)X +3540(functionality:)X +10 f +2798 5039(g)N +1 f +2946(Files)X +3121(may)X +3279(grow)X +3464(beyond)X +2 f +3720(nelem)X +1 f +3932(elements.)X +10 f +2798 5127(g)N +1 f +2946(Multiple)X +3247(hash)X +3420(tables)X +3632(may)X +3795(be)X +3896(accessed)X +4203(con-)X +2946 5215(currently.)N +10 f +2798 5303(g)N +1 f +2946(Hash)X +3134(tables)X +3344(may)X +3505(be)X +3604(stored)X +3823(and)X +3962(accessed)X +4266(on)X +2946 5391(disk.)N +10 f +2798 5479(g)N +1 f +2946(Hash)X +3155(functions)X +3497(may)X +3679(be)X +3799(user-speci\256ed)X +4288(at)X +2946 5567(runtime.)N +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(9)X + +10 p +%%Page: 10 10 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +459 538(Relative)N +760(Performance)X +1227(of)X +1314(the)X +1441(New)X +1613(Implementation)X +1 f +604 670(The)N +761(performance)X +1200(testing)X +1445(of)X +1544(the)X +1674(new)X +1840(package)X +2135(is)X +432 758(divided)N +711(into)X +874(two)X +1033(test)X +1183(suites.)X +1424(The)X +1588(\256rst)X +1751(suite)X +1941(of)X +2046(tests)X +432 846(requires)N +727(that)X +882(the)X +1015(tables)X +1237(be)X +1348(read)X +1522(from)X +1713(and)X +1864(written)X +2126(to)X +432 934(disk.)N +640(In)X +742(these)X +942(tests,)X +1139(the)X +1272(basis)X +1467(for)X +1595(comparison)X +2003(is)X +2090(the)X +432 1022(4.3BSD-Reno)N +908(version)X +1169(of)X +2 f +1260(ndbm)X +1 f +1438(.)X +1502(Based)X +1722(on)X +1826(the)X +1948(designs)X +432 1110(of)N +2 f +521(sdbm)X +1 f +712(and)X +2 f +850(gdbm)X +1 f +1028(,)X +1070(they)X +1230(are)X +1351(expected)X +1659(to)X +1743(perform)X +2024(simi-)X +432 1198(larly)N +605(to)X +2 f +693(ndbm)X +1 f +871(,)X +917(and)X +1059(we)X +1179(do)X +1285(not)X +1413(show)X +1608(their)X +1781(performance)X +432 1286(numbers.)N +800(The)X +977(second)X +1252(suite)X +1454(contains)X +1772(the)X +1921(memory)X +432 1374(resident)N +712(test)X +849(which)X +1071(does)X +1243(not)X +1370(require)X +1623(that)X +1768(the)X +1891(\256les)X +2049(ever)X +432 1462(be)N +533(written)X +784(to)X +870(disk,)X +1047(only)X +1213(that)X +1357(hash)X +1528(tables)X +1739(may)X +1901(be)X +2001(mani-)X +432 1550(pulated)N +692(in)X +778(main)X +961(memory.)X +1291(In)X +1381(this)X +1519(test,)X +1673(we)X +1790(compare)X +2090(the)X +432 1638(performance)N +859(to)X +941(that)X +1081(of)X +1168(the)X +2 f +1286(hsearch)X +1 f +1560(routines.)X +604 1752(For)N +760(both)X +947(suites,)X +1194(two)X +1358(different)X +1679(databases)X +2031(were)X +432 1840(used.)N +656(The)X +818(\256rst)X +979(is)X +1069(the)X +1204(dictionary)X +1566(database)X +1880(described)X +432 1928(previously.)N +836(The)X +987(second)X +1236(was)X +1386(constructed)X +1781(from)X +1962(a)X +2023(pass-)X +432 2016(word)N +647(\256le)X +799(with)X +990(approximately)X +1502(300)X +1671(accounts.)X +2041(Two)X +432 2104(records)N +700(were)X +887(constructed)X +1287(for)X +1411(each)X +1589(account.)X +1909(The)X +2064(\256rst)X +432 2192(used)N +604(the)X +727(logname)X +1028(as)X +1120(the)X +1243(key)X +1384(and)X +1525(the)X +1648(remainder)X +1999(of)X +2090(the)X +432 2280(password)N +768(entry)X +965(for)X +1091(the)X +1221(data.)X +1427(The)X +1584(second)X +1839(was)X +1996(keyed)X +432 2368(by)N +541(uid)X +672(and)X +817(contained)X +1157(the)X +1283(entire)X +1494(password)X +1825(entry)X +2018(as)X +2113(its)X +432 2456(data)N +589(\256eld.)X +794(The)X +942(tests)X +1107(were)X +1287(all)X +1389(run)X +1518(on)X +1620(the)X +1740(HP)X +1864(9000)X +2046(with)X +432 2544(the)N +574(same)X +783(con\256guration)X +1254(previously)X +1636(described.)X +2027(Each)X +432 2632(test)N +576(was)X +734(run)X +874(\256ve)X +1027(times)X +1232(and)X +1380(the)X +1510(timing)X +1750(results)X +1991(of)X +2090(the)X +432 2720(runs)N +602(were)X +791(averaged.)X +1154(The)X +1311(variance)X +1616(across)X +1849(the)X +1979(5)X +2050(runs)X +432 2808(was)N +591(approximately)X +1088(1%)X +1229(of)X +1330(the)X +1462(average)X +1746(yielding)X +2041(95%)X +432 2896(con\256dence)N +800(intervals)X +1096(of)X +1183(approximately)X +1666(2%.)X +3 f +1021 3050(Disk)N +1196(Based)X +1420(Tests)X +1 f +604 3182(In)N +693(these)X +880(tests,)X +1064(we)X +1180(use)X +1308(a)X +1365(bucket)X +1600(size)X +1746(of)X +1834(1024)X +2015(and)X +2152(a)X +432 3270(\256ll)N +540(factor)X +748(of)X +835(32.)X +3 f +432 3384(create)N +663(test)X +1 f +547 3498(The)N +703(keys)X +881(are)X +1011(entered)X +1279(into)X +1433(the)X +1561(hash)X +1738(table,)X +1944(and)X +2090(the)X +547 3586(\256le)N +669(is)X +742(\257ushed)X +993(to)X +1075(disk.)X +3 f +432 3700(read)N +608(test)X +1 f +547 3814(A)N +640(lookup)X +897(is)X +984(performed)X +1353(for)X +1481(each)X +1663(key)X +1813(in)X +1909(the)X +2041(hash)X +547 3902(table.)N +3 f +432 4016(verify)N +653(test)X +1 f +547 4130(A)N +640(lookup)X +897(is)X +984(performed)X +1353(for)X +1481(each)X +1663(key)X +1813(in)X +1909(the)X +2041(hash)X +547 4218(table,)N +759(and)X +911(the)X +1045(data)X +1215(returned)X +1519(is)X +1608(compared)X +1961(against)X +547 4306(that)N +687(originally)X +1018(stored)X +1234(in)X +1316(the)X +1434(hash)X +1601(table.)X +3 f +432 4420(sequential)N +798(retrieve)X +1 f +547 4534(All)N +674(keys)X +846(are)X +970(retrieved)X +1281(in)X +1367(sequential)X +1716(order)X +1910(from)X +2090(the)X +547 4622(hash)N +724(table.)X +950(The)X +2 f +1105(ndbm)X +1 f +1313(interface)X +1625(allows)X +1863(sequential)X +547 4710(retrieval)N +848(of)X +948(the)X +1079(keys)X +1259(from)X +1448(the)X +1578(database,)X +1907(but)X +2041(does)X +547 4798(not)N +701(return)X +945(the)X +1094(data)X +1279(associated)X +1660(with)X +1853(each)X +2052(key.)X +547 4886(Therefore,)N +929(we)X +1067(compare)X +1388(the)X +1530(performance)X +1980(of)X +2090(the)X +547 4974(new)N +703(package)X +989(to)X +1073(two)X +1215(different)X +1514(runs)X +1674(of)X +2 f +1763(ndbm)X +1 f +1941(.)X +2002(In)X +2090(the)X +547 5062(\256rst)N +697(case,)X +2 f +882(ndbm)X +1 f +1086(returns)X +1335(only)X +1503(the)X +1627(keys)X +1800(while)X +2003(in)X +2090(the)X +547 5150(second,)N +2 f +823(ndbm)X +1 f +1034(returns)X +1290(both)X +1465(the)X +1596(keys)X +1776(and)X +1924(the)X +2054(data)X +547 5238(\(requiring)N +894(a)X +956(second)X +1204(call)X +1345(to)X +1432(the)X +1555(library\).)X +1861(There)X +2074(is)X +2152(a)X +547 5326(single)N +764(run)X +897(for)X +1017(the)X +1141(new)X +1300(library)X +1539(since)X +1729(it)X +1798(returns)X +2046(both)X +547 5414(the)N +665(key)X +801(and)X +937(the)X +1055(data.)X +3 f +3014 538(In-Memory)N +3431(Test)X +1 f +2590 670(This)N +2757(test)X +2892(uses)X +3054(a)X +3114(bucket)X +3352(size)X +3501(of)X +3592(256)X +3736(and)X +3876(a)X +3936(\256ll)X +4048(fac-)X +2418 758(tor)N +2527(of)X +2614(8.)X +3 f +2418 872(create/read)N +2827(test)X +1 f +2533 986(In)N +2627(this)X +2769(test,)X +2927(a)X +2989(hash)X +3162(table)X +3344(is)X +3423(created)X +3682(by)X +3788(inserting)X +4094(all)X +2533 1074(the)N +2660(key/data)X +2961(pairs.)X +3186(Then)X +3380(a)X +3445(keyed)X +3666(retrieval)X +3963(is)X +4044(per-)X +2533 1162(formed)N +2801(for)X +2931(each)X +3115(pair,)X +3295(and)X +3446(the)X +3579(hash)X +3761(table)X +3952(is)X +4040(des-)X +2533 1250(troyed.)N +3 f +2938 1404(Performance)N +3405(Results)X +1 f +2590 1536(Figures)N +2866(8a)X +2978(and)X +3130(8b)X +3246(show)X +3451(the)X +3585(user)X +3755(time,)X +3952(system)X +2418 1624(time,)N +2608(and)X +2752(elapsed)X +3021(time)X +3191(for)X +3312(each)X +3487(test)X +3625(for)X +3746(both)X +3915(the)X +4040(new)X +2418 1712(implementation)N +2951(and)X +3098(the)X +3227(old)X +3360(implementation)X +3893(\()X +2 f +3920(hsearch)X +1 f +2418 1800(or)N +2 f +2528(ndbm)X +1 f +2706(,)X +2769(whichever)X +3147(is)X +3243(appropriate\))X +3678(as)X +3787(well)X +3967(as)X +4076(the)X +2418 1888(improvement.)N +2929(The)X +3098(improvement)X +3569(is)X +3666(expressed)X +4027(as)X +4138(a)X +2418 1976(percentage)N +2787(of)X +2874(the)X +2992(old)X +3114(running)X +3383(time:)X +0 f +8 s +2418 2275(%)N +2494(=)X +2570(100)X +2722(*)X +2798 -0.4219(\(old_time)AX +3178(-)X +3254 -0.4219(new_time\))AX +3634(/)X +3710(old_time)X +1 f +10 s +2590 2600(In)N +2700(nearly)X +2944(all)X +3067(cases,)X +3299(the)X +3439(new)X +3615(routines)X +3915(perform)X +2418 2688(better)N +2628(than)X +2793(the)X +2918(old)X +3047(routines)X +3332(\(both)X +2 f +3527(hsearch)X +1 f +3807(and)X +2 f +3949(ndbm)X +1 f +4127(\).)X +2418 2776(Although)N +2755(the)X +3 f +2888(create)X +1 f +3134(tests)X +3311(exhibit)X +3567(superior)X +3864(user)X +4032(time)X +2418 2864(performance,)N +2869(the)X +2991(test)X +3126(time)X +3292(is)X +3369(dominated)X +3731(by)X +3834(the)X +3955(cost)X +4107(of)X +2418 2952(writing)N +2677(the)X +2803(actual)X +3023(\256le)X +3153(to)X +3243(disk.)X +3444(For)X +3583(the)X +3709(large)X +3897(database)X +2418 3040(\(the)N +2564(dictionary\),)X +2957(this)X +3093(completely)X +3470(overwhelmed)X +3927(the)X +4045(sys-)X +2418 3128(tem)N +2570(time.)X +2783(However,)X +3129(for)X +3254(the)X +3383(small)X +3587(data)X +3752(base,)X +3946(we)X +4071(see)X +2418 3216(that)N +2569(differences)X +2958(in)X +3051(both)X +3224(user)X +3389(and)X +3536(system)X +3788(time)X +3960(contri-)X +2418 3304(bute)N +2576(to)X +2658(the)X +2776(superior)X +3059(performance)X +3486(of)X +3573(the)X +3691(new)X +3845(package.)X +2590 3418(The)N +3 f +2764(read)X +1 f +2920(,)X +3 f +2989(verify)X +1 f +3190(,)X +3259(and)X +3 f +3424(sequential)X +1 f +3818(results)X +4075(are)X +2418 3506(deceptive)N +2758(for)X +2883(the)X +3012(small)X +3216(database)X +3524(since)X +3720(the)X +3849(entire)X +4063(test)X +2418 3594(ran)N +2551(in)X +2643(under)X +2856(a)X +2922(second.)X +3215(However,)X +3560(on)X +3669(the)X +3796(larger)X +4013(data-)X +2418 3682(base)N +2590(the)X +3 f +2716(read)X +1 f +2900(and)X +3 f +3044(verify)X +1 f +3273(tests)X +3443(bene\256t)X +3689(from)X +3873(the)X +3999(cach-)X +2418 3770(ing)N +2546(of)X +2639(buckets)X +2910(in)X +2998(the)X +3122(new)X +3282(package)X +3571(to)X +3658(improve)X +3950(perfor-)X +2418 3858(mance)N +2666(by)X +2784(over)X +2965(80%.)X +3169(Since)X +3384(the)X +3519(\256rst)X +3 f +3680(sequential)X +1 f +4063(test)X +2418 3946(does)N +2598(not)X +2733(require)X +2 f +2994(ndbm)X +1 f +3205(to)X +3299(return)X +3523(the)X +3653(data)X +3819(values,)X +4076(the)X +2418 4034(user)N +2573(time)X +2735(is)X +2808(lower)X +3011(than)X +3169(for)X +3283(the)X +3401(new)X +3555(package.)X +3879(However)X +2418 4122(when)N +2613(we)X +2728(require)X +2977(both)X +3139(packages)X +3454(to)X +3536(return)X +3748(data,)X +3922(the)X +4040(new)X +2418 4210(package)N +2702(excels)X +2923(in)X +3005(all)X +3105(three)X +3286(timings.)X +2590 4324(The)N +2773(small)X +3003(database)X +3337(runs)X +3532(so)X +3660(quickly)X +3957(in)X +4076(the)X +2418 4412(memory-resident)N +3000(case)X +3173(that)X +3326(the)X +3457(results)X +3699(are)X +3831(uninterest-)X +2418 4500(ing.)N +2589(However,)X +2933(for)X +3056(the)X +3183(larger)X +3400(database)X +3706(the)X +3833(new)X +3995(pack-)X +2418 4588(age)N +2567(pays)X +2751(a)X +2824(small)X +3033(penalty)X +3305(in)X +3403(system)X +3661(time)X +3839(because)X +4130(it)X +2418 4676(limits)N +2636(its)X +2748(main)X +2944(memory)X +3247(utilization)X +3607(and)X +3759(swaps)X +3991(pages)X +2418 4764(out)N +2550(to)X +2642(temporary)X +3002(storage)X +3264(in)X +3356(the)X +3484(\256le)X +3616(system)X +3868(while)X +4076(the)X +2 f +2418 4852(hsearch)N +1 f +2698(package)X +2988(requires)X +3273(that)X +3419(the)X +3543(application)X +3924(allocate)X +2418 4940(enough)N +2692(space)X +2909(for)X +3041(all)X +3159(key/data)X +3468(pair.)X +3670(However,)X +4022(even)X +2418 5028(with)N +2600(the)X +2738(system)X +3000(time)X +3182(penalty,)X +3477(the)X +3614(resulting)X +3933(elapsed)X +2418 5116(time)N +2580(improves)X +2898(by)X +2998(over)X +3161(50%.)X +3 f +432 5960(10)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +11 p +%%Page: 11 11 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +10 f +908 454(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +1379 546(hash)N +1652(ndbm)X +1950(%change)X +1 f +10 f +908 550(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 642(CREATE)N +10 f +908 646(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 738(user)N +1424(6.4)X +1671(12.2)X +2073(48)X +1157 826(sys)N +1384(32.5)X +1671(34.7)X +2113(6)X +3 f +1006 914(elapsed)N +10 f +1310 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +1384 914(90.4)N +10 f +1581 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +1671 914(99.6)N +10 f +1883 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +2113 914(9)N +1 f +10 f +908 910(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 926(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1010(READ)N +10 f +908 1014(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1106(user)N +1424(3.4)X +1711(6.1)X +2073(44)X +1157 1194(sys)N +1424(1.2)X +1671(15.3)X +2073(92)X +3 f +1006 1282(elapsed)N +10 f +1310 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +1424 1282(4.0)N +10 f +1581 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +1671 1282(21.2)N +10 f +1883 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +2073 1282(81)N +1 f +10 f +908 1278(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 1294(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1378(VERIFY)N +10 f +908 1382(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1474(user)N +1424(3.5)X +1711(6.3)X +2073(44)X +1157 1562(sys)N +1424(1.2)X +1671(15.3)X +2073(92)X +3 f +1006 1650(elapsed)N +10 f +1310 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +1424 1650(4.0)N +10 f +1581 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +1671 1650(21.2)N +10 f +1883 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +2073 1650(81)N +1 f +10 f +908 1646(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 1662(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1746(SEQUENTIAL)N +10 f +908 1750(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1842(user)N +1424(2.7)X +1711(1.9)X +2046(-42)X +1157 1930(sys)N +1424(0.7)X +1711(3.9)X +2073(82)X +3 f +1006 2018(elapsed)N +10 f +1310 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +1424 2018(3.0)N +10 f +1581 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +1711 2018(5.0)N +10 f +1883 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +2073 2018(40)N +1 f +10 f +908 2014(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 2030(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 2114(SEQUENTIAL)N +1467(\(with)X +1656(data)X +1810(retrieval\))X +10 f +908 2118(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 2210(user)N +1424(2.7)X +1711(8.2)X +2073(67)X +1157 2298(sys)N +1424(0.7)X +1711(4.3)X +2073(84)X +3 f +1006 2386(elapsed)N +1424(3.0)X +1671(12.0)X +2073(75)X +1 f +10 f +908 2390(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +899 2394(c)N +2378(c)Y +2298(c)Y +2218(c)Y +2138(c)Y +2058(c)Y +1978(c)Y +1898(c)Y +1818(c)Y +1738(c)Y +1658(c)Y +1578(c)Y +1498(c)Y +1418(c)Y +1338(c)Y +1258(c)Y +1178(c)Y +1098(c)Y +1018(c)Y +938(c)Y +858(c)Y +778(c)Y +698(c)Y +618(c)Y +538(c)Y +1310 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +1581 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +1883 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +2278 2394(c)N +2378(c)Y +2298(c)Y +2218(c)Y +2138(c)Y +2058(c)Y +1978(c)Y +1898(c)Y +1818(c)Y +1738(c)Y +1658(c)Y +1578(c)Y +1498(c)Y +1418(c)Y +1338(c)Y +1258(c)Y +1178(c)Y +1098(c)Y +1018(c)Y +938(c)Y +858(c)Y +778(c)Y +698(c)Y +618(c)Y +538(c)Y +905 2574(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +1318 2666(hash)N +1585(hsearch)X +1953(%change)X +1 f +10 f +905 2670(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +945 2762(CREATE/READ)N +10 f +905 2766(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1064 2858(user)N +1343(6.6)X +1642(17.2)X +2096(62)X +1096 2946(sys)N +1343(1.1)X +1682(0.3)X +2029(-266)X +3 f +945 3034(elapsed)N +1343(7.8)X +1642(17.0)X +2096(54)X +1 f +10 f +905 3038(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +896 3050(c)N +2978(c)Y +2898(c)Y +2818(c)Y +2738(c)Y +2658(c)Y +1249 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +1520 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +1886 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +2281 3050(c)N +2978(c)Y +2898(c)Y +2818(c)Y +2738(c)Y +2658(c)Y +3 f +720 3174(Figure)N +967(8a:)X +1 f +1094(Timing)X +1349(results)X +1578(for)X +1692(the)X +1810(dictionary)X +2155(database.)X +10 f +720 3262 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1407 3504(Conclusion)N +1 f +892 3636(This)N +1063(paper)X +1271(has)X +1407(presented)X +1744(the)X +1871(design,)X +2129(implemen-)X +720 3724(tation)N +928(and)X +1070(performance)X +1503(of)X +1596(a)X +1658(new)X +1818(hashing)X +2093(package)X +2382(for)X +720 3812(UNIX.)N +993(The)X +1150(new)X +1316(package)X +1612(provides)X +1919(a)X +1986(superset)X +2280(of)X +2378(the)X +720 3900(functionality)N +1159(of)X +1255(existing)X +1537(hashing)X +1815(packages)X +2139(and)X +2284(incor-)X +720 3988(porates)N +975(additional)X +1318(features)X +1596(such)X +1766(as)X +1855(large)X +2038(key)X +2176(handling,)X +720 4076(user)N +876(de\256ned)X +1134(hash)X +1302(functions,)X +1641(multiple)X +1928(hash)X +2096(tables,)X +2324(vari-)X +720 4164(able)N +894(sized)X +1099(pages,)X +1342(and)X +1498(linear)X +1721(hashing.)X +2050(In)X +2156(nearly)X +2396(all)X +720 4252(cases,)N +954(the)X +1096(new)X +1274(package)X +1582(provides)X +1902(improved)X +2252(perfor-)X +720 4340(mance)N +974(on)X +1098(the)X +1240(order)X +1454(of)X +1565(50-80%)X +1863(for)X +2001(the)X +2142(workloads)X +720 4428(shown.)N +990(Applications)X +1420(such)X +1588(as)X +1676(the)X +1794(loader,)X +2035(compiler,)X +2360(and)X +720 4516(mail,)N +921(which)X +1156(currently)X +1485(implement)X +1866(their)X +2051(own)X +2227(hashing)X +720 4604(routines,)N +1032(should)X +1279(be)X +1389(modi\256ed)X +1706(to)X +1801(use)X +1941(the)X +2072(generic)X +2342(rou-)X +720 4692(tines.)N +892 4806(This)N +1087(hashing)X +1389(package)X +1705(is)X +1810(one)X +1978(access)X +2236(method)X +720 4894(which)N +953(is)X +1043(part)X +1205(of)X +1309(a)X +1382(generic)X +1656(database)X +1970(access)X +2212(package)X +720 4982(being)N +955(developed)X +1342(at)X +1457(the)X +1612(University)X +2007(of)X +2131(California,)X +720 5070(Berkeley.)N +1089(It)X +1177(will)X +1340(include)X +1614(a)X +1688(btree)X +1887(access)X +2131(method)X +2409(as)X +720 5158(well)N +916(as)X +1041(\256xed)X +1259(and)X +1433(variable)X +1750(length)X +2007(record)X +2270(access)X +720 5246(methods)N +1024(in)X +1119(addition)X +1414(to)X +1509(the)X +1640(hashed)X +1896(support)X +2168(presented)X +720 5334(here.)N +948(All)X +1099(of)X +1215(the)X +1361(access)X +1615(methods)X +1934(are)X +2081(based)X +2312(on)X +2440(a)X +720 5422(key/data)N +1037(pair)X +1207(interface)X +1533(and)X +1693(appear)X +1952(identical)X +2272(to)X +2378(the)X +720 5510(application)N +1121(layer,)X +1347(allowing)X +1671(application)X +2071(implementa-)X +720 5598(tions)N +906(to)X +999(be)X +1106(largely)X +1360(independent)X +1783(of)X +1881(the)X +2010(database)X +2318(type.)X +720 5686(The)N +873(package)X +1165(is)X +1246(expected)X +1560(to)X +1650(be)X +1754(an)X +1858(integral)X +2131(part)X +2284(of)X +2378(the)X +2706 538(4.4BSD)N +3006(system,)X +3293(with)X +3479(various)X +3759(standard)X +4075(applications)X +2706 626(such)N +2879(as)X +2972(more\(1\),)X +3277(sort\(1\))X +3517(and)X +3659(vi\(1\))X +3841(based)X +4050(on)X +4156(it.)X +4266(While)X +2706 714(the)N +2833(current)X +3089(design)X +3326(does)X +3501(not)X +3631(support)X +3899(multi-user)X +4256(access)X +2706 802(or)N +2804(transactions,)X +3238(they)X +3407(could)X +3616(be)X +3723(incorporated)X +4159(relatively)X +2706 890(easily.)N +10 f +2894 938(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +3365 1030(hash)N +3638(ndbm)X +3936(%change)X +1 f +10 f +2894 1034(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1126(CREATE)N +10 f +2894 1130(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1222(user)N +3390(0.2)X +3677(0.4)X +4079(50)X +3143 1310(sys)N +3390(0.1)X +3677(1.0)X +4079(90)X +3 f +2992 1398(elapsed)N +10 f +3296 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +3390 1398(0)N +10 f +3567 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +3677 1398(3.2)N +10 f +3869 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +4039 1398(100)N +1 f +10 f +2894 1394(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 1410(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1494(READ)N +10 f +2894 1498(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1590(user)N +3390(0.1)X +3677(0.1)X +4119(0)X +3143 1678(sys)N +3390(0.1)X +3677(0.4)X +4079(75)X +3 f +2992 1766(elapsed)N +10 f +3296 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +3390 1766(0.0)N +10 f +3567 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +3677 1766(0.0)N +10 f +3869 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +4119 1766(0)N +1 f +10 f +2894 1762(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 1778(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1862(VERIFY)N +10 f +2894 1866(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1958(user)N +3390(0.1)X +3677(0.2)X +4079(50)X +3143 2046(sys)N +3390(0.1)X +3677(0.3)X +4079(67)X +3 f +2992 2134(elapsed)N +10 f +3296 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +3390 2134(0.0)N +10 f +3567 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +3677 2134(0.0)N +10 f +3869 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +4119 2134(0)N +1 f +10 f +2894 2130(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 2146(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 2230(SEQUENTIAL)N +10 f +2894 2234(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 2326(user)N +3390(0.1)X +3677(0.0)X +4012(-100)X +3143 2414(sys)N +3390(0.1)X +3677(0.1)X +4119(0)X +3 f +2992 2502(elapsed)N +10 f +3296 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +3390 2502(0.0)N +10 f +3567 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +3677 2502(0.0)N +10 f +3869 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +4119 2502(0)N +1 f +10 f +2894 2498(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 2514(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 2598(SEQUENTIAL)N +3453(\(with)X +3642(data)X +3796(retrieval\))X +10 f +2894 2602(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 2694(user)N +3390(0.1)X +3677(0.1)X +4119(0)X +3143 2782(sys)N +3390(0.1)X +3677(0.1)X +4119(0)X +3 f +2992 2870(elapsed)N +3390(0.0)X +3677(0.0)X +4119(0)X +1 f +10 f +2894 2874(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2885 2878(c)N +2862(c)Y +2782(c)Y +2702(c)Y +2622(c)Y +2542(c)Y +2462(c)Y +2382(c)Y +2302(c)Y +2222(c)Y +2142(c)Y +2062(c)Y +1982(c)Y +1902(c)Y +1822(c)Y +1742(c)Y +1662(c)Y +1582(c)Y +1502(c)Y +1422(c)Y +1342(c)Y +1262(c)Y +1182(c)Y +1102(c)Y +1022(c)Y +3296 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +3567 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +3869 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +4264 2878(c)N +2862(c)Y +2782(c)Y +2702(c)Y +2622(c)Y +2542(c)Y +2462(c)Y +2382(c)Y +2302(c)Y +2222(c)Y +2142(c)Y +2062(c)Y +1982(c)Y +1902(c)Y +1822(c)Y +1742(c)Y +1662(c)Y +1582(c)Y +1502(c)Y +1422(c)Y +1342(c)Y +1262(c)Y +1182(c)Y +1102(c)Y +1022(c)Y +2891 3058(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +3304 3150(hash)N +3571(hsearch)X +3939(%change)X +1 f +10 f +2891 3154(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2931 3246(CREATE/READ)N +10 f +2891 3250(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3050 3342(user)N +3329(0.3)X +3648(0.4)X +4048(25)X +3082 3430(sys)N +3329(0.0)X +3648(0.0)X +4088(0)X +3 f +2931 3518(elapsed)N +3329(0.0)X +3648(0.0)X +4088(0)X +1 f +10 f +2891 3522(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2882 3534(c)N +3462(c)Y +3382(c)Y +3302(c)Y +3222(c)Y +3142(c)Y +3235 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +3506 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +3872 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +4267 3534(c)N +3462(c)Y +3382(c)Y +3302(c)Y +3222(c)Y +3142(c)Y +3 f +2706 3658(Figure)N +2953(8b:)X +1 f +3084(Timing)X +3339(results)X +3568(for)X +3682(the)X +3800(password)X +4123(database.)X +10 f +2706 3746 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +3396 3988(References)N +1 f +2706 4120([ATT79])N +3058(AT&T,)X +3358(DBM\(3X\),)X +2 f +3773(Unix)X +3990(Programmer's)X +2878 4208(Manual,)N +3194(Seventh)X +3491(Edition,)X +3793(Volume)X +4085(1)X +1 f +(,)S +4192(January,)X +2878 4296(1979.)N +2706 4472([ATT85])N +3027(AT&T,)X +3296(HSEARCH\(BA_LIB\),)X +2 f +4053(Unix)X +4239(System)X +2878 4560(User's)N +3112(Manual,)X +3401(System)X +3644(V.3)X +1 f +3753(,)X +3793(pp.)X +3913(506-508,)X +4220(1985.)X +2706 4736([BRE73])N +3025(Brent,)X +3253(Richard)X +3537(P.,)X +3651(``Reducing)X +4041(the)X +4168(Retrieval)X +2878 4824(Time)N +3071(of)X +3162(Scatter)X +3409(Storage)X +3678(Techniques'',)X +2 f +4146(Commun-)X +2878 4912(ications)N +3175(of)X +3281(the)X +3422(ACM)X +1 f +3591(,)X +3654(Volume)X +3955(16,)X +4098(No.)X +4259(2,)X +4362(pp.)X +2878 5000(105-109,)N +3185(February,)X +3515(1973.)X +2706 5176([BSD86])N +3055(NDBM\(3\),)X +2 f +3469(4.3BSD)X +3775(Unix)X +3990(Programmer's)X +2878 5264(Manual)N +3155(Reference)X +3505(Guide)X +1 f +3701(,)X +3749(University)X +4114(of)X +4208(Califor-)X +2878 5352(nia,)N +3016(Berkeley,)X +3346(1986.)X +2706 5528([ENB88])N +3025(Enbody,)X +3319(R.)X +3417(J.,)X +3533(Du,)X +3676(H.)X +3779(C.,)X +3897(``Dynamic)X +4270(Hash-)X +2878 5616(ing)N +3034(Schemes'',)X +2 f +3427(ACM)X +3630(Computing)X +4019(Surveys)X +1 f +4269(,)X +4322(Vol.)X +2878 5704(20,)N +2998(No.)X +3136(2,)X +3216(pp.)X +3336(85-113,)X +3603(June)X +3770(1988.)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4384(11)X + +12 p +%%Page: 12 12 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538([FAG79])N +776(Ronald)X +1057(Fagin,)X +1308(Jurg)X +1495(Nievergelt,)X +1903(Nicholas)X +604 626(Pippenger,)N +1003(H.)X +1135(Raymond)X +1500(Strong,)X +1787(``Extendible)X +604 714(Hashing)N +901(--)X +985(A)X +1073(Fast)X +1236(Access)X +1493(Method)X +1771(for)X +1894(Dynamic)X +604 802(Files'',)N +2 f +855(ACM)X +1046(Transactions)X +1485(on)X +1586(Database)X +1914(Systems)X +1 f +2168(,)X +604 890(Volume)N +882(4,)X +962(No.)X +1100(3.,)X +1200(September)X +1563(1979,)X +1763(pp)X +1863(315-34)X +432 1066([KNU68],)N +802(Knuth,)X +1064(D.E.,)X +2 f +1273(The)X +1434(Art)X +1577(of)X +1680(Computer)X +2041(Pro-)X +604 1154(gramming)N +971(Vol.)X +1140(3:)X +1245(Sorting)X +1518(and)X +1676(Searching)X +1 f +2001(,)X +2058(sec-)X +604 1242(tions)N +779(6.3-6.4,)X +1046(pp)X +1146(481-550.)X +432 1418([LAR78])N +747(Larson,)X +1011(Per-Ake,)X +1319(``Dynamic)X +1687(Hashing'',)X +2 f +2048(BIT)X +1 f +(,)S +604 1506(Vol.)N +764(18,)X +884(1978,)X +1084(pp.)X +1204(184-201.)X +432 1682([LAR88])N +752(Larson,)X +1021(Per-Ake,)X +1335(``Dynamic)X +1709(Hash)X +1900(Tables'',)X +2 f +604 1770(Communications)N +1183(of)X +1281(the)X +1415(ACM)X +1 f +1584(,)X +1640(Volume)X +1934(31,)X +2070(No.)X +604 1858(4.,)N +704(April)X +893(1988,)X +1093(pp)X +1193(446-457.)X +432 2034([LIT80])N +731(Witold,)X +1013(Litwin,)X +1286(``Linear)X +1590(Hashing:)X +1939(A)X +2036(New)X +604 2122(Tool)N +786(for)X +911(File)X +1065(and)X +1211(Table)X +1424(Addressing'',)X +2 f +1893(Proceed-)X +604 2210(ings)N +761(of)X +847(the)X +969(6th)X +1095(International)X +1540(Conference)X +1933(on)X +2036(Very)X +604 2298(Large)N +815(Databases)X +1 f +1153(,)X +1193(1980.)X +432 2474([NEL90])N +743(Nelson,)X +1011(Philip)X +1222(A.,)X +2 f +1341(Gdbm)X +1558(1.4)X +1679(source)X +1913(distribu-)X +604 2562(tion)N +748(and)X +888(README)X +1 f +1209(,)X +1249(August)X +1500(1990.)X +432 2738([THOM90])N +840(Ken)X +1011(Thompson,)X +1410(private)X +1670(communication,)X +604 2826(Nov.)N +782(1990.)X +432 3002([TOR87])N +790(Torek,)X +1066(C.,)X +1222(``Re:)X +1470(dbm.a)X +1751(and)X +1950(ndbm.a)X +604 3090(archives'',)N +2 f +966(USENET)X +1279(newsgroup)X +1650(comp.unix)X +1 f +2002(1987.)X +432 3266([TOR88])N +760(Torek,)X +1006(C.,)X +1133(``Re:)X +1351(questions)X +1686(regarding)X +2027(data-)X +604 3354(bases)N +826(created)X +1106(with)X +1295(dbm)X +1484(and)X +1647(ndbm)X +1876(routines'')X +2 f +604 3442(USENET)N +937(newsgroup)X +1328(comp.unix.questions)X +1 f +1982(,)X +2041(June)X +604 3530(1988.)N +432 3706([WAL84])N +773(Wales,)X +1018(R.,)X +1135(``Discussion)X +1564(of)X +1655("dbm")X +1887(data)X +2045(base)X +604 3794(system'',)N +2 f +973(USENET)X +1339(newsgroup)X +1762(unix.wizards)X +1 f +2168(,)X +604 3882(January,)N +894(1984.)X +432 4058([YIG89])N +751(Ozan)X +963(S.)X +1069(Yigit,)X +1294(``How)X +1545(to)X +1648(Roll)X +1826(Your)X +2032(Own)X +604 4146(Dbm/Ndbm'',)N +2 f +1087(unpublished)X +1504(manuscript)X +1 f +(,)S +1910(Toronto,)X +604 4234(July,)N +777(1989)X +3 f +432 5960(12)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +13 p +%%Page: 13 13 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(Margo)N +960(I.)X +1033(Seltzer)X +1282(is)X +1361(a)X +1423(Ph.D.)X +1631(student)X +1887(in)X +1974(the)X +2097(Department)X +720 626(of)N +823(Electrical)X +1167(Engineering)X +1595(and)X +1747(Computer)X +2102(Sciences)X +2418(at)X +720 714(the)N +850(University)X +1220(of)X +1318(California,)X +1694(Berkeley.)X +2055(Her)X +2207(research)X +720 802(interests)N +1017(include)X +1283(\256le)X +1415(systems,)X +1718(databases,)X +2076(and)X +2221(transac-)X +720 890(tion)N +896(processing)X +1291(systems.)X +1636(She)X +1807(spent)X +2027(several)X +2306(years)X +720 978(working)N +1026(at)X +1123(startup)X +1380(companies)X +1762(designing)X +2112(and)X +2267(imple-)X +720 1066(menting)N +1048(\256le)X +1216(systems)X +1535(and)X +1716(transaction)X +2133(processing)X +720 1154(software)N +1026(and)X +1170(designing)X +1509(microprocessors.)X +2103(Ms.)X +2253(Seltzer)X +720 1242(received)N +1057(her)X +1223(AB)X +1397(in)X +1522(Applied)X +1843(Mathematics)X +2320(from)X +720 1330 0.1953(Harvard/Radcliffe)AN +1325(College)X +1594(in)X +1676(1983.)X +720 1444(In)N +810(her)X +936(spare)X +1129(time,)X +1313(Margo)X +1549(can)X +1683(usually)X +1936(be)X +2034(found)X +2243(prepar-)X +720 1532(ing)N +868(massive)X +1171(quantities)X +1527(of)X +1639(food)X +1831(for)X +1970(hungry)X +2242(hoards,)X +720 1620(studying)N +1022(Japanese,)X +1355(or)X +1449(playing)X +1716(soccer)X +1948(with)X +2116(an)X +2218(exciting)X +720 1708(Bay)N +912(Area)X +1132(Women's)X +1507(Soccer)X +1788(team,)X +2026(the)X +2186(Berkeley)X +720 1796(Bruisers.)N +720 1910(Ozan)N +915(\()X +3 f +942(Oz)X +1 f +1040(\))X +1092(Yigit)X +1281(is)X +1358(currently)X +1672(a)X +1732(software)X +2033(engineer)X +2334(with)X +720 1998(the)N +886(Communications)X +1499(Research)X +1861(and)X +2044(Development)X +720 2086(group,)N +948(Computing)X +1328(Services,)X +1641(York)X +1826(University.)X +2224(His)X +2355(for-)X +720 2174(mative)N +967(years)X +1166(were)X +1352(also)X +1510(spent)X +1708(at)X +1795(York,)X +2009(where)X +2234(he)X +2338(held)X +720 2262(system)N +985(programmer)X +1425(and)X +1583(administrator)X +2052(positions)X +2382(for)X +720 2350(various)N +995(mixtures)X +1314(of)X +1420(of)X +1526(UNIX)X +1765(systems)X +2056(starting)X +2334(with)X +720 2438(Berkeley)N +1031(4.1)X +1151(in)X +1233(1982,)X +1433(while)X +1631(at)X +1709(the)X +1827(same)X +2012(time)X +2174(obtaining)X +720 2526(a)N +776(degree)X +1011(in)X +1093(Computer)X +1433(Science.)X +720 2640(In)N +813(his)X +931(copious)X +1205(free)X +1356(time,)X +1543(Oz)X +1662(enjoys)X +1896(working)X +2188(on)X +2293(what-)X +720 2728(ever)N +890(software)X +1197(looks)X +1400(interesting,)X +1788(which)X +2014(often)X +2209(includes)X +720 2816(language)N +1044(interpreters,)X +1464(preprocessors,)X +1960(and)X +2110(lately,)X +2342(pro-)X +720 2904(gram)N +905(generators)X +1260(and)X +1396(expert)X +1617(systems.)X +720 3018(Oz)N +836(has)X +964(authored)X +1266(several)X +1515(public-domain)X +2003(software)X +2301(tools,)X +720 3106(including)N +1069(an)X +1191(nroff-like)X +1545(text)X +1711(formatter)X +2 f +2056(proff)X +1 f +2257(that)X +2423(is)X +720 3194(apparently)N +1083(still)X +1226(used)X +1397(in)X +1483(some)X +1676(basement)X +2002(PCs.)X +2173(His)X +2307(latest)X +720 3282(obsessions)N +1143(include)X +1460(the)X +1639(incredible)X +2040(programming)X +720 3370(language)N +1030(Scheme,)X +1324(and)X +1460(Chinese)X +1738(Brush)X +1949(painting.)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4384(13)X + +14 p +%%Page: 14 14 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 5960(14)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +14 p +%%Trailer +xt + +xs diff --git a/lib/libc/db/docs/libtp.usenix.ps b/lib/libc/db/docs/libtp.usenix.ps new file mode 100644 index 0000000..ea821a9 --- /dev/null +++ b/lib/libc/db/docs/libtp.usenix.ps @@ -0,0 +1,12340 @@ +%!PS-Adobe-1.0 +%%Creator: utopia:margo (& Seltzer,608-13E,8072,) +%%Title: stdin (ditroff) +%%CreationDate: Thu Dec 12 15:32:11 1991 +%%EndComments +% @(#)psdit.pro 1.3 4/15/88 +% lib/psdit.pro -- prolog for psdit (ditroff) files +% Copyright (c) 1984, 1985 Adobe Systems Incorporated. All Rights Reserved. +% last edit: shore Sat Nov 23 20:28:03 1985 +% RCSID: $Header: psdit.pro,v 2.1 85/11/24 12:19:43 shore Rel $ + +% Changed by Edward Wang (edward@ucbarpa.berkeley.edu) to handle graphics, +% 17 Feb, 87. + +/$DITroff 140 dict def $DITroff begin +/fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def +/xi{0 72 11 mul translate 72 resolution div dup neg scale 0 0 moveto + /fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def F + /pagesave save def}def +/PB{save /psv exch def currentpoint translate + resolution 72 div dup neg scale 0 0 moveto}def +/PE{psv restore}def +/arctoobig 90 def /arctoosmall .05 def +/m1 matrix def /m2 matrix def /m3 matrix def /oldmat matrix def +/tan{dup sin exch cos div}def +/point{resolution 72 div mul}def +/dround {transform round exch round exch itransform}def +/xT{/devname exch def}def +/xr{/mh exch def /my exch def /resolution exch def}def +/xp{}def +/xs{docsave restore end}def +/xt{}def +/xf{/fontname exch def /slotno exch def fontnames slotno get fontname eq not + {fonts slotno fontname findfont put fontnames slotno fontname put}if}def +/xH{/fontheight exch def F}def +/xS{/fontslant exch def F}def +/s{/fontsize exch def /fontheight fontsize def F}def +/f{/fontnum exch def F}def +/F{fontheight 0 le{/fontheight fontsize def}if + fonts fontnum get fontsize point 0 0 fontheight point neg 0 0 m1 astore + fontslant 0 ne{1 0 fontslant tan 1 0 0 m2 astore m3 concatmatrix}if + makefont setfont .04 fontsize point mul 0 dround pop setlinewidth}def +/X{exch currentpoint exch pop moveto show}def +/N{3 1 roll moveto show}def +/Y{exch currentpoint pop exch moveto show}def +/S{show}def +/ditpush{}def/ditpop{}def +/AX{3 -1 roll currentpoint exch pop moveto 0 exch ashow}def +/AN{4 2 roll moveto 0 exch ashow}def +/AY{3 -1 roll currentpoint pop exch moveto 0 exch ashow}def +/AS{0 exch ashow}def +/MX{currentpoint exch pop moveto}def +/MY{currentpoint pop exch moveto}def +/MXY{moveto}def +/cb{pop}def % action on unknown char -- nothing for now +/n{}def/w{}def +/p{pop showpage pagesave restore /pagesave save def}def +/Dt{/Dlinewidth exch def}def 1 Dt +/Ds{/Ddash exch def}def -1 Ds +/Di{/Dstipple exch def}def 1 Di +/Dsetlinewidth{2 Dlinewidth mul setlinewidth}def +/Dsetdash{Ddash 4 eq{[8 12]}{Ddash 16 eq{[32 36]} + {Ddash 20 eq{[32 12 8 12]}{[]}ifelse}ifelse}ifelse 0 setdash}def +/Dstroke{gsave Dsetlinewidth Dsetdash 1 setlinecap stroke grestore + currentpoint newpath moveto}def +/Dl{rlineto Dstroke}def +/arcellipse{/diamv exch def /diamh exch def oldmat currentmatrix pop + currentpoint translate 1 diamv diamh div scale /rad diamh 2 div def + currentpoint exch rad add exch rad -180 180 arc oldmat setmatrix}def +/Dc{dup arcellipse Dstroke}def +/De{arcellipse Dstroke}def +/Da{/endv exch def /endh exch def /centerv exch def /centerh exch def + /cradius centerv centerv mul centerh centerh mul add sqrt def + /eradius endv endv mul endh endh mul add sqrt def + /endang endv endh atan def + /startang centerv neg centerh neg atan def + /sweep startang endang sub dup 0 lt{360 add}if def + sweep arctoobig gt + {/midang startang sweep 2 div sub def /midrad cradius eradius add 2 div def + /midh midang cos midrad mul def /midv midang sin midrad mul def + midh neg midv neg endh endv centerh centerv midh midv Da + Da} + {sweep arctoosmall ge + {/controldelt 1 sweep 2 div cos sub 3 sweep 2 div sin mul div 4 mul def + centerv neg controldelt mul centerh controldelt mul + endv neg controldelt mul centerh add endh add + endh controldelt mul centerv add endv add + centerh endh add centerv endv add rcurveto Dstroke} + {centerh endh add centerv endv add rlineto Dstroke} + ifelse} + ifelse}def +/Dpatterns[ +[%cf[widthbits] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000103810000000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000001038100000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0042660000246600>] +[8<0000990000990000>] +[8<0804020180402010>] +[8<2418814242811824>] +[8<6699996666999966>] +[8<8000000008000000>] +[8<00001c3e363e1c00>] +[8<0000000000000000>] +[32<00000040000000c00000004000000040000000e0000000000000000000000000>] +[32<00000000000060000000900000002000000040000000f0000000000000000000>] +[32<000000000000000000e0000000100000006000000010000000e0000000000000>] +[32<00000000000000002000000060000000a0000000f00000002000000000000000>] +[32<0000000e0000000000000000000000000000000f000000080000000e00000001>] +[32<0000090000000600000000000000000000000000000007000000080000000e00>] +[32<00010000000200000004000000040000000000000000000000000000000f0000>] +[32<0900000006000000090000000600000000000000000000000000000006000000>]] +[%ug +[8<0000020000000000>] +[8<0000020000002000>] +[8<0004020000002000>] +[8<0004020000402000>] +[8<0004060000402000>] +[8<0004060000406000>] +[8<0006060000406000>] +[8<0006060000606000>] +[8<00060e0000606000>] +[8<00060e000060e000>] +[8<00070e000060e000>] +[8<00070e000070e000>] +[8<00070e020070e000>] +[8<00070e020070e020>] +[8<04070e020070e020>] +[8<04070e024070e020>] +[8<04070e064070e020>] +[8<04070e064070e060>] +[8<06070e064070e060>] +[8<06070e066070e060>] +[8<06070f066070e060>] +[8<06070f066070f060>] +[8<060f0f066070f060>] +[8<060f0f0660f0f060>] +[8<060f0f0760f0f060>] +[8<060f0f0760f0f070>] +[8<0e0f0f0760f0f070>] +[8<0e0f0f07e0f0f070>] +[8<0e0f0f0fe0f0f070>] +[8<0e0f0f0fe0f0f0f0>] +[8<0f0f0f0fe0f0f0f0>] +[8<0f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f1f9>] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8] +[8]] +[%mg +[8<8000000000000000>] +[8<0822080080228000>] +[8<0204081020408001>] +[8<40e0400000000000>] +[8<66999966>] +[8<8001000010080000>] +[8<81c36666c3810000>] +[8] +[16<07c00f801f003e007c00f800f001e003c007800f001f003e007c00f801f003e0>] +[16<1f000f8007c003e001f000f8007c003e001f800fc007e003f001f8007c003e00>] +[8] +[16<0040008001000200040008001000200040008000000100020004000800100020>] +[16<0040002000100008000400020001800040002000100008000400020001000080>] +[16<1fc03fe07df0f8f8f07de03fc01f800fc01fe03ff07df8f87df03fe01fc00f80>] +[8<80>] +[8<8040201000000000>] +[8<84cc000048cc0000>] +[8<9900009900000000>] +[8<08040201804020100800020180002010>] +[8<2418814242811824>] +[8<66999966>] +[8<8000000008000000>] +[8<70f8d8f870000000>] +[8<0814224180402010>] +[8] +[8<018245aa45820100>] +[8<221c224180808041>] +[8<88000000>] +[8<0855800080550800>] +[8<2844004482440044>] +[8<0810204080412214>] +[8<00>]]]def +/Dfill{ + transform /maxy exch def /maxx exch def + transform /miny exch def /minx exch def + minx maxx gt{/minx maxx /maxx minx def def}if + miny maxy gt{/miny maxy /maxy miny def def}if + Dpatterns Dstipple 1 sub get exch 1 sub get + aload pop /stip exch def /stipw exch def /stiph 128 def + /imatrix[stipw 0 0 stiph 0 0]def + /tmatrix[stipw 0 0 stiph 0 0]def + /minx minx cvi stiph idiv stiph mul def + /miny miny cvi stipw idiv stipw mul def + gsave eoclip 0 setgray + miny stiph maxy{ + tmatrix exch 5 exch put + minx stipw maxx{ + tmatrix exch 4 exch put tmatrix setmatrix + stipw stiph true imatrix {stip} imagemask + }for + }for + grestore +}def +/Dp{Dfill Dstroke}def +/DP{Dfill currentpoint newpath moveto}def +end + +/ditstart{$DITroff begin + /nfonts 60 def % NFONTS makedev/ditroff dependent! + /fonts[nfonts{0}repeat]def + /fontnames[nfonts{()}repeat]def +/docsave save def +}def + +% character outcalls +/oc{ + /pswid exch def /cc exch def /name exch def + /ditwid pswid fontsize mul resolution mul 72000 div def + /ditsiz fontsize resolution mul 72 div def + ocprocs name known{ocprocs name get exec}{name cb}ifelse +}def +/fractm [.65 0 0 .6 0 0] def +/fraction{ + /fden exch def /fnum exch def gsave /cf currentfont def + cf fractm makefont setfont 0 .3 dm 2 copy neg rmoveto + fnum show rmoveto currentfont cf setfont(\244)show setfont fden show + grestore ditwid 0 rmoveto +}def +/oce{grestore ditwid 0 rmoveto}def +/dm{ditsiz mul}def +/ocprocs 50 dict def ocprocs begin +(14){(1)(4)fraction}def +(12){(1)(2)fraction}def +(34){(3)(4)fraction}def +(13){(1)(3)fraction}def +(23){(2)(3)fraction}def +(18){(1)(8)fraction}def +(38){(3)(8)fraction}def +(58){(5)(8)fraction}def +(78){(7)(8)fraction}def +(sr){gsave 0 .06 dm rmoveto(\326)show oce}def +(is){gsave 0 .15 dm rmoveto(\362)show oce}def +(->){gsave 0 .02 dm rmoveto(\256)show oce}def +(<-){gsave 0 .02 dm rmoveto(\254)show oce}def +(==){gsave 0 .05 dm rmoveto(\272)show oce}def +(uc){gsave currentpoint 400 .009 dm mul add translate + 8 -8 scale ucseal oce}def +end + +% an attempt at a PostScript FONT to implement ditroff special chars +% this will enable us to +% cache the little buggers +% generate faster, more compact PS out of psdit +% confuse everyone (including myself)! +50 dict dup begin +/FontType 3 def +/FontName /DIThacks def +/FontMatrix [.001 0 0 .001 0 0] def +/FontBBox [-260 -260 900 900] def% a lie but ... +/Encoding 256 array def +0 1 255{Encoding exch /.notdef put}for +Encoding + dup 8#040/space put %space + dup 8#110/rc put %right ceil + dup 8#111/lt put %left top curl + dup 8#112/bv put %bold vert + dup 8#113/lk put %left mid curl + dup 8#114/lb put %left bot curl + dup 8#115/rt put %right top curl + dup 8#116/rk put %right mid curl + dup 8#117/rb put %right bot curl + dup 8#120/rf put %right floor + dup 8#121/lf put %left floor + dup 8#122/lc put %left ceil + dup 8#140/sq put %square + dup 8#141/bx put %box + dup 8#142/ci put %circle + dup 8#143/br put %box rule + dup 8#144/rn put %root extender + dup 8#145/vr put %vertical rule + dup 8#146/ob put %outline bullet + dup 8#147/bu put %bullet + dup 8#150/ru put %rule + dup 8#151/ul put %underline + pop +/DITfd 100 dict def +/BuildChar{0 begin + /cc exch def /fd exch def + /charname fd /Encoding get cc get def + /charwid fd /Metrics get charname get def + /charproc fd /CharProcs get charname get def + charwid 0 fd /FontBBox get aload pop setcachedevice + 2 setlinejoin 40 setlinewidth + newpath 0 0 moveto gsave charproc grestore + end}def +/BuildChar load 0 DITfd put +/CharProcs 50 dict def +CharProcs begin +/space{}def +/.notdef{}def +/ru{500 0 rls}def +/rn{0 840 moveto 500 0 rls}def +/vr{0 800 moveto 0 -770 rls}def +/bv{0 800 moveto 0 -1000 rls}def +/br{0 840 moveto 0 -1000 rls}def +/ul{0 -140 moveto 500 0 rls}def +/ob{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath stroke}def +/bu{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath fill}def +/sq{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath stroke}def +/bx{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath fill}def +/ci{500 360 rmoveto currentpoint newpath 333 0 360 arc + 50 setlinewidth stroke}def + +/lt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 add exch s4 a4p stroke}def +/lb{0 800 moveto 0 -550 rlineto currx -200 2cx s4 add exch s4 a4p stroke}def +/rt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 sub exch s4 a4p stroke}def +/rb{0 800 moveto 0 -500 rlineto currx -200 2cx s4 sub exch s4 a4p stroke}def +/lk{0 800 moveto 0 300 -300 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/rk{0 800 moveto 0 300 s2 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/lf{0 800 moveto 0 -1000 rlineto s4 0 rls}def +/rf{0 800 moveto 0 -1000 rlineto s4 neg 0 rls}def +/lc{0 -200 moveto 0 1000 rlineto s4 0 rls}def +/rc{0 -200 moveto 0 1000 rlineto s4 neg 0 rls}def +end + +/Metrics 50 dict def Metrics begin +/.notdef 0 def +/space 500 def +/ru 500 def +/br 0 def +/lt 416 def +/lb 416 def +/rt 416 def +/rb 416 def +/lk 416 def +/rk 416 def +/rc 416 def +/lc 416 def +/rf 416 def +/lf 416 def +/bv 416 def +/ob 350 def +/bu 350 def +/ci 750 def +/bx 750 def +/sq 750 def +/rn 500 def +/ul 500 def +/vr 0 def +end + +DITfd begin +/s2 500 def /s4 250 def /s3 333 def +/a4p{arcto pop pop pop pop}def +/2cx{2 copy exch}def +/rls{rlineto stroke}def +/currx{currentpoint pop}def +/dround{transform round exch round exch itransform} def +end +end +/DIThacks exch definefont pop +ditstart +(psc)xT +576 1 1 xr +1(Times-Roman)xf 1 f +2(Times-Italic)xf 2 f +3(Times-Bold)xf 3 f +4(Times-BoldItalic)xf 4 f +5(Helvetica)xf 5 f +6(Helvetica-Bold)xf 6 f +7(Courier)xf 7 f +8(Courier-Bold)xf 8 f +9(Symbol)xf 9 f +10(DIThacks)xf 10 f +10 s +1 f +xi +%%EndProlog + +%%Page: 1 1 +10 s 10 xH 0 xS 1 f +3 f +14 s +1205 1206(LIBTP:)N +1633(Portable,)X +2100(M)X +2206(odular)X +2551(Transactions)X +3202(for)X +3374(UNIX)X +1 f +11 s +3661 1162(1)N +2 f +12 s +2182 1398(Margo)N +2467(Seltzer)X +2171 1494(Michael)N +2511(Olson)X +1800 1590(University)N +2225(of)X +2324(California,)X +2773(Berkeley)X +3 f +2277 1878(Abstract)N +1 f +10 s +755 2001(Transactions)N +1198(provide)X +1475(a)X +1543(useful)X +1771(programming)X +2239(paradigm)X +2574(for)X +2700(maintaining)X +3114(logical)X +3364(consistency,)X +3790(arbitrating)X +4156(con-)X +555 2091(current)N +808(access,)X +1059(and)X +1200(managing)X +1540(recovery.)X +1886(In)X +1977(traditional)X +2330(UNIX)X +2555(systems,)X +2852(the)X +2974(only)X +3140(easy)X +3307(way)X +3465(of)X +3556(using)X +3753(transactions)X +4160(is)X +4237(to)X +555 2181(purchase)N +876(a)X +947(database)X +1258(system.)X +1554(Such)X +1748(systems)X +2035(are)X +2168(often)X +2367(slow,)X +2572(costly,)X +2817(and)X +2967(may)X +3139(not)X +3275(provide)X +3554(the)X +3686(exact)X +3890(functionality)X +555 2271(desired.)N +848(This)X +1011(paper)X +1210(presents)X +1493(the)X +1611(design,)X +1860(implementation,)X +2402(and)X +2538(performance)X +2965(of)X +3052(LIBTP,)X +3314(a)X +3370(simple,)X +3623(non-proprietary)X +4147(tran-)X +555 2361(saction)N +809(library)X +1050(using)X +1249(the)X +1373(4.4BSD)X +1654(database)X +1957(access)X +2189(routines)X +2473(\()X +3 f +2500(db)X +1 f +2588(\(3\)\).)X +2775(On)X +2899(a)X +2961(conventional)X +3401(transaction)X +3779(processing)X +4148(style)X +555 2451(benchmark,)N +959(its)X +1061(performance)X +1495(is)X +1575(approximately)X +2065(85%)X +2239(that)X +2386(of)X +2480(the)X +2604(database)X +2907(access)X +3139(routines)X +3423(without)X +3693(transaction)X +4071(protec-)X +555 2541(tion,)N +725(200%)X +938(that)X +1084(of)X +1177(using)X +3 f +1376(fsync)X +1 f +1554(\(2\))X +1674(to)X +1761(commit)X +2030(modi\256cations)X +2490(to)X +2577(disk,)X +2755(and)X +2896(125%)X +3108(that)X +3253(of)X +3345(a)X +3406(commercial)X +3810(relational)X +4138(data-)X +555 2631(base)N +718(system.)X +3 f +555 2817(1.)N +655(Introduction)X +1 f +755 2940(Transactions)N +1186(are)X +1306(used)X +1474(in)X +1557(database)X +1855(systems)X +2129(to)X +2212(enable)X +2443(concurrent)X +2807(users)X +2992(to)X +3074(apply)X +3272(multi-operation)X +3790(updates)X +4055(without)X +555 3030(violating)N +863(the)X +985(integrity)X +1280(of)X +1371(the)X +1493(database.)X +1814(They)X +2003(provide)X +2271(the)X +2392(properties)X +2736(of)X +2826(atomicity,)X +3171(consistency,)X +3588(isolation,)X +3906(and)X +4045(durabil-)X +555 3120(ity.)N +701(By)X +816(atomicity,)X +1160(we)X +1276(mean)X +1472(that)X +1614(the)X +1734(set)X +1845(of)X +1934(updates)X +2200(comprising)X +2581(a)X +2638(transaction)X +3011(must)X +3187(be)X +3284(applied)X +3541(as)X +3629(a)X +3686(single)X +3898(unit;)X +4085(that)X +4226(is,)X +555 3210(they)N +714(must)X +890(either)X +1094(all)X +1195(be)X +1292(applied)X +1549(to)X +1632(the)X +1751(database)X +2049(or)X +2137(all)X +2238(be)X +2335(absent.)X +2601(Consistency)X +3013(requires)X +3293(that)X +3434(a)X +3491(transaction)X +3864(take)X +4019(the)X +4138(data-)X +555 3300(base)N +725(from)X +908(one)X +1051(logically)X +1358(consistent)X +1704(state)X +1877(to)X +1965(another.)X +2272(The)X +2423(property)X +2721(of)X +2814(isolation)X +3115(requires)X +3400(that)X +3546(concurrent)X +3916(transactions)X +555 3390(yield)N +750(results)X +994(which)X +1225(are)X +1358(indistinguishable)X +1938(from)X +2128(the)X +2260(results)X +2503(which)X +2733(would)X +2967(be)X +3077(obtained)X +3387(by)X +3501(running)X +3784(the)X +3916(transactions)X +555 3480(sequentially.)N +1002(Finally,)X +1268(durability)X +1599(requires)X +1878(that)X +2018(once)X +2190(transactions)X +2593(have)X +2765(been)X +2937(committed,)X +3319(their)X +3486(results)X +3715(must)X +3890(be)X +3986(preserved)X +555 3570(across)N +776(system)X +1018(failures)X +1279([TPCB90].)X +755 3693(Although)N +1080(these)X +1268(properties)X +1612(are)X +1734(most)X +1912(frequently)X +2265(discussed)X +2595(in)X +2680(the)X +2801(context)X +3060(of)X +3150(databases,)X +3501(they)X +3661(are)X +3782(useful)X +4000(program-)X +555 3783(ming)N +750(paradigms)X +1114(for)X +1238(more)X +1433(general)X +1700(purpose)X +1984(applications.)X +2441(There)X +2659(are)X +2788(several)X +3046(different)X +3353(situations)X +3689(where)X +3916(transactions)X +555 3873(can)N +687(be)X +783(used)X +950(to)X +1032(replace)X +1285(current)X +1533(ad-hoc)X +1772(mechanisms.)X +755 3996(One)N +910(situation)X +1206(is)X +1280(when)X +1475(multiple)X +1762(\256les)X +1916(or)X +2004(parts)X +2181(of)X +2269(\256les)X +2422(need)X +2594(to)X +2676(be)X +2772(updated)X +3046(in)X +3128(an)X +3224(atomic)X +3462(fashion.)X +3758(For)X +3889(example,)X +4201(the)X +555 4086(traditional)N +907(UNIX)X +1131(\256le)X +1256(system)X +1501(uses)X +1661(ordering)X +1955(constraints)X +2324(to)X +2408(achieve)X +2676(recoverability)X +3144(in)X +3228(the)X +3348(face)X +3505(of)X +3594(crashes.)X +3893(When)X +4107(a)X +4165(new)X +555 4176(\256le)N +678(is)X +752(created,)X +1026(its)X +1122(inode)X +1321(is)X +1395(written)X +1642(to)X +1724(disk)X +1877(before)X +2103(the)X +2221(new)X +2375(\256le)X +2497(is)X +2570(added)X +2782(to)X +2864(the)X +2982(directory)X +3292(structure.)X +3633(This)X +3795(guarantees)X +4159(that,)X +555 4266(if)N +627(the)X +748(system)X +993(crashes)X +1253(between)X +1544(the)X +1665(two)X +1808(I/O's,)X +2016(the)X +2137(directory)X +2450(does)X +2620(not)X +2744(contain)X +3002(a)X +3060 0.4531(reference)AX +3383(to)X +3467(an)X +3565(invalid)X +3809(inode.)X +4049(In)X +4138(actu-)X +555 4356(ality,)N +741(the)X +863(desired)X +1119(effect)X +1326(is)X +1402(that)X +1545(these)X +1733(two)X +1876(updates)X +2144(have)X +2319(the)X +2440(transactional)X +2873(property)X +3168(of)X +3258(atomicity)X +3583(\(either)X +3816(both)X +3981(writes)X +4200(are)X +555 4446(visible)N +790(or)X +879(neither)X +1124(is\).)X +1266(Rather)X +1501(than)X +1660(building)X +1947(special)X +2191(purpose)X +2466(recovery)X +2769(mechanisms)X +3186(into)X +3331(the)X +3450(\256le)X +3573(system)X +3816(or)X +3904(related)X +4144(tools)X +555 4536(\()N +2 f +582(e.g.)X +3 f +726(fsck)X +1 f +864(\(8\)\),)X +1033(one)X +1177(could)X +1383(use)X +1518(general)X +1783(purpose)X +2064(transaction)X +2443(recovery)X +2752(protocols)X +3077(after)X +3252(system)X +3501(failure.)X +3778(Any)X +3943(application)X +555 4626(that)N +705(needs)X +918(to)X +1010(keep)X +1192(multiple,)X +1508(related)X +1757(\256les)X +1920(\(or)X +2044(directories\))X +2440(consistent)X +2790(should)X +3032(do)X +3141(so)X +3241(using)X +3443(transactions.)X +3895(Source)X +4147(code)X +555 4716(control)N +805(systems,)X +1101(such)X +1271(as)X +1361(RCS)X +1534(and)X +1673(SCCS,)X +1910(should)X +2146(use)X +2276(transaction)X +2651(semantics)X +2990(to)X +3075(allow)X +3276(the)X +3397(``checking)X +3764(in'')X +3903(of)X +3992(groups)X +4232(of)X +555 4806(related)N +801(\256les.)X +1001(In)X +1095(this)X +1237(way,)X +1418(if)X +1493(the)X +1617 0.2841(``check-in'')AX +2028(fails,)X +2212(the)X +2336(transaction)X +2714(may)X +2878(be)X +2980(aborted,)X +3267(backing)X +3547(out)X +3675(the)X +3799(partial)X +4030(``check-)X +555 4896(in'')N +691(leaving)X +947(the)X +1065(source)X +1295(repository)X +1640(in)X +1722(a)X +1778(consistent)X +2118(state.)X +755 5019(A)N +842(second)X +1094(situation)X +1398(where)X +1624(transactions)X +2036(can)X +2177(be)X +2282(used)X +2458(to)X +2549(replace)X +2811(current)X +3068(ad-hoc)X +3316(mechanisms)X +3741(is)X +3822(in)X +3912(applications)X +555 5109(where)N +776(concurrent)X +1144(updates)X +1413(to)X +1499(a)X +1559(shared)X +1793(\256le)X +1919(are)X +2042(desired,)X +2318(but)X +2444(there)X +2629(is)X +2706(logical)X +2948(consistency)X +3345(of)X +3435(the)X +3556(data)X +3713(which)X +3932(needs)X +4138(to)X +4223(be)X +555 5199(preserved.)N +928(For)X +1059(example,)X +1371(when)X +1565(the)X +1683(password)X +2006(\256le)X +2128(is)X +2201(updated,)X +2495(\256le)X +2617(locking)X +2877(is)X +2950(used)X +3117(to)X +3199(disallow)X +3490(concurrent)X +3854(access.)X +4120(Tran-)X +555 5289(saction)N +804(semantics)X +1142(on)X +1244(the)X +1364(password)X +1689(\256les)X +1844(would)X +2066(allow)X +2266(concurrent)X +2632(updates,)X +2919(while)X +3119(preserving)X +3479(the)X +3598(logical)X +3837(consistency)X +4232(of)X +555 5379(the)N +681(password)X +1012(database.)X +1357(Similarly,)X +1702(UNIX)X +1930(utilities)X +2196(which)X +2419(rewrite)X +2674(\256les)X +2834(face)X +2996(a)X +3059(potential)X +3366(race)X +3528(condition)X +3857(between)X +4152(their)X +555 5469(rewriting)N +871(a)X +929(\256le)X +1053(and)X +1191(another)X +1453(process)X +1715(reading)X +1977(the)X +2096(\256le.)X +2259(For)X +2391(example,)X +2704(the)X +2823(compiler)X +3129(\(more)X +3342(precisely,)X +3673(the)X +3792(assembler\))X +4161(may)X +8 s +10 f +555 5541(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5619(1)N +8 s +763 5644(To)N +850(appear)X +1035(in)X +1101(the)X +2 f +1195(Proceedings)X +1530(of)X +1596(the)X +1690(1992)X +1834(Winter)X +2024(Usenix)X +1 f +2201(,)X +2233(San)X +2345(Francisco,)X +2625(CA,)X +2746(January)X +2960(1992.)X + +2 p +%%Page: 2 2 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(have)N +737(to)X +829(rewrite)X +1087(a)X +1152(\256le)X +1283(to)X +1374(which)X +1599(it)X +1672(has)X +1808(write)X +2002(permission)X +2382(in)X +2473(a)X +2538(directory)X +2857(to)X +2948(which)X +3173(it)X +3246(does)X +3422(not)X +3553(have)X +3734(write)X +3928(permission.)X +555 720(While)N +779(the)X +904(``.o'')X +1099(\256le)X +1228(is)X +1308(being)X +1513(written,)X +1787(another)X +2055(utility)X +2272(such)X +2446(as)X +3 f +2540(nm)X +1 f +2651(\(1\))X +2772(or)X +3 f +2866(ar)X +1 f +2942(\(1\))X +3063(may)X +3228(read)X +3394(the)X +3519(\256le)X +3648(and)X +3791(produce)X +4077(invalid)X +555 810(results)N +790(since)X +981(the)X +1105(\256le)X +1233(has)X +1366(not)X +1494(been)X +1672(completely)X +2054(written.)X +2347(Currently,)X +2700(some)X +2895(utilities)X +3160(use)X +3293(special)X +3542(purpose)X +3821(code)X +3998(to)X +4085(handle)X +555 900(such)N +722(cases)X +912(while)X +1110(others)X +1326(ignore)X +1551(the)X +1669(problem)X +1956(and)X +2092(force)X +2278(users)X +2463(to)X +2545(live)X +2685(with)X +2847(the)X +2965(consequences.)X +755 1023(In)N +845(this)X +983(paper,)X +1205(we)X +1322(present)X +1577(a)X +1635(simple)X +1870(library)X +2106(which)X +2324(provides)X +2622(transaction)X +2996(semantics)X +3334(\(atomicity,)X +3705(consistency,)X +4121(isola-)X +555 1113(tion,)N +720(and)X +857(durability\).)X +1236(The)X +1382(4.4BSD)X +1658(database)X +1956(access)X +2182(methods)X +2473(have)X +2645(been)X +2817(modi\256ed)X +3121(to)X +3203(use)X +3330(this)X +3465(library,)X +3719(optionally)X +4063(provid-)X +555 1203(ing)N +682(shared)X +917(buffer)X +1139(management)X +1574(between)X +1867(applications,)X +2298(locking,)X +2582(and)X +2722(transaction)X +3098(semantics.)X +3478(Any)X +3640(UNIX)X +3865(program)X +4161(may)X +555 1293(transaction)N +930(protect)X +1176(its)X +1274(data)X +1430(by)X +1532(requesting)X +1888(transaction)X +2262(protection)X +2609(with)X +2773(the)X +3 f +2893(db)X +1 f +2981(\(3\))X +3097(library)X +3333(or)X +3422(by)X +3524(adding)X +3764(appropriate)X +4152(calls)X +555 1383(to)N +646(the)X +773(transaction)X +1154(manager,)X +1480(buffer)X +1706(manager,)X +2032(lock)X +2199(manager,)X +2525(and)X +2670(log)X +2801(manager.)X +3147(The)X +3301(library)X +3543(routines)X +3829(may)X +3995(be)X +4099(linked)X +555 1473(into)N +708(the)X +834(host)X +995(application)X +1379(and)X +1523(called)X +1743(by)X +1851(subroutine)X +2217(interface,)X +2547(or)X +2642(they)X +2808(may)X +2974(reside)X +3194(in)X +3284(a)X +3348(separate)X +3640(server)X +3865(process.)X +4174(The)X +555 1563(server)N +772(architecture)X +1172(provides)X +1468(for)X +1582(network)X +1865(access)X +2091(and)X +2227(better)X +2430(protection)X +2775(mechanisms.)X +3 f +555 1749(2.)N +655(Related)X +938(Work)X +1 f +755 1872(There)N +1000(has)X +1164(been)X +1373(much)X +1608(discussion)X +1998(in)X +2117(recent)X +2371(years)X +2597(about)X +2831(new)X +3021(transaction)X +3429(models)X +3716(and)X +3888(architectures)X +555 1962 0.1172([SPEC88][NODI90][CHEN91][MOHA91].)AN +2009(Much)X +2220(of)X +2310(this)X +2448(work)X +2636(focuses)X +2900(on)X +3003(new)X +3160(ways)X +3348(to)X +3433(model)X +3656(transactions)X +4062(and)X +4201(the)X +555 2052(interactions)N +953(between)X +1245(them,)X +1449(while)X +1651(the)X +1772(work)X +1960(presented)X +2291(here)X +2453(focuses)X +2717(on)X +2820(the)X +2941(implementation)X +3466(and)X +3605(performance)X +4035(of)X +4125(tradi-)X +555 2142(tional)N +757(transaction)X +1129(techniques)X +1492(\(write-ahead)X +1919(logging)X +2183(and)X +2319(two-phase)X +2669(locking\))X +2956(on)X +3056(a)X +3112(standard)X +3404(operating)X +3727(system)X +3969(\(UNIX\).)X +755 2265(Such)N +947(traditional)X +1308(operating)X +1643(systems)X +1928(are)X +2059(often)X +2256(criticized)X +2587(for)X +2713(their)X +2892(inability)X +3190(to)X +3283(perform)X +3573(transaction)X +3956(processing)X +555 2355(adequately.)N +971([STON81])X +1342(cites)X +1517(three)X +1706(main)X +1894(areas)X +2088(of)X +2183(inadequate)X +2559(support:)X +2849(buffer)X +3074(management,)X +3532(the)X +3658(\256le)X +3788(system,)X +4058(and)X +4201(the)X +555 2445(process)N +823(structure.)X +1191(These)X +1410(arguments)X +1771(are)X +1897(summarized)X +2316(in)X +2405(table)X +2587(one.)X +2769(Fortunately,)X +3184(much)X +3388(has)X +3521(changed)X +3815(since)X +4006(1981.)X +4232(In)X +555 2535(the)N +683(area)X +848(of)X +945(buffer)X +1172(management,)X +1632(most)X +1817(UNIX)X +2048(systems)X +2331(provide)X +2606(the)X +2734(ability)X +2968(to)X +3060(memory)X +3357(map)X +3525(\256les,)X +3708(thus)X +3870(obviating)X +4201(the)X +555 2625(need)N +734(for)X +855(a)X +918(copy)X +1101(between)X +1396(kernel)X +1624(and)X +1766(user)X +1926(space.)X +2171(If)X +2251(a)X +2313(database)X +2616(system)X +2864(is)X +2943(going)X +3151(to)X +3239(use)X +3372(the)X +3496(\256le)X +3624(system)X +3872(buffer)X +4095(cache,)X +555 2715(then)N +719(a)X +781(system)X +1029(call)X +1171(is)X +1250(required.)X +1584(However,)X +1924(if)X +1998(buffering)X +2322(is)X +2400(provided)X +2710(at)X +2793(user)X +2952(level)X +3133(using)X +3331(shared)X +3566(memory,)X +3878(as)X +3970(in)X +4057(LIBTP,)X +555 2805(buffer)N +776(management)X +1210(is)X +1287(only)X +1452(as)X +1542(slow)X +1716(as)X +1806(access)X +2035(to)X +2120(shared)X +2353(memory)X +2643(and)X +2782(any)X +2921(replacement)X +3337(algorithm)X +3671(may)X +3832(be)X +3931(used.)X +4121(Since)X +555 2895(multiple)N +849(processes)X +1185(can)X +1325(access)X +1559(the)X +1685(shared)X +1923(data,)X +2105(prefetching)X +2499(may)X +2665(be)X +2769(accomplished)X +3238(by)X +3346(separate)X +3638(processes)X +3973(or)X +4067(threads)X +555 2985(whose)N +782(sole)X +932(purpose)X +1207(is)X +1281(to)X +1364(prefetch)X +1649(pages)X +1853(and)X +1990(wait)X +2149(on)X +2250(them.)X +2471(There)X +2680(is)X +2754(still)X +2894(no)X +2995(way)X +3150(to)X +3233(enforce)X +3496(write)X +3682(ordering)X +3975(other)X +4161(than)X +555 3075(keeping)N +829(pages)X +1032(in)X +1114(user)X +1268(memory)X +1555(and)X +1691(using)X +1884(the)X +3 f +2002(fsync)X +1 f +2180(\(3\))X +2294(system)X +2536(call)X +2672(to)X +2754(perform)X +3033(synchronous)X +3458(writes.)X +755 3198(In)N +845(the)X +966(area)X +1124(of)X +1214(\256le)X +1339(systems,)X +1635(the)X +1756(fast)X +1895(\256le)X +2020(system)X +2265(\(FFS\))X +2474([MCKU84])X +2871(allows)X +3103(allocation)X +3442(in)X +3527(units)X +3704(up)X +3806(to)X +3890(64KBytes)X +4232(as)X +555 3288(opposed)N +846(to)X +932(the)X +1054(4KByte)X +1327(and)X +1466(8KByte)X +1738(\256gures)X +1979(quoted)X +2220(in)X +2305([STON81].)X +2711(The)X +2859(measurements)X +3341(in)X +3426(this)X +3564(paper)X +3766(were)X +3946(taken)X +4143(from)X +555 3378(an)N +655(8KByte)X +928(FFS,)X +1104(but)X +1230(as)X +1320(LIBTP)X +1565(runs)X +1726(exclusively)X +2114(in)X +2199(user)X +2356(space,)X +2578(there)X +2762(is)X +2838(nothing)X +3105(to)X +3190(prevent)X +3454(it)X +3521(from)X +3700(being)X +3901(run)X +4031(on)X +4134(other)X +555 3468(UNIX)N +776(compatible)X +1152(\256le)X +1274(systems)X +1547(\(e.g.)X +1710(log-structured)X +2180([ROSE91],)X +2558(extent-based,)X +3004(or)X +3091(multi-block)X +3484([SELT91]\).)X +755 3591(Finally,)N +1029(with)X +1199(regard)X +1433(to)X +1523(the)X +1648(process)X +1916(structure,)X +2244(neither)X +2494(context)X +2757(switch)X +2993(time)X +3162(nor)X +3296(scheduling)X +3670(around)X +3920(semaphores)X +555 3681(seems)N +785(to)X +881(affect)X +1099(the)X +1231(system)X +1487(performance.)X +1968(However,)X +2317(the)X +2449(implementation)X +2984(of)X +3084(semaphores)X +3496(can)X +3641(impact)X +3892(performance)X +555 3771(tremendously.)N +1051(This)X +1213(is)X +1286(discussed)X +1613(in)X +1695(more)X +1880(detail)X +2078(in)X +2160(section)X +2407(4.3.)X +755 3894(The)N +908(Tuxedo)X +1181(system)X +1431(from)X +1615(AT&T)X +1861(is)X +1941(a)X +2004(transaction)X +2383(manager)X +2687(which)X +2910(coordinates)X +3307(distributed)X +3676(transaction)X +4055(commit)X +555 3984(from)N +738(a)X +801(variety)X +1051(of)X +1145(different)X +1449(local)X +1632(transaction)X +2011(managers.)X +2386(At)X +2493(this)X +2634(time,)X +2822(LIBTP)X +3070(does)X +3243(not)X +3371(have)X +3549(its)X +3650(own)X +3814(mechanism)X +4205(for)X +555 4074(distributed)N +942(commit)X +1231(processing,)X +1639(but)X +1786(could)X +2009(be)X +2130(used)X +2322(as)X +2434(a)X +2515(local)X +2716(transaction)X +3113(agent)X +3331(by)X +3455(systems)X +3752(such)X +3943(as)X +4054(Tuxedo)X +555 4164([ANDR89].)N +10 f +863 4393(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 4483(Buffer)N +1133(Management)X +10 f +1672(g)X +1 f +1720(Data)X +1892(must)X +2067(be)X +2163(copied)X +2397(between)X +2685(kernel)X +2906(space)X +3105(and)X +3241(user)X +3395(space.)X +10 f +1672 4573(g)N +1 f +1720(Buffer)X +1950(pool)X +2112(access)X +2338(is)X +2411(too)X +2533(slow.)X +10 f +1672 4663(g)N +1 f +1720(There)X +1928(is)X +2001(no)X +2101(way)X +2255(to)X +2337(request)X +2589(prefetch.)X +10 f +1672 4753(g)N +1 f +1720(Replacement)X +2159(is)X +2232(usually)X +2483(LRU)X +2663(which)X +2879(may)X +3037(be)X +3133(suboptimal)X +3508(for)X +3622(databases.)X +10 f +1672 4843(g)N +1 f +1720(There)X +1928(is)X +2001(no)X +2101(way)X +2255(to)X +2337(guarantee)X +2670(write)X +2855(ordering.)X +10 f +863 4853(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 4943(File)N +1047(System)X +10 f +1672(g)X +1 f +1720(Allocation)X +2078(is)X +2151(done)X +2327(in)X +2409(small)X +2602(blocks)X +2831(\(usually)X +3109(4K)X +3227(or)X +3314(8K\).)X +10 f +1672 5033(g)N +1 f +1720(Logical)X +1985(organization)X +2406(of)X +2493(\256les)X +2646(is)X +2719(redundantly)X +3122(expressed.)X +10 f +863 5043(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 5133(Process)N +1168(Structure)X +10 f +1672(g)X +1 f +1720(Context)X +1993(switching)X +2324(and)X +2460(message)X +2752(passing)X +3012(are)X +3131(too)X +3253(slow.)X +10 f +1672 5223(g)N +1 f +1720(A)X +1798(process)X +2059(may)X +2217(be)X +2313(descheduled)X +2730(while)X +2928(holding)X +3192(a)X +3248(semaphore.)X +10 f +863 5233(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +863(c)X +5193(c)Y +5113(c)Y +5033(c)Y +4953(c)Y +4873(c)Y +4793(c)Y +4713(c)Y +4633(c)Y +4553(c)Y +4473(c)Y +3990 5233(c)N +5193(c)Y +5113(c)Y +5033(c)Y +4953(c)Y +4873(c)Y +4793(c)Y +4713(c)Y +4633(c)Y +4553(c)Y +4473(c)Y +3 f +1156 5446(Table)N +1371(One:)X +1560(Shortcomings)X +2051(of)X +2138(UNIX)X +2363(transaction)X +2770(support)X +3056(cited)X +3241(in)X +3327([STON81].)X + +3 p +%%Page: 3 3 +10 s 10 xH 0 xS 3 f +1 f +755 630(The)N +901(transaction)X +1274(architecture)X +1675(presented)X +2004(in)X +2087([YOUN91])X +2474(is)X +2548(very)X +2712(similar)X +2955(to)X +3038(that)X +3179(implemented)X +3618(in)X +3701(the)X +3820(LIBTP.)X +4103(While)X +555 720([YOUN91])N +947(presents)X +1236(a)X +1298(model)X +1524(for)X +1644(providing)X +1981(transaction)X +2359(services,)X +2663(this)X +2803(paper)X +3007(focuses)X +3273(on)X +3378(the)X +3501(implementation)X +4028(and)X +4169(per-)X +555 810(formance)N +881(of)X +970(a)X +1028(particular)X +1358(system.)X +1642(In)X +1731(addition,)X +2034(we)X +2149(provide)X +2415(detailed)X +2690(comparisons)X +3116(with)X +3279(alternative)X +3639(solutions:)X +3970(traditional)X +555 900(UNIX)N +776(services)X +1055(and)X +1191(commercial)X +1590(database)X +1887(management)X +2317(systems.)X +3 f +555 1086(3.)N +655(Architecture)X +1 f +755 1209(The)N +906(library)X +1146(is)X +1224(designed)X +1534(to)X +1621(provide)X +1891(well)X +2054(de\256ned)X +2315(interfaces)X +2653(to)X +2740(the)X +2863(services)X +3147(required)X +3440(for)X +3559(transaction)X +3936(processing.)X +555 1299(These)N +777(services)X +1066(are)X +1195(recovery,)X +1527(concurrency)X +1955(control,)X +2232(and)X +2378(the)X +2506(management)X +2946(of)X +3043(shared)X +3283(data.)X +3487(First)X +3663(we)X +3787(will)X +3941(discuss)X +4201(the)X +555 1389(design)N +795(tradeoffs)X +1112(in)X +1205(the)X +1334(selection)X +1650(of)X +1748(recovery,)X +2081(concurrency)X +2510(control,)X +2787(and)X +2933(buffer)X +3160(management)X +3600(implementations,)X +4183(and)X +555 1479(then)N +713(we)X +827(will)X +971(present)X +1223(the)X +1341(overall)X +1584(library)X +1818(architecture)X +2218(and)X +2354(module)X +2614(descriptions.)X +3 f +555 1665(3.1.)N +715(Design)X +966(Tradeoffs)X +1 f +3 f +555 1851(3.1.1.)N +775(Crash)X +1004(Recovery)X +1 f +755 1974(The)N +909(recovery)X +1220(protocol)X +1516(is)X +1598(responsible)X +1992(for)X +2115(providing)X +2455(the)X +2582(transaction)X +2963(semantics)X +3308(discussed)X +3644(earlier.)X +3919(There)X +4136(are)X +4263(a)X +555 2064(wide)N +739(range)X +946(of)X +1041(recovery)X +1351(protocols)X +1677(available)X +1995([HAER83],)X +2395(but)X +2525(we)X +2647(can)X +2786(crudely)X +3054(divide)X +3281(them)X +3468(into)X +3619(two)X +3766(main)X +3953(categories.)X +555 2154(The)N +706(\256rst)X +856(category)X +1159(records)X +1422(all)X +1528(modi\256cations)X +1989(to)X +2077(the)X +2201(database)X +2504(in)X +2592(a)X +2653(separate)X +2942(\256le,)X +3089(and)X +3230(uses)X +3393(this)X +3533(\256le)X +3660(\(log\))X +3841(to)X +3928(back)X +4105(out)X +4232(or)X +555 2244(reapply)N +825(these)X +1019(modi\256cations)X +1483(if)X +1561(a)X +1626(transaction)X +2007(aborts)X +2232(or)X +2328(the)X +2455(system)X +2706(crashes.)X +3012(We)X +3153(call)X +3298(this)X +3442(set)X +3560(the)X +3 f +3687(logging)X +3963(protocols)X +1 f +4279(.)X +555 2334(The)N +703(second)X +949(category)X +1249(avoids)X +1481(the)X +1602(use)X +1732(of)X +1822(a)X +1881(log)X +2006(by)X +2109(carefully)X +2418(controlling)X +2792(when)X +2989(data)X +3146(are)X +3268(written)X +3518(to)X +3603(disk.)X +3799(We)X +3934(call)X +4073(this)X +4210(set)X +555 2424(the)N +3 f +673(non-logging)X +1096(protocols)X +1 f +1412(.)X +755 2547(Non-logging)N +1185(protocols)X +1504(hold)X +1666(dirty)X +1837(buffers)X +2085(in)X +2167(main)X +2347(memory)X +2634(or)X +2721(temporary)X +3071(\256les)X +3224(until)X +3390(commit)X +3654(and)X +3790(then)X +3948(force)X +4134(these)X +555 2637(pages)N +769(to)X +862(disk)X +1026(at)X +1115(transaction)X +1498(commit.)X +1813(While)X +2040(we)X +2165(can)X +2308(use)X +2446(temporary)X +2807(\256les)X +2971(to)X +3064(hold)X +3237(dirty)X +3418(pages)X +3631(that)X +3781(may)X +3949(need)X +4131(to)X +4223(be)X +555 2727(evicted)N +810(from)X +988(memory)X +1277(during)X +1508(a)X +1566(long-running)X +2006(transaction,)X +2400(the)X +2520(only)X +2684(user-level)X +3023(mechanism)X +3410(to)X +3494(force)X +3682(pages)X +3887(to)X +3971(disk)X +4126(is)X +4201(the)X +3 f +555 2817(fsync)N +1 f +733(\(2\))X +850(system)X +1095(call.)X +1274(Unfortunately,)X +3 f +1767(fsync)X +1 f +1945(\(2\))X +2062(is)X +2138(an)X +2237(expensive)X +2581(system)X +2826(call)X +2965(in)X +3050(that)X +3193(it)X +3260(forces)X +3480(all)X +3583(pages)X +3789(of)X +3879(a)X +3938(\256le)X +4062(to)X +4146(disk,)X +555 2907(and)N +691(transactions)X +1094(that)X +1234(manage)X +1504(more)X +1689(than)X +1847(one)X +1983(\256le)X +2105(must)X +2280(issue)X +2460(one)X +2596(call)X +2732(per)X +2855(\256le.)X +755 3030(In)N +853(addition,)X +3 f +1166(fsync)X +1 f +1344(\(2\))X +1469(provides)X +1776(no)X +1887(way)X +2051(to)X +2143(control)X +2400(the)X +2528(order)X +2728(in)X +2820(which)X +3046(dirty)X +3227(pages)X +3440(are)X +3569(written)X +3826(to)X +3918(disk.)X +4121(Since)X +555 3120(non-logging)N +976(protocols)X +1304(must)X +1489(sometimes)X +1861(order)X +2061(writes)X +2287(carefully)X +2603([SULL92],)X +2987(they)X +3155(are)X +3284(dif\256cult)X +3567(to)X +3659(implement)X +4030(on)X +4139(Unix)X +555 3210(systems.)N +868(As)X +977(a)X +1033(result,)X +1251(we)X +1365(have)X +1537(chosen)X +1780(to)X +1862(implement)X +2224(a)X +2280(logging)X +2544(protocol.)X +755 3333(Logging)N +1050(protocols)X +1372(may)X +1534(be)X +1634(categorized)X +2029(based)X +2236(on)X +2340(how)X +2502(information)X +2904(is)X +2981(logged)X +3223(\(physically)X +3602(or)X +3692(logically\))X +4022(and)X +4161(how)X +555 3423(much)N +767(is)X +854(logged)X +1106(\(before)X +1373(images,)X +1654(after)X +1836(images)X +2097(or)X +2198(both\).)X +2441(In)X +3 f +2542(physical)X +2855(logging)X +1 f +3103(,)X +3157(images)X +3417(of)X +3517(complete)X +3844(physical)X +4144(units)X +555 3513(\(pages)N +786(or)X +874(buffers\))X +1150(are)X +1270(recorded,)X +1593(while)X +1792(in)X +3 f +1875(logical)X +2118(logging)X +1 f +2387(a)X +2444(description)X +2820(of)X +2907(the)X +3025(operation)X +3348(is)X +3421(recorded.)X +3763(Therefore,)X +4121(while)X +555 3603(we)N +675(may)X +839(record)X +1071(entire)X +1280(pages)X +1489(in)X +1577(a)X +1639(physical)X +1932(log,)X +2080(we)X +2200(need)X +2378(only)X +2546(record)X +2777(the)X +2900(records)X +3162(being)X +3365(modi\256ed)X +3674(in)X +3761(a)X +3822(logical)X +4065(log.)X +4232(In)X +555 3693(fact,)N +718(physical)X +1006(logging)X +1271(can)X +1404(be)X +1501(thought)X +1766(of)X +1854(as)X +1942(a)X +1999(special)X +2243(case)X +2403(of)X +2491(logical)X +2730(logging,)X +3015(since)X +3201(the)X +3320 0.3125(``records'')AX +3686(that)X +3827(we)X +3942(log)X +4065(in)X +4148(logi-)X +555 3783(cal)N +673(logging)X +941(might)X +1151(be)X +1251(physical)X +1542(pages.)X +1789(Since)X +1991(logical)X +2233(logging)X +2501(is)X +2578(both)X +2743(more)X +2931(space-ef\256cient)X +3423(and)X +3562(more)X +3750(general,)X +4030(we)X +4147(have)X +555 3873(chosen)N +798(it)X +862(for)X +976(our)X +1103(logging)X +1367(protocol.)X +755 3996(In)N +3 f +843(before-image)X +1315(logging)X +1 f +1563(,)X +1604(we)X +1719(log)X +1842(a)X +1899(copy)X +2076(of)X +2164(the)X +2283(data)X +2438(before)X +2665(the)X +2784(update,)X +3039(while)X +3238(in)X +3 f +3321(after-image)X +3739(logging)X +1 f +3987(,)X +4027(we)X +4141(log)X +4263(a)X +555 4086(copy)N +740(of)X +836(the)X +963(data)X +1126(after)X +1303(the)X +1429(update.)X +1711(If)X +1793(we)X +1915(log)X +2045(only)X +2215(before-images,)X +2723(then)X +2889(there)X +3078(is)X +3159(suf\256cient)X +3485(information)X +3891(in)X +3981(the)X +4107(log)X +4237(to)X +555 4176(allow)N +761(us)X +860(to)X +3 f +950(undo)X +1 f +1150(the)X +1276(transaction)X +1656(\(go)X +1791(back)X +1971(to)X +2061(the)X +2187(state)X +2361(represented)X +2759(by)X +2866(the)X +2991(before-image\).)X +3514(However,)X +3876(if)X +3952(the)X +4077(system)X +555 4266(crashes)N +814(and)X +952(a)X +1010(committed)X +1374(transaction's)X +1806(changes)X +2087(have)X +2261(not)X +2385(reached)X +2658(the)X +2778(disk,)X +2953(we)X +3068(have)X +3241(no)X +3342(means)X +3568(to)X +3 f +3651(redo)X +1 f +3828(the)X +3947(transaction)X +555 4356(\(reapply)N +849(the)X +973(updates\).)X +1311(Therefore,)X +1675(logging)X +1945(only)X +2113(before-images)X +2599(necessitates)X +3004(forcing)X +3262(dirty)X +3439(pages)X +3648(at)X +3732(commit)X +4002(time.)X +4210(As)X +555 4446(mentioned)N +913(above,)X +1145(forcing)X +1397(pages)X +1600(at)X +1678(commit)X +1942(is)X +2015(considered)X +2383(too)X +2505(costly.)X +755 4569(If)N +834(we)X +953(log)X +1080(only)X +1247(after-images,)X +1694(then)X +1857(there)X +2043(is)X +2121(suf\256cient)X +2444(information)X +2847(in)X +2934(the)X +3057(log)X +3184(to)X +3271(allow)X +3474(us)X +3570(to)X +3657(redo)X +3825(the)X +3947(transaction)X +555 4659(\(go)N +687(forward)X +967(to)X +1054(the)X +1177(state)X +1348(represented)X +1743(by)X +1847(the)X +1969(after-image\),)X +2411(but)X +2537(we)X +2655(do)X +2759(not)X +2885(have)X +3061(the)X +3183(information)X +3585(required)X +3877(to)X +3963(undo)X +4147(tran-)X +555 4749(sactions)N +845(which)X +1073(aborted)X +1346(after)X +1526(dirty)X +1709(pages)X +1924(were)X +2113(written)X +2372(to)X +2466(disk.)X +2670(Therefore,)X +3039(logging)X +3314(only)X +3487(after-images)X +3920(necessitates)X +555 4839(holding)N +819(all)X +919(dirty)X +1090(buffers)X +1338(in)X +1420(main)X +1600(memory)X +1887(until)X +2053(commit)X +2317(or)X +2404(writing)X +2655(them)X +2835(to)X +2917(a)X +2973(temporary)X +3323(\256le.)X +755 4962(Since)N +956(neither)X +1202(constraint)X +1541(\(forcing)X +1823(pages)X +2029(on)X +2132(commit)X +2399(or)X +2489(buffering)X +2811(pages)X +3016(until)X +3184(commit\))X +3477(was)X +3624(feasible,)X +3916(we)X +4032(chose)X +4237(to)X +555 5052(log)N +683(both)X +851(before)X +1083(and)X +1225(after)X +1399(images.)X +1672(The)X +1823(only)X +1991(remaining)X +2342(consideration)X +2800(is)X +2879(when)X +3079(changes)X +3363(get)X +3486(written)X +3738(to)X +3825(disk.)X +4023(Changes)X +555 5142(affect)N +764(both)X +931(data)X +1090(pages)X +1298(and)X +1438(the)X +1560(log.)X +1726(If)X +1804(the)X +1926(changed)X +2218(data)X +2376(page)X +2552(is)X +2629(written)X +2880(before)X +3110(the)X +3232(log)X +3358(page,)X +3554(and)X +3694(the)X +3816(system)X +4062(crashes)X +555 5232(before)N +787(the)X +911(log)X +1039(page)X +1217(is)X +1296(written,)X +1569(the)X +1693(log)X +1820(will)X +1969(contain)X +2230(insuf\256cient)X +2615(information)X +3018(to)X +3105(undo)X +3290(the)X +3413(change.)X +3706(This)X +3873(violates)X +4147(tran-)X +555 5322(saction)N +803(semantics,)X +1160(since)X +1346(some)X +1536(changed)X +1825(data)X +1980(pages)X +2184(may)X +2343(not)X +2466(have)X +2638(been)X +2810(written,)X +3077(and)X +3213(the)X +3331(database)X +3628(cannot)X +3862(be)X +3958(restored)X +4237(to)X +555 5412(its)N +650(pre-transaction)X +1152(state.)X +755 5535(The)N +914(log)X +1050(record)X +1290(describing)X +1658(an)X +1768(update)X +2016(must)X +2205(be)X +2315(written)X +2576(to)X +2672(stable)X +2893(storage)X +3159(before)X +3398(the)X +3529(modi\256ed)X +3846(page.)X +4071(This)X +4246(is)X +3 f +555 5625(write-ahead)N +992(logging)X +1 f +1240(.)X +1307(If)X +1388(log)X +1517(records)X +1781(are)X +1907(safely)X +2126(written)X +2380(to)X +2469(disk,)X +2649(data)X +2810(pages)X +3020(may)X +3185(be)X +3288(written)X +3542(at)X +3627(any)X +3770(time)X +3939(afterwards.)X +555 5715(This)N +721(means)X +950(that)X +1094(the)X +1216(only)X +1382(\256le)X +1508(that)X +1652(ever)X +1815(needs)X +2022(to)X +2108(be)X +2208(forced)X +2438(to)X +2524(disk)X +2681(is)X +2758(the)X +2880(log.)X +3046(Since)X +3248(the)X +3370(log)X +3495(is)X +3571(append-only,)X +4015(modi\256ed)X + +4 p +%%Page: 4 4 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630(pages)N +760(always)X +1005(appear)X +1242(at)X +1322(the)X +1442(end)X +1580(and)X +1718(may)X +1878(be)X +1976(written)X +2224(to)X +2307(disk)X +2461(ef\256ciently)X +2807(in)X +2890(any)X +3027(\256le)X +3150(system)X +3393(that)X +3534(favors)X +3756(sequential)X +4102(order-)X +555 720(ing)N +677(\()X +2 f +704(e.g.)X +1 f +820(,)X +860(FFS,)X +1032(log-structured)X +1502(\256le)X +1624(system,)X +1886(or)X +1973(an)X +2069(extent-based)X +2495(system\).)X +3 f +555 906(3.1.2.)N +775(Concurrency)X +1245(Control)X +1 f +755 1029(The)N +918(concurrency)X +1354(control)X +1619(protocol)X +1923(is)X +2013(responsible)X +2415(for)X +2546(maintaining)X +2965(consistency)X +3376(in)X +3475(the)X +3610(presence)X +3929(of)X +4033(multiple)X +555 1119(accesses.)N +897(There)X +1114(are)X +1242(several)X +1499(alternative)X +1867(solutions)X +2183(such)X +2358(as)X +2453(locking,)X +2741(optimistic)X +3088(concurrency)X +3514(control)X +3769([KUNG81],)X +4183(and)X +555 1209(timestamp)N +912(ordering)X +1208([BERN80].)X +1619(Since)X +1821(optimistic)X +2164(methods)X +2459(and)X +2599(timestamp)X +2956(ordering)X +3252(are)X +3374(generally)X +3696(more)X +3884(complex)X +4183(and)X +555 1299(restrict)N +804(concurrency)X +1228(without)X +1498(eliminating)X +1888(starvation)X +2230(or)X +2323(deadlocks,)X +2690(we)X +2810(chose)X +3018(two-phase)X +3373(locking)X +3638(\(2PL\).)X +3890(Strict)X +4088(2PL)X +4246(is)X +555 1389(suboptimal)N +935(for)X +1054(certain)X +1297(data)X +1455(structures)X +1791(such)X +1962(as)X +2053(B-trees)X +2309(because)X +2588(it)X +2656(can)X +2792(limit)X +2966(concurrency,)X +3408(so)X +3503(we)X +3621(use)X +3752(a)X +3812(special)X +4059(locking)X +555 1479(protocol)N +842(based)X +1045(on)X +1145(one)X +1281(described)X +1609(in)X +1691([LEHM81].)X +755 1602(The)N +901(B-tree)X +1123(locking)X +1384(protocol)X +1672(we)X +1787(implemented)X +2226(releases)X +2502(locks)X +2691(at)X +2769(internal)X +3034(nodes)X +3241(in)X +3323(the)X +3441(tree)X +3582(as)X +3669(it)X +3733(descends.)X +4083(A)X +4161(lock)X +555 1692(on)N +658(an)X +757(internal)X +1025(page)X +1200(is)X +1276(always)X +1522(released)X +1808(before)X +2036(a)X +2094(lock)X +2254(on)X +2356(its)X +2453(child)X +2635(is)X +2710(obtained)X +3008(\(that)X +3177(is,)X +3272(locks)X +3463(are)X +3584(not)X +3 f +3708(coupled)X +1 f +3996([BAY77])X +555 1782(during)N +786(descent\).)X +1116(When)X +1330(a)X +1388(leaf)X +1531(\(or)X +1647(internal\))X +1941(page)X +2115(is)X +2190(split,)X +2369(a)X +2427(write)X +2614(lock)X +2774(is)X +2849(acquired)X +3148(on)X +3250(the)X +3370(parent)X +3593(before)X +3821(the)X +3941(lock)X +4100(on)X +4201(the)X +555 1872(just-split)N +855(page)X +1028(is)X +1102(released)X +1387(\(locks)X +1604(are)X +3 f +1724(coupled)X +1 f +2011(during)X +2241(ascent\).)X +2530(Write)X +2734(locks)X +2924(on)X +3025(internal)X +3291(pages)X +3495(are)X +3615(released)X +3899(immediately)X +555 1962(after)N +723(the)X +841(page)X +1013(is)X +1086(updated,)X +1380(but)X +1502(locks)X +1691(on)X +1791(leaf)X +1932(pages)X +2135(are)X +2254(held)X +2412(until)X +2578(the)X +2696(end)X +2832(of)X +2919(the)X +3037(transaction.)X +755 2085(Since)N +964(locks)X +1164(are)X +1294(released)X +1589(during)X +1828(descent,)X +2119(the)X +2247(structure)X +2558(of)X +2655(the)X +2783(tree)X +2934(may)X +3102(change)X +3360(above)X +3582(a)X +3648(node)X +3834(being)X +4042(used)X +4219(by)X +555 2175(some)N +752(process.)X +1061(If)X +1143(that)X +1291(process)X +1560(must)X +1743(later)X +1914(ascend)X +2161(the)X +2287(tree)X +2435(because)X +2717(of)X +2811(a)X +2874(page)X +3053(split,)X +3237(any)X +3380(such)X +3554(change)X +3809(must)X +3991(not)X +4120(cause)X +555 2265(confusion.)N +938(We)X +1077(use)X +1211(the)X +1336(technique)X +1675(described)X +2010(in)X +2099([LEHM81])X +2487(which)X +2710(exploits)X +2989(the)X +3113(ordering)X +3411(of)X +3504(data)X +3664(on)X +3770(a)X +3832(B-tree)X +4059(page)X +4237(to)X +555 2355(guarantee)N +888(that)X +1028(no)X +1128(process)X +1389(ever)X +1548(gets)X +1697(lost)X +1832(as)X +1919(a)X +1975(result)X +2173(of)X +2260(internal)X +2525(page)X +2697(updates)X +2962(made)X +3156(by)X +3256(other)X +3441(processes.)X +755 2478(If)N +836(a)X +899(transaction)X +1278(that)X +1425(updates)X +1697(a)X +1760(B-tree)X +1988(aborts,)X +2231(the)X +2356(user-visible)X +2757(changes)X +3043(to)X +3131(the)X +3255(tree)X +3402(must)X +3583(be)X +3685(rolled)X +3898(back.)X +4116(How-)X +555 2568(ever,)N +735(changes)X +1015(to)X +1097(the)X +1215(internal)X +1480(nodes)X +1687(of)X +1774(the)X +1892(tree)X +2033(need)X +2205(not)X +2327(be)X +2423(rolled)X +2630(back,)X +2822(since)X +3007(these)X +3192(pages)X +3395(contain)X +3651(no)X +3751(user-visible)X +4145(data.)X +555 2658(When)N +771(rolling)X +1008(back)X +1184(a)X +1244(transaction,)X +1640(we)X +1758(roll)X +1893(back)X +2069(all)X +2173(leaf)X +2318(page)X +2494(updates,)X +2783(but)X +2909(no)X +3013(internal)X +3281(insertions)X +3615(or)X +3705(page)X +3880(splits.)X +4111(In)X +4201(the)X +555 2748(worst)N +759(case,)X +944(this)X +1085(will)X +1235(leave)X +1431(a)X +1493(leaf)X +1640(page)X +1818(less)X +1964(than)X +2128(half)X +2279(full.)X +2456(This)X +2624(may)X +2788(cause)X +2993(poor)X +3166(space)X +3371(utilization,)X +3741(but)X +3869(does)X +4042(not)X +4170(lose)X +555 2838(user)N +709(data.)X +755 2961(Holding)N +1038(locks)X +1228(on)X +1329(leaf)X +1471(pages)X +1675(until)X +1842(transaction)X +2215(commit)X +2480(guarantees)X +2845(that)X +2986(no)X +3087(other)X +3273(process)X +3535(can)X +3668(insert)X +3866(or)X +3953(delete)X +4165(data)X +555 3051(that)N +711(has)X +854(been)X +1042(touched)X +1332(by)X +1448(this)X +1598(process.)X +1914(Rolling)X +2188(back)X +2375(insertions)X +2721(and)X +2872(deletions)X +3196(on)X +3311(leaf)X +3467(pages)X +3685(guarantees)X +4064(that)X +4219(no)X +555 3141(aborted)N +819(updates)X +1087(are)X +1209(ever)X +1371(visible)X +1607(to)X +1692(other)X +1880(transactions.)X +2326(Leaving)X +2612(page)X +2787(splits)X +2978(intact)X +3179(permits)X +3442(us)X +3536(to)X +3621(release)X +3867(internal)X +4134(write)X +555 3231(locks)N +744(early.)X +965(Thus)X +1145(transaction)X +1517(semantics)X +1853(are)X +1972(preserved,)X +2325(and)X +2461(locks)X +2650(are)X +2769(held)X +2927(for)X +3041(shorter)X +3284(periods.)X +755 3354(The)N +901(extra)X +1083(complexity)X +1464(introduced)X +1828(by)X +1929(this)X +2065(locking)X +2326(protocol)X +2614(appears)X +2881(substantial,)X +3264(but)X +3387(it)X +3452(is)X +3525(important)X +3856(for)X +3970(multi-user)X +555 3444(execution.)N +950(The)X +1118(bene\256ts)X +1410(of)X +1520(non-two-phase)X +2040(locking)X +2323(on)X +2446(B-trees)X +2721(are)X +2863(well)X +3044(established)X +3443(in)X +3548(the)X +3689(database)X +4009(literature)X +555 3534([BAY77],)N +899([LEHM81].)X +1320(If)X +1394(a)X +1450(process)X +1711(held)X +1869(locks)X +2058(until)X +2224(it)X +2288(committed,)X +2670(then)X +2828(a)X +2884(long-running)X +3322(update)X +3556(could)X +3754(lock)X +3912(out)X +4034(all)X +4134(other)X +555 3624(transactions)N +967(by)X +1076(preventing)X +1448(any)X +1593(other)X +1787(process)X +2057(from)X +2241(locking)X +2509(the)X +2635(root)X +2792(page)X +2972(of)X +3067(the)X +3193(tree.)X +3382(The)X +3535(B-tree)X +3764(locking)X +4032(protocol)X +555 3714(described)N +884(above)X +1096(guarantees)X +1460(that)X +1600(locks)X +1789(on)X +1889(internal)X +2154(pages)X +2357(are)X +2476(held)X +2634(for)X +2748(extremely)X +3089(short)X +3269(periods,)X +3545(thereby)X +3806(increasing)X +4156(con-)X +555 3804(currency.)N +3 f +555 3990(3.1.3.)N +775(Management)X +1245(of)X +1332(Shared)X +1596(Data)X +1 f +755 4113(Database)N +1075(systems)X +1353(permit)X +1587(many)X +1790(users)X +1980(to)X +2067(examine)X +2364(and)X +2505(update)X +2744(the)X +2866(same)X +3055(data)X +3213(concurrently.)X +3683(In)X +3774(order)X +3968(to)X +4054(provide)X +555 4203(this)N +702(concurrent)X +1078(access)X +1316(and)X +1464(enforce)X +1738(the)X +1868(write-ahead)X +2280(logging)X +2556(protocol)X +2855(described)X +3195(in)X +3289(section)X +3548(3.1.1,)X +3759(we)X +3884(use)X +4022(a)X +4089(shared)X +555 4293(memory)N +848(buffer)X +1071(manager.)X +1414(Not)X +1559(only)X +1726(does)X +1898(this)X +2038(provide)X +2308(the)X +2431(guarantees)X +2800(we)X +2919(require,)X +3192(but)X +3319(a)X +3380(user-level)X +3722(buffer)X +3944(manager)X +4246(is)X +555 4383(frequently)N +916(faster)X +1126(than)X +1295(using)X +1498(the)X +1626(\256le)X +1758(system)X +2010(buffer)X +2237(cache.)X +2491(Reads)X +2717(or)X +2814(writes)X +3040(involving)X +3376(the)X +3504(\256le)X +3636(system)X +3888(buffer)X +4115(cache)X +555 4473(often)N +746(require)X +1000(copying)X +1284(data)X +1444(between)X +1738(user)X +1898(and)X +2040(kernel)X +2266(space)X +2470(while)X +2673(a)X +2734(user-level)X +3076(buffer)X +3298(manager)X +3600(can)X +3737(return)X +3954(pointers)X +4237(to)X +555 4563(data)N +709(pages)X +912(directly.)X +1217(Additionally,)X +1661(if)X +1730(more)X +1915(than)X +2073(one)X +2209(process)X +2470(uses)X +2628(the)X +2746(same)X +2931(page,)X +3123(then)X +3281(fewer)X +3485(copies)X +3710(may)X +3868(be)X +3964(required.)X +3 f +555 4749(3.2.)N +715(Module)X +997(Architecture)X +1 f +755 4872(The)N +913(preceding)X +1262(sections)X +1552(described)X +1892(modules)X +2195(for)X +2321(managing)X +2669(the)X +2799(transaction)X +3183(log,)X +3337(locks,)X +3558(and)X +3706(a)X +3774(cache)X +3990(of)X +4089(shared)X +555 4962(buffers.)N +847(In)X +938(addition,)X +1244(we)X +1362(need)X +1538(to)X +1624(provide)X +1893(functionality)X +2326(for)X +2444(transaction)X +2 f +2819(begin)X +1 f +2997(,)X +2 f +3040(commit)X +1 f +3276(,)X +3319(and)X +2 f +3458(abort)X +1 f +3654(processing,)X +4040(necessi-)X +555 5052(tating)N +769(a)X +837(transaction)X +1221(manager.)X +1570(In)X +1669(order)X +1871(to)X +1965(arbitrate)X +2265(concurrent)X +2641(access)X +2879(to)X +2973(locks)X +3173(and)X +3320(buffers,)X +3599(we)X +3724(include)X +3991(a)X +4058(process)X +555 5142(management)N +995(module)X +1264(which)X +1489(manages)X +1799(a)X +1864(collection)X +2209(of)X +2305(semaphores)X +2713(used)X +2889(to)X +2980(block)X +3187(and)X +3332(release)X +3585(processes.)X +3962(Finally,)X +4237(in)X +555 5232(order)N +752(to)X +841(provide)X +1113(a)X +1176(simple,)X +1436(standard)X +1735(interface)X +2044(we)X +2165(have)X +2344(modi\256ed)X +2655(the)X +2780(database)X +3084(access)X +3317(routines)X +3602(\()X +3 f +3629(db)X +1 f +3717(\(3\)\).)X +3904(For)X +4041(the)X +4165(pur-)X +555 5322(poses)N +758(of)X +850(this)X +990(paper)X +1194(we)X +1313(call)X +1453(the)X +1575(modi\256ed)X +1883(package)X +2171(the)X +3 f +2293(Record)X +2567(Manager)X +1 f +2879(.)X +2943(Figure)X +3176(one)X +3316(shows)X +3540(the)X +3662(main)X +3846(interfaces)X +4183(and)X +555 5412(architecture)N +955(of)X +1042(LIBTP.)X + +5 p +%%Page: 5 5 +10 s 10 xH 0 xS 1 f +3 f +1 f +11 s +1851 1520(log_commit)N +2764 2077(buf_unpin)N +2764 1987(buf_get)N +3633 1408(buf_unpin)N +3633 1319(buf_pin)N +3633 1230(buf_get)N +3 f +17 s +1163 960(Txn)N +1430(M)X +1559(anager)X +2582(Record)X +3040(M)X +3169(anager)X +1 Dt +2363 726 MXY +0 355 Dl +1426 0 Dl +0 -355 Dl +-1426 0 Dl +3255 1616 MXY +0 535 Dl +534 0 Dl +0 -535 Dl +-534 0 Dl +2185 MX +0 535 Dl +535 0 Dl +0 -535 Dl +-535 0 Dl +1116 MX +0 535 Dl +534 0 Dl +0 -535 Dl +-534 0 Dl +726 MY +0 355 Dl +891 0 Dl +0 -355 Dl +-891 0 Dl +1 f +11 s +2207 1297(lock)N +2564 1386(log)N +865(unlock_all)X +1851 1609(log_unroll)N +1650 2508 MXY +0 178 Dl +1605 0 Dl +0 -178 Dl +-1605 0 Dl +1294 1616 MXY +19 -30 Dl +-19 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +2319 2508 MXY +-22 -30 Dl +4 23 Dl +-18 14 Dl +36 -7 Dl +-936 -357 Dl +3277 2455(sleep_on)N +1405 1616 MXY +36 4 Dl +-18 -13 Dl +1 -22 Dl +-19 31 Dl +1070 -535 Dl +2631 2508 MXY +36 6 Dl +-18 -14 Dl +3 -22 Dl +-21 30 Dl +891 -357 Dl +1426 2455(sleep_on)N +3255 1884 MXY +-31 -20 Dl +11 20 Dl +-11 19 Dl +31 -19 Dl +-535 0 Dl +1554 2366(wake)N +3277(wake)X +2185 1884 MXY +-31 -20 Dl +12 20 Dl +-12 19 Dl +31 -19 Dl +-356 0 Dl +0 -803 Dl +3 f +17 s +1236 1851(Lock)N +1118 2030(M)N +1247(anager)X +2339 1851(Log)N +2187 2030(M)N +2316(anager)X +3333 1851(Buffer)N +3257 2030(M)N +3386(anager)X +3522 1616 MXY +20 -30 Dl +-20 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +1950 2654(Process)N +2424(M)X +2553(anager)X +2542 1616 MXY +19 -30 Dl +-19 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +1 f +11 s +2207 1364(unlock)N +2452 2508 MXY +20 -31 Dl +-20 11 Dl +-19 -11 Dl +19 31 Dl +0 -357 Dl +2497 2322(sleep_on)N +2497 2233(wake)N +3 Dt +-1 Ds +3 f +10 s +1790 2830(Figure)N +2037(1:)X +2144(Library)X +2435(module)X +2708(interfaces.)X +1 f +10 f +555 3010(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 3286(3.2.1.)N +775(The)X +928(Log)X +1081(Manager)X +1 f +755 3409(The)N +3 f +907(Log)X +1067(Manager)X +1 f +1406(enforces)X +1706(the)X +1831(write-ahead)X +2238(logging)X +2509(protocol.)X +2843(Its)X +2949(primitive)X +3268(operations)X +3628(are)X +2 f +3753(log)X +1 f +3855(,)X +2 f +3901(log_commit)X +1 f +4279(,)X +2 f +555 3499(log_read)N +1 f +844(,)X +2 f +889(log_roll)X +1 f +1171(and)X +2 f +1312(log_unroll)X +1 f +1649(.)X +1714(The)X +2 f +1864(log)X +1 f +1991(call)X +2132(performs)X +2447(a)X +2508(buffered)X +2806(write)X +2996(of)X +3088(the)X +3211(speci\256ed)X +3520(log)X +3646(record)X +3876(and)X +4016(returns)X +4263(a)X +555 3589(unique)N +809(log)X +947(sequence)X +1278(number)X +1559(\(LSN\).)X +1840(This)X +2017(LSN)X +2203(may)X +2376(then)X +2549(be)X +2660(used)X +2842(to)X +2939(retrieve)X +3220(a)X +3291(record)X +3532(from)X +3723(the)X +3856(log)X +3993(using)X +4201(the)X +2 f +555 3679(log_read)N +1 f +865(call.)X +1042(The)X +2 f +1188(log)X +1 f +1311(interface)X +1614(knows)X +1844(very)X +2008(little)X +2175(about)X +2374(the)X +2493(internal)X +2759(format)X +2993(of)X +3080(the)X +3198(log)X +3320(records)X +3577(it)X +3641(receives.)X +3965(Rather,)X +4219(all)X +555 3769(log)N +681(records)X +942(are)X +1065 0.4028(referenced)AX +1430(by)X +1534(a)X +1594(header)X +1833(structure,)X +2158(a)X +2218(log)X +2344(record)X +2574(type,)X +2756(and)X +2896(a)X +2956(character)X +3276(buffer)X +3497(containing)X +3859(the)X +3981(data)X +4138(to)X +4223(be)X +555 3859(logged.)N +834(The)X +980(log)X +1103(record)X +1330(type)X +1489(is)X +1563(used)X +1731(to)X +1814(call)X +1951(the)X +2070(appropriate)X +2457(redo)X +2621(and)X +2758(undo)X +2939(routines)X +3217(during)X +2 f +3446(abort)X +1 f +3639(and)X +2 f +3775(commit)X +1 f +4031(process-)X +555 3949(ing.)N +721(While)X +941(we)X +1059(have)X +1235(used)X +1406(the)X +3 f +1528(Log)X +1684(Manager)X +1 f +2019(to)X +2104(provide)X +2372(before)X +2601(and)X +2740(after)X +2911(image)X +3130(logging,)X +3417(it)X +3484(may)X +3645(also)X +3797(be)X +3896(used)X +4066(for)X +4183(any)X +555 4039(of)N +642(the)X +760(logging)X +1024(algorithms)X +1386(discussed.)X +755 4162(The)N +2 f +905(log_commit)X +1 f +1308(operation)X +1636(behaves)X +1920(exactly)X +2177(like)X +2322(the)X +2 f +2445(log)X +1 f +2572(operation)X +2900(but)X +3026(guarantees)X +3394(that)X +3538(the)X +3660(log)X +3786(has)X +3917(been)X +4093(forced)X +555 4252(to)N +643(disk)X +802(before)X +1034(returning.)X +1394(A)X +1478(discussion)X +1837(of)X +1930(our)X +2063(commit)X +2333(strategy)X +2613(appears)X +2884(in)X +2971(the)X +3094(implementation)X +3621(section)X +3873(\(section)X +4152(4.2\).)X +2 f +555 4342(Log_unroll)N +1 f +935(reads)X +1126(log)X +1249(records)X +1507(from)X +1684(the)X +1803(log,)X +1946(following)X +2278(backward)X +2611(transaction)X +2983(pointers)X +3261(and)X +3397(calling)X +3635(the)X +3753(appropriate)X +4139(undo)X +555 4432(routines)N +839(to)X +927(implement)X +1295(transaction)X +1673(abort.)X +1904(In)X +1997(a)X +2059(similar)X +2307(manner,)X +2 f +2594(log_roll)X +1 f +2877(reads)X +3073(log)X +3201(records)X +3464(sequentially)X +3877(forward,)X +4178(cal-)X +555 4522(ling)N +699(the)X +817(appropriate)X +1203(redo)X +1366(routines)X +1644(to)X +1726(recover)X +1988(committed)X +2350(transactions)X +2753(after)X +2921(a)X +2977(system)X +3219(crash.)X +3 f +555 4708(3.2.2.)N +775(The)X +928(Buffer)X +1171(Manager)X +1 f +755 4831(The)N +3 f +912(Buffer)X +1167(Manager)X +1 f +1511(uses)X +1681(a)X +1749(pool)X +1923(of)X +2022(shared)X +2264(memory)X +2563(to)X +2657(provide)X +2934(a)X +3002(least-recently-used)X +3641(\(LRU\))X +3886(block)X +4095(cache.)X +555 4921(Although)N +886(the)X +1013(current)X +1270(library)X +1513(provides)X +1818(an)X +1923(LRU)X +2112(cache,)X +2345(it)X +2418(would)X +2647(be)X +2752(simple)X +2994(to)X +3085(add)X +3229(alternate)X +3534(replacement)X +3955(policies)X +4232(as)X +555 5011(suggested)N +903(by)X +1015([CHOU85])X +1408(or)X +1507(to)X +1601(provide)X +1878(multiple)X +2176(buffer)X +2405(pools)X +2610(with)X +2784(different)X +3092(policies.)X +3412(Transactions)X +3853(request)X +4116(pages)X +555 5101(from)N +736(the)X +859(buffer)X +1081(manager)X +1383(and)X +1524(keep)X +1701(them)X +3 f +1886(pinned)X +1 f +2145(to)X +2232(ensure)X +2466(that)X +2610(they)X +2772(are)X +2895(not)X +3021(written)X +3272(to)X +3358(disk)X +3515(while)X +3717(they)X +3879(are)X +4002(in)X +4088(a)X +4148(logi-)X +555 5191(cally)N +732(inconsistent)X +1135(state.)X +1343(When)X +1556(page)X +1729(replacement)X +2143(is)X +2217(necessary,)X +2571(the)X +3 f +2689(Buffer)X +2932(Manager)X +1 f +3264(\256nds)X +3439(an)X +3535(unpinned)X +3853(page)X +4025(and)X +4161(then)X +555 5281(checks)N +794(with)X +956(the)X +3 f +1074(Log)X +1227(Manager)X +1 f +1559(to)X +1641(ensure)X +1871(that)X +2011(the)X +2129(write-ahead)X +2529(protocol)X +2816(is)X +2889(enforced.)X +3 f +555 5467(3.2.3.)N +775(The)X +928(Lock)X +1121(Manager)X +1 f +755 5590(The)N +3 f +901(Lock)X +1095(Manager)X +1 f +1428(supports)X +1720(general)X +1978(purpose)X +2253(locking)X +2514(\(single)X +2753(writer,)X +2986(multiple)X +3273(readers\))X +3553(which)X +3769(is)X +3842(currently)X +4152(used)X +555 5680(to)N +638(provide)X +904(two-phase)X +1254(locking)X +1514(and)X +1650(high)X +1812(concurrency)X +2230(B-tree)X +2451(locking.)X +2751(However,)X +3086(the)X +3204(general)X +3461(purpose)X +3735(nature)X +3956(of)X +4043(the)X +4161(lock)X + +6 p +%%Page: 6 6 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630(manager)N +857(provides)X +1158(the)X +1281(ability)X +1510(to)X +1597(support)X +1862(a)X +1923(variety)X +2171(of)X +2263(locking)X +2528(protocols.)X +2890(Currently,)X +3241(all)X +3345(locks)X +3538(are)X +3661(issued)X +3885(at)X +3967(the)X +4089(granu-)X +555 720(larity)N +747(of)X +837(a)X +896(page)X +1071(\(the)X +1219(size)X +1367(of)X +1457(a)X +1516(buffer)X +1736(in)X +1821(the)X +1942(buffer)X +2161(pool\))X +2352(which)X +2570(is)X +2645(identi\256ed)X +2969(by)X +3071(two)X +3213(4-byte)X +3440(integers)X +3716(\(a)X +3801(\256le)X +3925(id)X +4009(and)X +4147(page)X +555 810(number\).)N +898(This)X +1071(provides)X +1378(the)X +1507(necessary)X +1851(information)X +2259(to)X +2351(extend)X +2595(the)X +3 f +2723(Lock)X +2926(Manager)X +1 f +3268(to)X +3360(perform)X +3649(hierarchical)X +4059(locking)X +555 900([GRAY76].)N +982(The)X +1133(current)X +1387(implementation)X +1915(does)X +2088(not)X +2216(support)X +2482(locks)X +2677(at)X +2760(other)X +2950(granularities)X +3376(and)X +3517(does)X +3689(not)X +3816(promote)X +4108(locks;)X +555 990(these)N +740(are)X +859(obvious)X +1132(future)X +1344(additions)X +1657(to)X +1739(the)X +1857(system.)X +755 1113(If)N +831(an)X +929(incoming)X +1253(lock)X +1413(request)X +1667(cannot)X +1903(be)X +2001(granted,)X +2284(the)X +2404(requesting)X +2760(process)X +3023(is)X +3098(queued)X +3352(for)X +3467(the)X +3586(lock)X +3745(and)X +3882(descheduled.)X +555 1203(When)N +769(a)X +827(lock)X +987(is)X +1062(released,)X +1368(the)X +1488(wait)X +1647(queue)X +1860(is)X +1934(traversed)X +2250(and)X +2387(any)X +2524(newly)X +2741(compatible)X +3118(locks)X +3308(are)X +3428(granted.)X +3730(Locks)X +3947(are)X +4067(located)X +555 1293(via)N +680(a)X +743(\256le)X +872(and)X +1015(page)X +1194(hash)X +1368(table)X +1551(and)X +1694(are)X +1820(chained)X +2097(both)X +2266(by)X +2373(object)X +2595(and)X +2737(by)X +2843(transaction,)X +3241(facilitating)X +3614(rapid)X +3805(traversal)X +4108(of)X +4201(the)X +555 1383(lock)N +713(table)X +889(during)X +1118(transaction)X +1490(commit)X +1754(and)X +1890(abort.)X +755 1506(The)N +907(primary)X +1188(interfaces)X +1528(to)X +1617(the)X +1742(lock)X +1907(manager)X +2211(are)X +2 f +2337(lock)X +1 f +2471(,)X +2 f +2518(unlock)X +1 f +2732(,)X +2779(and)X +2 f +2922(lock_unlock_all)X +1 f +3434(.)X +2 f +3500(Lock)X +1 f +3682(obtains)X +3939(a)X +4001(new)X +4161(lock)X +555 1596(for)N +680(a)X +747(speci\256c)X +1023(object.)X +1290(There)X +1509(are)X +1638(also)X +1797(two)X +1947(variants)X +2231(of)X +2328(the)X +2 f +2456(lock)X +1 f +2620(request,)X +2 f +2902(lock_upgrade)X +1 f +3373(and)X +2 f +3519(lock_downgrade)X +1 f +4053(,)X +4103(which)X +555 1686(allow)N +755(the)X +875(caller)X +1076(to)X +1160(atomically)X +1519(trade)X +1701(a)X +1758(lock)X +1917(of)X +2005(one)X +2142(type)X +2301(for)X +2416(a)X +2473(lock)X +2632(of)X +2720(another.)X +2 f +3022(Unlock)X +1 f +3275(releases)X +3551(a)X +3608(speci\256c)X +3874(mode)X +4073(of)X +4161(lock)X +555 1776(on)N +655(a)X +711(speci\256c)X +976(object.)X +2 f +1232(Lock_unlock_all)X +1 f +1786(releases)X +2061(all)X +2161(the)X +2279(locks)X +2468(associated)X +2818(with)X +2980(a)X +3036(speci\256c)X +3301(transaction.)X +3 f +555 1962(3.2.4.)N +775(The)X +928(Process)X +1207(Manager)X +1 f +755 2085(The)N +3 f +900(Process)X +1179(Manager)X +1 f +1511(acts)X +1656(as)X +1743(a)X +1799(user-level)X +2136(scheduler)X +2464(to)X +2546(make)X +2740(processes)X +3068(wait)X +3226(on)X +3326(unavailable)X +3716(locks)X +3905(and)X +4041(pending)X +555 2175(buffer)N +778(cache)X +988(I/O.)X +1161(For)X +1297(each)X +1470(process,)X +1756(a)X +1817(semaphore)X +2190(is)X +2268(maintained)X +2649(upon)X +2834(which)X +3055(that)X +3200(process)X +3466(waits)X +3660(when)X +3859(it)X +3928(needs)X +4136(to)X +4223(be)X +555 2265(descheduled.)N +1014(When)X +1228(a)X +1286(process)X +1549(needs)X +1754(to)X +1838(be)X +1936(run,)X +2084(its)X +2180(semaphore)X +2549(is)X +2623(cleared,)X +2897(and)X +3034(the)X +3153(operating)X +3477(system)X +3720(reschedules)X +4116(it.)X +4201(No)X +555 2355(sophisticated)N +1002(scheduling)X +1378(algorithm)X +1718(is)X +1799(applied;)X +2085(if)X +2162(the)X +2288(lock)X +2454(for)X +2576(which)X +2800(a)X +2864(process)X +3133(was)X +3286(waiting)X +3554(becomes)X +3863(available,)X +4201(the)X +555 2445(process)N +824(is)X +905(made)X +1107(runnable.)X +1456(It)X +1533(would)X +1761(have)X +1941(been)X +2121(possible)X +2411(to)X +2501(change)X +2757(the)X +2883(kernel's)X +3170(process)X +3439(scheduler)X +3775(to)X +3865(interact)X +4134(more)X +555 2535(ef\256ciently)N +900(with)X +1062(the)X +1180(lock)X +1338(manager,)X +1655(but)X +1777(doing)X +1979(so)X +2070(would)X +2290(have)X +2462(compromised)X +2918(our)X +3045(commitment)X +3469(to)X +3551(a)X +3607(user-level)X +3944(package.)X +3 f +555 2721(3.2.5.)N +775(The)X +928(Transaction)X +1361(Manager)X +1 f +755 2844(The)N +3 f +901(Transaction)X +1335(Manager)X +1 f +1668(provides)X +1965(the)X +2084(standard)X +2377(interface)X +2680(of)X +2 f +2768(txn_begin)X +1 f +3084(,)X +2 f +3125(txn_commit)X +1 f +3499(,)X +3540(and)X +2 f +3676(txn_abort)X +1 f +3987(.)X +4047(It)X +4116(keeps)X +555 2934(track)N +742(of)X +835(all)X +941(active)X +1159(transactions,)X +1588(assigns)X +1845(unique)X +2089(transaction)X +2467(identi\256ers,)X +2833(and)X +2974(directs)X +3213(the)X +3336(abort)X +3526(and)X +3667(commit)X +3936(processing.)X +555 3024(When)N +772(a)X +2 f +833(txn_begin)X +1 f +1174(is)X +1252(issued,)X +1497(the)X +3 f +1620(Transaction)X +2058(Manager)X +1 f +2395(assigns)X +2651(the)X +2773(next)X +2935(available)X +3249(transaction)X +3625(identi\256er,)X +3958(allocates)X +4263(a)X +555 3114(per-process)N +948(transaction)X +1322(structure)X +1625(in)X +1709(shared)X +1941(memory,)X +2249(increments)X +2622(the)X +2741(count)X +2940(of)X +3028(active)X +3241(transactions,)X +3665(and)X +3802(returns)X +4046(the)X +4165(new)X +555 3204(transaction)N +937(identi\256er)X +1256(to)X +1348(the)X +1476(calling)X +1724(process.)X +2034(The)X +2188(in-memory)X +2573(transaction)X +2954(structure)X +3264(contains)X +3560(a)X +3625(pointer)X +3881(into)X +4034(the)X +4161(lock)X +555 3294(table)N +734(for)X +851(locks)X +1043(held)X +1204(by)X +1307(this)X +1445(transaction,)X +1840(the)X +1961(last)X +2095(log)X +2220(sequence)X +2538(number,)X +2826(a)X +2885(transaction)X +3260(state)X +3430(\()X +2 f +3457(idle)X +1 f +(,)S +2 f +3620(running)X +1 f +3873(,)X +2 f +3915(aborting)X +1 f +4190(,)X +4232(or)X +2 f +555 3384(committing\))N +1 f +942(,)X +982(an)X +1078(error)X +1255(code,)X +1447(and)X +1583(a)X +1639(semaphore)X +2007(identi\256er.)X +755 3507(At)N +859(commit,)X +1147(the)X +3 f +1269(Transaction)X +1706(Manager)X +1 f +2042(calls)X +2 f +2213(log_commit)X +1 f +2615(to)X +2700(record)X +2929(the)X +3050(end)X +3189(of)X +3279(transaction)X +3654(and)X +3793(to)X +3878(\257ush)X +4056(the)X +4177(log.)X +555 3597(Then)N +743(it)X +810(directs)X +1047(the)X +3 f +1168(Lock)X +1364(Manager)X +1 f +1699(to)X +1784(release)X +2031(all)X +2134(locks)X +2325(associated)X +2677(with)X +2841(the)X +2961(given)X +3161(transaction.)X +3575(If)X +3651(a)X +3709(transaction)X +4083(aborts,)X +555 3687(the)N +3 f +680(Transaction)X +1120(Manager)X +1 f +1459(calls)X +1633(on)X +2 f +1739(log_unroll)X +1 f +2102(to)X +2190(read)X +2355(the)X +2479(transaction's)X +2915(log)X +3043(records)X +3306(and)X +3448(undo)X +3634(any)X +3776(modi\256cations)X +4237(to)X +555 3777(the)N +673(database.)X +1010(As)X +1119(in)X +1201(the)X +1319(commit)X +1583(case,)X +1762(it)X +1826(then)X +1984(calls)X +2 f +2151(lock_unlock_all)X +1 f +2683(to)X +2765(release)X +3009(the)X +3127(transaction's)X +3557(locks.)X +3 f +555 3963(3.2.6.)N +775(The)X +928(Record)X +1198(Manager)X +1 f +755 4086(The)N +3 f +919(Record)X +1208(Manager)X +1 f +1559(supports)X +1869(the)X +2006(abstraction)X +2397(of)X +2503(reading)X +2783(and)X +2938(writing)X +3208(records)X +3484(to)X +3585(a)X +3660(database.)X +3996(We)X +4147(have)X +555 4176(modi\256ed)N +861(the)X +981(the)X +1101(database)X +1399(access)X +1626(routines)X +3 f +1905(db)X +1 f +1993(\(3\))X +2108([BSD91])X +2418(to)X +2501(call)X +2638(the)X +2757(log,)X +2900(lock,)X +3079(and)X +3216(buffer)X +3434(managers.)X +3803(In)X +3891(order)X +4082(to)X +4165(pro-)X +555 4266(vide)N +718(functionality)X +1152(to)X +1239(perform)X +1523(undo)X +1708(and)X +1849(redo,)X +2037(the)X +3 f +2160(Record)X +2434(Manager)X +1 f +2770(de\256nes)X +3021(a)X +3081(collection)X +3421(of)X +3512(log)X +3638(record)X +3868(types)X +4061(and)X +4201(the)X +555 4356(associated)N +920(undo)X +1115(and)X +1266(redo)X +1444(routines.)X +1777(The)X +3 f +1937(Log)X +2105(Manager)X +1 f +2452(performs)X +2777(a)X +2848(table)X +3039(lookup)X +3296(on)X +3411(the)X +3543(record)X +3783(type)X +3955(to)X +4051(call)X +4201(the)X +555 4446(appropriate)N +951(routines.)X +1299(For)X +1440(example,)X +1762(the)X +1890(B-tree)X +2121(access)X +2356(method)X +2625(requires)X +2913(two)X +3062(log)X +3193(record)X +3428(types:)X +3648(insert)X +3855(and)X +4000(delete.)X +4241(A)X +555 4536(replace)N +808(operation)X +1131(is)X +1204(implemented)X +1642(as)X +1729(a)X +1785(delete)X +1997(followed)X +2302(by)X +2402(an)X +2498(insert)X +2696(and)X +2832(is)X +2905(logged)X +3143(accordingly.)X +3 f +555 4722(3.3.)N +715(Application)X +1134(Architectures)X +1 f +755 4845(The)N +907(structure)X +1215(of)X +1309(LIBTP)X +1558(allows)X +1794(application)X +2177(designers)X +2507(to)X +2596(trade)X +2784(off)X +2905(performance)X +3339(and)X +3481(protection.)X +3872(Since)X +4076(a)X +4138(large)X +555 4935(portion)N +810(of)X +901(LIBTP's)X +1205(functionality)X +1638(is)X +1715(provided)X +2024(by)X +2128(managing)X +2468(structures)X +2804(in)X +2889(shared)X +3122(memory,)X +3432(its)X +3530(structures)X +3865(are)X +3987(subject)X +4237(to)X +555 5025(corruption)N +926(by)X +1043(applications)X +1467(when)X +1678(the)X +1813(library)X +2064(is)X +2154(linked)X +2391(directly)X +2673(with)X +2852(the)X +2987(application.)X +3420(For)X +3568(this)X +3720(reason,)X +3987(LIBTP)X +4246(is)X +555 5115(designed)N +864(to)X +950(allow)X +1152(compilation)X +1558(into)X +1706(a)X +1766(separate)X +2053(server)X +2273(process)X +2537(which)X +2756(may)X +2917(be)X +3016(accessed)X +3321(via)X +3442(a)X +3501(socket)X +3729(interface.)X +4094(In)X +4184(this)X +555 5205(way)N +712(LIBTP's)X +1015(data)X +1172(structures)X +1507(are)X +1629(protected)X +1951(from)X +2130(application)X +2509(code,)X +2704(but)X +2829(communication)X +3349(overhead)X +3666(is)X +3741(increased.)X +4107(When)X +555 5295(applications)N +975(are)X +1107(trusted,)X +1377(LIBTP)X +1631(may)X +1801(be)X +1909(compiled)X +2239(directly)X +2516(into)X +2672(the)X +2802(application)X +3190(providing)X +3533(improved)X +3872(performance.)X +555 5385(Figures)N +815(two)X +955(and)X +1091(three)X +1272(show)X +1461(the)X +1579(two)X +1719(alternate)X +2016(application)X +2392(architectures.)X +755 5508(There)N +964(are)X +1084(potentially)X +1447(two)X +1588(modes)X +1818(in)X +1901(which)X +2118(one)X +2255(might)X +2462(use)X +2590(LIBTP)X +2833(in)X +2916(a)X +2972(server)X +3189(based)X +3392(architecture.)X +3832(In)X +3919(the)X +4037(\256rst,)X +4201(the)X +555 5598(server)N +778(would)X +1004(provide)X +1275(the)X +1399(capability)X +1741(to)X +1829(respond)X +2109(to)X +2197(requests)X +2486(to)X +2574(each)X +2747(of)X +2839(the)X +2962(low)X +3107(level)X +3288(modules)X +3584(\(lock,)X +3794(log,)X +3941(buffer,)X +4183(and)X +555 5688(transaction)N +944(managers\).)X +1356(Unfortunately,)X +1863(the)X +1998(performance)X +2442(of)X +2546(such)X +2730(a)X +2803(system)X +3062(is)X +3152(likely)X +3371(to)X +3470(be)X +3583(blindingly)X +3947(slow)X +4134(since)X + +7 p +%%Page: 7 7 +10 s 10 xH 0 xS 1 f +3 f +1 f +1 Dt +1864 1125 MXY +15 -26 Dl +-15 10 Dl +-14 -10 Dl +14 26 Dl +0 -266 Dl +1315 1125 MXY +15 -26 Dl +-15 10 Dl +-14 -10 Dl +14 26 Dl +0 -266 Dl +3 Dt +1133 1125 MXY +0 798 Dl +931 0 Dl +0 -798 Dl +-931 0 Dl +1 Dt +1266 1257 MXY +0 133 Dl +665 0 Dl +0 -133 Dl +-665 0 Dl +3 f +8 s +1513 1351(driver)N +1502 1617(LIBTP)N +1266 1390 MXY +0 400 Dl +665 0 Dl +0 -400 Dl +-665 0 Dl +3 Dt +1133 726 MXY +0 133 Dl +931 0 Dl +0 -133 Dl +-931 0 Dl +1 f +1029 1098(txn_abort)N +964 1015(txn_commit)N +1018 932(txn_begin)N +1910 1015(db_ops)N +3 f +1308 820(Application)N +1645(Program)X +1398 1218(Server)N +1594(Process)X +1 f +1390 986(socket)N +1569(interface)X +1 Dt +1848 967 MXY +-23 -14 Dl +8 14 Dl +-8 15 Dl +23 -15 Dl +-50 0 Dl +1324 MX +23 15 Dl +-9 -15 Dl +9 -14 Dl +-23 14 Dl +50 0 Dl +3 Dt +2862 859 MXY +0 1064 Dl +932 0 Dl +0 -1064 Dl +-932 0 Dl +1 Dt +3178 1390 MXY +24 -12 Dl +-17 0 Dl +-8 -15 Dl +1 27 Dl +150 -265 Dl +3494 1390 MXY +0 -27 Dl +-8 15 Dl +-16 1 Dl +24 11 Dl +-166 -265 Dl +3 f +3232 1617(LIBTP)N +2995 1390 MXY +0 400 Dl +666 0 Dl +0 -400 Dl +-666 0 Dl +992 MY +0 133 Dl +666 0 Dl +0 -133 Dl +-666 0 Dl +3168 1086(Application)N +1 f +2939 1201(txn_begin)N +2885 1284(txn_commit)N +2950 1368(txn_abort)N +3465 1284(db_ops)N +3 f +3155 766(Single)N +3339(Process)X +3 Dt +-1 Ds +811 2100(Figure)N +1023(2:)X +1107(Server)X +1318(Architecture.)X +1 f +1727(In)X +1811(this)X +1934(con\256guration,)X +811 2190(the)N +916(library)X +1113(is)X +1183(loaded)X +1380(into)X +1507(a)X +1562(server)X +1744(process)X +1962(which)X +2145(is)X +2214(ac-)X +811 2280(cessed)N +993(via)X +1087(a)X +1131(socket)X +1310(interface.)X +3 f +2563 2100(Figure)N +2803(3:)X +2914(Single)X +3140(Process)X +3403(Architecture.)X +1 f +3839(In)X +3950(this)X +2563 2190(con\256guration,)N +2948(the)X +3053(library)X +3250(routines)X +3483(are)X +3587(loaded)X +3784(as)X +3864(part)X +3990(of)X +2563 2280(the)N +2657(application)X +2957(and)X +3065(accessed)X +3303(via)X +3397(a)X +3441(subroutine)X +3727(interface.)X +10 s +10 f +555 2403(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 2679(modifying)N +909(a)X +966(piece)X +1157(of)X +1245(data)X +1400(would)X +1621(require)X +1870(three)X +2051(or)X +2138(possibly)X +2424(four)X +2578(separate)X +2862(communications:)X +3433(one)X +3569(to)X +3651(lock)X +3809(the)X +3927(data,)X +4101(one)X +4237(to)X +555 2769(obtain)N +781(the)X +905(data,)X +1085(one)X +1227(to)X +1315(log)X +1443(the)X +1567(modi\256cation,)X +2017(and)X +2159(possibly)X +2451(one)X +2593(to)X +2681(transmit)X +2969(the)X +3093(modi\256ed)X +3403(data.)X +3583(Figure)X +3817(four)X +3976(shows)X +4201(the)X +555 2859(relative)N +826(performance)X +1263(for)X +1387(retrieving)X +1728(a)X +1793(single)X +2013(record)X +2248(using)X +2450(the)X +2577(record)X +2812(level)X +2997(call)X +3142(versus)X +3376(using)X +3578(the)X +3705(lower)X +3917(level)X +4102(buffer)X +555 2949(management)N +987(and)X +1125(locking)X +1387(calls.)X +1616(The)X +1763(2:1)X +1887(ratio)X +2056(observed)X +2367(in)X +2450(the)X +2569(single)X +2781(process)X +3043(case)X +3203(re\257ects)X +3456(the)X +3575(additional)X +3916(overhead)X +4232(of)X +555 3039(parsing)N +819(eight)X +1006(commands)X +1380(rather)X +1595(than)X +1760(one)X +1903(while)X +2108(the)X +2233(3:1)X +2362(ratio)X +2536(observed)X +2853(in)X +2942(the)X +3067(client/server)X +3491(architecture)X +3898(re\257ects)X +4157(both)X +555 3129(the)N +679(parsing)X +941(and)X +1083(the)X +1207(communication)X +1731(overheard.)X +2118(Although)X +2445(there)X +2631(may)X +2794(be)X +2895(applications)X +3307(which)X +3528(could)X +3731(tolerate)X +3997(such)X +4169(per-)X +555 3219(formance,)N +904(it)X +973(seems)X +1194(far)X +1309(more)X +1499(feasible)X +1774(to)X +1861(support)X +2126(a)X +2187(higher)X +2417(level)X +2597(interface,)X +2923(such)X +3094(as)X +3185(that)X +3329(provided)X +3638(by)X +3742(a)X +3802(query)X +4009(language)X +555 3309(\()N +2 f +582(e.g.)X +1 f +718(SQL)X +889([SQL86]\).)X +755 3432(Although)N +1081(LIBTP)X +1327(does)X +1498(not)X +1624(have)X +1800(an)X +1900(SQL)X +2075(parser,)X +2316(we)X +2433(have)X +2608(built)X +2777(a)X +2836(server)X +3056(application)X +3435(using)X +3631(the)X +3752(toolkit)X +3983(command)X +555 3522(language)N +882(\(TCL\))X +1124([OUST90].)X +1544(The)X +1706(server)X +1940(supports)X +2248(a)X +2321(command)X +2674(line)X +2831(interface)X +3150(similar)X +3409(to)X +3508(the)X +3643(subroutine)X +4017(interface)X +555 3612(de\256ned)N +811(in)X +3 f +893(db)X +1 f +981(\(3\).)X +1135(Since)X +1333(it)X +1397(is)X +1470(based)X +1673(on)X +1773(TCL,)X +1964(it)X +2028(provides)X +2324(control)X +2571(structures)X +2903(as)X +2990(well.)X +3 f +555 3798(4.)N +655(Implementation)X +1 f +3 f +555 3984(4.1.)N +715(Locking)X +1014(and)X +1162(Deadlock)X +1502(Detection)X +1 f +755 4107(LIBTP)N +1007(uses)X +1175(two-phase)X +1535(locking)X +1805(for)X +1929(user)X +2093(data.)X +2297(Strictly)X +2562(speaking,)X +2897(the)X +3024(two)X +3173(phases)X +3416(in)X +3507(two-phase)X +3866(locking)X +4135(are)X +4263(a)X +3 f +555 4197(grow)N +1 f +756(phase,)X +986(during)X +1221(which)X +1443(locks)X +1638(are)X +1763(acquired,)X +2086(and)X +2228(a)X +3 f +2290(shrink)X +1 f +2537(phase,)X +2766(during)X +3001(which)X +3223(locks)X +3418(are)X +3543(released.)X +3873(No)X +3997(lock)X +4161(may)X +555 4287(ever)N +720(be)X +822(acquired)X +1124(during)X +1358(the)X +1481(shrink)X +1706(phase.)X +1954(The)X +2104(grow)X +2294(phase)X +2502(lasts)X +2669(until)X +2840(the)X +2963(\256rst)X +3112(release,)X +3381(which)X +3602(marks)X +3823(the)X +3946(start)X +4109(of)X +4201(the)X +555 4377(shrink)N +780(phase.)X +1028(In)X +1120(practice,)X +1420(the)X +1543(grow)X +1733(phase)X +1941(lasts)X +2108(for)X +2227(the)X +2350(duration)X +2642(of)X +2734(a)X +2795(transaction)X +3172(in)X +3259(LIBTP)X +3506(and)X +3647(in)X +3734(commercial)X +4138(data-)X +555 4467(base)N +721(systems.)X +1037(The)X +1184(shrink)X +1406(phase)X +1611(takes)X +1798(place)X +1990(during)X +2221(transaction)X +2595(commit)X +2861(or)X +2950(abort.)X +3177(This)X +3341(means)X +3568(that)X +3710(locks)X +3901(are)X +4022(acquired)X +555 4557(on)N +655(demand)X +929(during)X +1158(the)X +1276(lifetime)X +1545(of)X +1632(a)X +1688(transaction,)X +2080(and)X +2216(held)X +2374(until)X +2540(commit)X +2804(time,)X +2986(at)X +3064(which)X +3280(point)X +3464(all)X +3564(locks)X +3753(are)X +3872(released.)X +755 4680(If)N +832(multiple)X +1121(transactions)X +1527(are)X +1649(active)X +1864(concurrently,)X +2313(deadlocks)X +2657(can)X +2792(occur)X +2994(and)X +3133(must)X +3311(be)X +3410(detected)X +3701(and)X +3840(resolved.)X +4174(The)X +555 4770(lock)N +715(table)X +893(can)X +1027(be)X +1125(thought)X +1391(of)X +1480(as)X +1569(a)X +1627(representation)X +2104(of)X +2193(a)X +2251(directed)X +2532(graph.)X +2777(The)X +2924(nodes)X +3133(in)X +3216(the)X +3335(graph)X +3539(are)X +3659(transactions.)X +4103(Edges)X +555 4860(represent)N +878(the)X +3 f +1004(waits-for)X +1 f +1340(relation)X +1613(between)X +1909(transactions;)X +2342(if)X +2419(transaction)X +2 f +2799(A)X +1 f +2876(is)X +2957(waiting)X +3225(for)X +3347(a)X +3411(lock)X +3577(held)X +3743(by)X +3851(transaction)X +2 f +4230(B)X +1 f +4279(,)X +555 4950(then)N +716(a)X +775(directed)X +1057(edge)X +1232(exists)X +1437(from)X +2 f +1616(A)X +1 f +1687(to)X +2 f +1771(B)X +1 f +1842(in)X +1926(the)X +2046(graph.)X +2291(A)X +2371(deadlock)X +2683(exists)X +2887(if)X +2958(a)X +3016(cycle)X +3208(appears)X +3476(in)X +3560(the)X +3680(graph.)X +3925(By)X +4040(conven-)X +555 5040(tion,)N +719(no)X +819(transaction)X +1191(ever)X +1350(waits)X +1539(for)X +1653(a)X +1709(lock)X +1867(it)X +1931(already)X +2188(holds,)X +2401(so)X +2492(re\257exive)X +2793(edges)X +2996(are)X +3115(impossible.)X +755 5163(A)N +836(distinguished)X +1285(process)X +1549(monitors)X +1856(the)X +1977(lock)X +2138(table,)X +2337(searching)X +2668(for)X +2785(cycles.)X +3048(The)X +3195(frequency)X +3539(with)X +3703(which)X +3921(this)X +4058(process)X +555 5253(runs)N +716(is)X +792(user-settable;)X +1243(for)X +1360(the)X +1481(multi-user)X +1833(tests)X +1998(discussed)X +2328(in)X +2413(section)X +2663(5.1.2,)X +2866(it)X +2933(has)X +3063(been)X +3238(set)X +3350(to)X +3435(wake)X +3628(up)X +3731(every)X +3932(second,)X +4197(but)X +555 5343(more)N +742(sophisticated)X +1182(schedules)X +1516(are)X +1636(certainly)X +1938(possible.)X +2261(When)X +2474(a)X +2531(cycle)X +2722(is)X +2796(detected,)X +3105(one)X +3242(of)X +3330(the)X +3449(transactions)X +3853(in)X +3936(the)X +4055(cycle)X +4246(is)X +555 5433(nominated)N +917(and)X +1057(aborted.)X +1362(When)X +1578(the)X +1700(transaction)X +2076(aborts,)X +2315(it)X +2382(rolls)X +2547(back)X +2722(its)X +2820(changes)X +3102(and)X +3241(releases)X +3519(its)X +3617(locks,)X +3829(thereby)X +4093(break-)X +555 5523(ing)N +677(the)X +795(cycle)X +985(in)X +1067(the)X +1185(graph.)X + +8 p +%%Page: 8 8 +10 s 10 xH 0 xS 1 f +3 f +1 f +4 Ds +1 Dt +1866 865 MXY +1338 0 Dl +1866 1031 MXY +1338 0 Dl +1866 1199 MXY +1338 0 Dl +1866 1366 MXY +1338 0 Dl +1866 1533 MXY +1338 0 Dl +1866 1701 MXY +1338 0 Dl +-1 Ds +5 Dt +1866 1868 MXY +1338 0 Dl +1 Dt +1 Di +2981 MX + 2981 1868 lineto + 2981 1575 lineto + 3092 1575 lineto + 3092 1868 lineto + 2981 1868 lineto +closepath 21 2981 1575 3092 1868 Dp +2646 MX + 2646 1868 lineto + 2646 949 lineto + 2758 949 lineto + 2758 1868 lineto + 2646 1868 lineto +closepath 14 2646 949 2758 1868 Dp +2312 MX + 2312 1868 lineto + 2312 1701 lineto + 2423 1701 lineto + 2423 1868 lineto + 2312 1868 lineto +closepath 3 2312 1701 2423 1868 Dp +1977 MX + 1977 1868 lineto + 1977 1512 lineto + 2089 1512 lineto + 2089 1868 lineto + 1977 1868 lineto +closepath 19 1977 1512 2089 1868 Dp +3 f +2640 2047(Client/Server)N +1957(Single)X +2185(Process)X +7 s +2957 1957(record)N +2570(component)X +2289(record)X +1890(components)X +1733 1724(.1)N +1733 1556(.2)N +1733 1389(.3)N +1733 1222(.4)N +1733 1055(.5)N +1733 889(.6)N +1590 726(Elapsed)N +1794(Time)X +1613 782(\(in)N +1693(seconds\))X +3 Dt +-1 Ds +8 s +555 2255(Figure)N +756(4:)X +829(Comparison)X +1187(of)X +1260(High)X +1416(and)X +1540(Low)X +1681(Level)X +1850(Interfaces.)X +1 f +2174(Elapsed)X +2395(time)X +2528(in)X +2597(seconds)X +2818(to)X +2887(perform)X +3111(a)X +3158(single)X +3330(record)X +3511(retrieval)X +3742(from)X +3885(a)X +3932(command)X +4203(line)X +555 2345(\(rather)N +751(than)X +888(a)X +943(procedural)X +1241(interface\))X +1510(is)X +1579(shown)X +1772(on)X +1862(the)X +1966(y)X +2024(axis.)X +2185(The)X +2310(``component'')X +2704(numbers)X +2950(re\257ect)X +3135(the)X +3239(timings)X +3458(when)X +3622(the)X +3726(record)X +3914(is)X +3983(retrieved)X +4235(by)X +555 2435(separate)N +785(calls)X +924(to)X +996(the)X +1096(lock)X +1228(manager)X +1469(and)X +1583(buffer)X +1760(manager)X +2001(while)X +2165(the)X +2264(``record'')X +2531(timings)X +2745(were)X +2889(obtained)X +3130(by)X +3215(using)X +3375(a)X +3424(single)X +3598(call)X +3711(to)X +3782(the)X +3881(record)X +4064(manager.)X +555 2525(The)N +674(2:1)X +776(ratio)X +913(observed)X +1163(for)X +1257(the)X +1355(single)X +1528(process)X +1739(case)X +1868(is)X +1930(a)X +1977(re\257ection)X +2237(of)X +2309(the)X +2406(parsing)X +2613(overhead)X +2865(for)X +2958(executing)X +3225(eight)X +3372(separate)X +3599(commands)X +3895(rather)X +4062(than)X +4191(one.)X +555 2615(The)N +673(additional)X +948(factor)X +1115(of)X +1187(one)X +1298(re\257ected)X +1536(in)X +1605(the)X +1702(3:1)X +1803(ratio)X +1939(for)X +2031(the)X +2127(client/server)X +2460(architecture)X +2794(is)X +2855(due)X +2965(to)X +3033(the)X +3129(communication)X +3545(overhead.)X +3828(The)X +3945(true)X +4062(ratios)X +4222(are)X +555 2705(actually)N +775(worse)X +945(since)X +1094(the)X +1190(component)X +1492(timings)X +1703(do)X +1785(not)X +1884(re\257ect)X +2060(the)X +2155(search)X +2334(times)X +2490(within)X +2671(each)X +2804(page)X +2941(or)X +3011(the)X +3106(time)X +3237(required)X +3466(to)X +3533(transmit)X +3760(the)X +3855(page)X +3992(between)X +4221(the)X +555 2795(two)N +667(processes.)X +10 s +10 f +555 2885(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 3161(4.2.)N +715(Group)X +961(Commit)X +1 f +755 3284(Since)N +959(the)X +1083(log)X +1211(must)X +1392(be)X +1494(\257ushed)X +1751(to)X +1839(disk)X +1997(at)X +2080(commit)X +2349(time,)X +2536(disk)X +2694(bandwidth)X +3057(fundamentally)X +3545(limits)X +3751(the)X +3874(rate)X +4020(at)X +4103(which)X +555 3374(transactions)N +959(complete.)X +1314(Since)X +1513(most)X +1688(transactions)X +2091(write)X +2276(only)X +2438(a)X +2494(few)X +2635(small)X +2828(records)X +3085(to)X +3167(the)X +3285(log,)X +3427(the)X +3545(last)X +3676(page)X +3848(of)X +3935(the)X +4053(log)X +4175(will)X +555 3464(be)N +658(\257ushed)X +916(once)X +1095(by)X +1202(every)X +1408(transaction)X +1787(which)X +2010(writes)X +2233(to)X +2322(it.)X +2433(In)X +2527(the)X +2652(naive)X +2853(implementation,)X +3402(these)X +3593(\257ushes)X +3841(would)X +4067(happen)X +555 3554(serially.)N +755 3677(LIBTP)N +1008(uses)X +3 f +1177(group)X +1412(commit)X +1 f +1702([DEWI84])X +2077(in)X +2170(order)X +2371(to)X +2464(amortize)X +2775(the)X +2903(cost)X +3062(of)X +3159(one)X +3305(synchronous)X +3740(disk)X +3903(write)X +4098(across)X +555 3767(multiple)N +851(transactions.)X +1304(Group)X +1539(commit)X +1812(provides)X +2117(a)X +2182(way)X +2345(for)X +2468(a)X +2533(group)X +2749(of)X +2845(transactions)X +3257(to)X +3348(commit)X +3621(simultaneously.)X +4174(The)X +555 3857(\256rst)N +709(several)X +967(transactions)X +1380(to)X +1472(commit)X +1745(write)X +1939(their)X +2115(changes)X +2403(to)X +2494(the)X +2621(in-memory)X +3006(log)X +3137(page,)X +3338(then)X +3505(sleep)X +3699(on)X +3808(a)X +3873(distinguished)X +555 3947(semaphore.)N +966(Later,)X +1179(a)X +1238(committing)X +1629(transaction)X +2004(\257ushes)X +2249(the)X +2370(page)X +2545(to)X +2630(disk,)X +2805(and)X +2943(wakes)X +3166(up)X +3268(all)X +3370(its)X +3467(sleeping)X +3756(peers.)X +3988(The)X +4135(point)X +555 4037(at)N +635(which)X +853(changes)X +1134(are)X +1255(actually)X +1531(written)X +1780(is)X +1855(determined)X +2238(by)X +2340(three)X +2523(thresholds.)X +2914(The)X +3061(\256rst)X +3207(is)X +3281(the)X +2 f +3400(group)X +3612(threshold)X +1 f +3935(and)X +4072(de\256nes)X +555 4127(the)N +674(minimum)X +1005(number)X +1271(of)X +1359(transactions)X +1763(which)X +1979(must)X +2154(be)X +2250(active)X +2462(in)X +2544(the)X +2662(system)X +2904(before)X +3130(transactions)X +3533(are)X +3652(forced)X +3878(to)X +3960(participate)X +555 4217(in)N +646(a)X +711(group)X +927(commit.)X +1240(The)X +1394(second)X +1646(is)X +1728(the)X +2 f +1855(wait)X +2021(threshold)X +1 f +2352(which)X +2577(is)X +2658(expressed)X +3003(as)X +3098(the)X +3224(percentage)X +3601(of)X +3696(active)X +3916(transactions)X +555 4307(waiting)N +826(to)X +919(be)X +1026(committed.)X +1439(The)X +1595(last)X +1737(is)X +1821(the)X +2 f +1950(logdelay)X +2257(threshold)X +1 f +2590(which)X +2816(indicates)X +3131(how)X +3299(much)X +3507(un\257ushed)X +3848(log)X +3980(should)X +4223(be)X +555 4397(allowed)N +829(to)X +911(accumulate)X +1297(before)X +1523(a)X +1579(waiting)X +1839(transaction's)X +2289(commit)X +2553(record)X +2779(is)X +2852(\257ushed.)X +755 4520(Group)N +981(commit)X +1246(can)X +1379(substantially)X +1803(improve)X +2090(performance)X +2517(for)X +2631(high-concurrency)X +3218(environments.)X +3714(If)X +3788(only)X +3950(a)X +4006(few)X +4147(tran-)X +555 4610(sactions)N +836(are)X +957(running,)X +1248(it)X +1314(is)X +1389(unlikely)X +1673(to)X +1757(improve)X +2046(things)X +2263(at)X +2343(all.)X +2485(The)X +2632(crossover)X +2962(point)X +3148(is)X +3223(the)X +3343(point)X +3529(at)X +3609(which)X +3827(the)X +3947(transaction)X +555 4700(commit)N +823(rate)X +968(is)X +1045(limited)X +1295(by)X +1399(the)X +1521(bandwidth)X +1883(of)X +1974(the)X +2096(device)X +2330(on)X +2434(which)X +2654(the)X +2776(log)X +2902(resides.)X +3189(If)X +3267(processes)X +3599(are)X +3722(trying)X +3937(to)X +4023(\257ush)X +4201(the)X +555 4790(log)N +677(faster)X +876(than)X +1034(the)X +1152(log)X +1274(disk)X +1427(can)X +1559(accept)X +1785(data,)X +1959(then)X +2117(group)X +2324(commit)X +2588(will)X +2732(increase)X +3016(the)X +3134(commit)X +3398(rate.)X +3 f +555 4976(4.3.)N +715(Kernel)X +971(Intervention)X +1418(for)X +1541(Synchronization)X +1 f +755 5099(Since)N +954(LIBTP)X +1197(uses)X +1356(data)X +1511(in)X +1594(shared)X +1825(memory)X +2113(\()X +2 f +2140(e.g.)X +1 f +2277(the)X +2395(lock)X +2553(table)X +2729(and)X +2865(buffer)X +3082(pool\))X +3271(it)X +3335(must)X +3510(be)X +3606(possible)X +3888(for)X +4002(a)X +4058(process)X +555 5189(to)N +640(acquire)X +900(exclusive)X +1226(access)X +1454(to)X +1538(shared)X +1770(data)X +1926(in)X +2010(order)X +2202(to)X +2286(prevent)X +2549(corruption.)X +2945(In)X +3034(addition,)X +3338(the)X +3458(process)X +3721(manager)X +4020(must)X +4197(put)X +555 5279(processes)N +886(to)X +971(sleep)X +1159(when)X +1356(the)X +1477(lock)X +1638(or)X +1728(buffer)X +1948(they)X +2109(request)X +2364(is)X +2440(in)X +2525(use)X +2655(by)X +2758(some)X +2950(other)X +3138(process.)X +3441(In)X +3530(the)X +3650(LIBTP)X +3894(implementa-)X +555 5385(tion)N +705(under)X +914(Ultrix)X +1131(4.0)X +7 s +5353(2)Y +10 s +5385(,)Y +1305(we)X +1424(use)X +1556(System)X +1816(V)X +1899(semaphores)X +2303(to)X +2390(provide)X +2660(this)X +2800(synchronization.)X +3377(Semaphores)X +3794(implemented)X +4237(in)X +555 5475(this)N +701(fashion)X +968(turn)X +1128(out)X +1261(to)X +1354(be)X +1461(an)X +1568(expensive)X +1920(choice)X +2161(for)X +2285(synchronization,)X +2847(because)X +3132(each)X +3310(access)X +3546(traps)X +3732(to)X +3824(the)X +3952(kernel)X +4183(and)X +8 s +10 f +555 5547(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5625(2)N +8 s +763 5650(Ultrix)N +932(and)X +1040(DEC)X +1184(are)X +1277(trademarks)X +1576(of)X +1645(Digital)X +1839(Equipment)X +2136(Corporation.)X + +9 p +%%Page: 9 9 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(executes)N +852(atomically)X +1210(there.)X +755 753(On)N +878(architectures)X +1314(that)X +1459(support)X +1724(atomic)X +1967(test-and-set,)X +2382(a)X +2443(much)X +2646(better)X +2854(choice)X +3089(would)X +3314(be)X +3415(to)X +3502(attempt)X +3767(to)X +3854(obtain)X +4079(a)X +4139(spin-)X +555 843(lock)N +714(with)X +877(a)X +934(test-and-set,)X +1345(and)X +1482(issue)X +1663(a)X +1720(system)X +1963(call)X +2100(only)X +2263(if)X +2333(the)X +2452(spinlock)X +2744(is)X +2818(unavailable.)X +3249(Since)X +3447(virtually)X +3738(all)X +3838(semaphores)X +4237(in)X +555 933(LIBTP)N +801(are)X +924(uncontested)X +1330(and)X +1469(are)X +1591(held)X +1752(for)X +1869(very)X +2035(short)X +2218(periods)X +2477(of)X +2567(time,)X +2752(this)X +2890(would)X +3113(improve)X +3403(performance.)X +3873(For)X +4007(example,)X +555 1023(processes)N +885(must)X +1062(acquire)X +1321(exclusive)X +1646(access)X +1874(to)X +1958(buffer)X +2177(pool)X +2341(metadata)X +2653(in)X +2737(order)X +2929(to)X +3013(\256nd)X +3159(and)X +3297(pin)X +3421(a)X +3479(buffer)X +3698(in)X +3781(shared)X +4012(memory.)X +555 1113(This)N +721(semaphore)X +1093(is)X +1170(requested)X +1502(most)X +1681(frequently)X +2034(in)X +2119(LIBTP.)X +2404(However,)X +2742(once)X +2917(it)X +2984(is)X +3060(acquired,)X +3380(only)X +3545(a)X +3604(few)X +3748(instructions)X +4144(must)X +555 1203(be)N +656(executed)X +966(before)X +1196(it)X +1264(is)X +1341(released.)X +1669(On)X +1791(one)X +1931(architecture)X +2335(for)X +2453(which)X +2673(we)X +2791(were)X +2972(able)X +3130(to)X +3216(gather)X +3441(detailed)X +3719(pro\256ling)X +4018(informa-)X +555 1293(tion,)N +729(the)X +857(cost)X +1015(of)X +1111(the)X +1238(semaphore)X +1615(calls)X +1791(accounted)X +2146(for)X +2269(25%)X +2445(of)X +2541(the)X +2668(total)X +2839(time)X +3010(spent)X +3208(updating)X +3517(the)X +3644(metadata.)X +4003(This)X +4174(was)X +555 1383(fairly)N +749(consistent)X +1089(across)X +1310(most)X +1485(of)X +1572(the)X +1690(critical)X +1933(sections.)X +755 1506(In)N +848(an)X +950(attempt)X +1216(to)X +1304(quantify)X +1597(the)X +1720(overhead)X +2040(of)X +2132(kernel)X +2358(synchronization,)X +2915(we)X +3034(ran)X +3162(tests)X +3329(on)X +3434(a)X +3495(version)X +3756(of)X +3848(4.3BSD-Reno)X +555 1596(which)N +786(had)X +937(been)X +1123(modi\256ed)X +1441(to)X +1537(support)X +1811(binary)X +2050(semaphore)X +2432(facilities)X +2742(similar)X +2998(to)X +3094(those)X +3297(described)X +3639(in)X +3735([POSIX91].)X +4174(The)X +555 1686(hardware)N +880(platform)X +1181(consisted)X +1504(of)X +1595(an)X +1695(HP300)X +1941(\(33MHz)X +2237(MC68030\))X +2612(workstation)X +3014(with)X +3180(16MBytes)X +3537(of)X +3628(main)X +3812(memory,)X +4123(and)X +4263(a)X +555 1776(600MByte)N +920(HP7959)X +1205(SCSI)X +1396(disk)X +1552(\(17)X +1682(ms)X +1798(average)X +2072(seek)X +2237(time\).)X +2468(We)X +2602(ran)X +2727(three)X +2910(sets)X +3052(of)X +3141(comparisons)X +3568(which)X +3786(are)X +3907(summarized)X +555 1866(in)N +645(\256gure)X +860(\256ve.)X +1028(In)X +1123(each)X +1299(comparison)X +1701(we)X +1823(ran)X +1954(two)X +2102(tests,)X +2292(one)X +2436(using)X +2637(hardware)X +2965(spinlocks)X +3295(and)X +3438(the)X +3563(other)X +3755(using)X +3955(kernel)X +4183(call)X +555 1956(synchronization.)N +1135(Since)X +1341(the)X +1467(test)X +1606(was)X +1758(run)X +1892(single-user,)X +2291(none)X +2474(of)X +2568(the)X +2693(the)X +2818(locks)X +3014(were)X +3198(contested.)X +3568(In)X +3662(the)X +3787(\256rst)X +3938(two)X +4085(sets)X +4232(of)X +555 2046(tests,)N +743(we)X +863(ran)X +992(the)X +1116(full)X +1253(transaction)X +1631(processing)X +2000(benchmark)X +2383(described)X +2717(in)X +2805(section)X +3058(5.1.)X +3223(In)X +3315(one)X +3456(case)X +3620(we)X +3739(ran)X +3867(with)X +4034(both)X +4201(the)X +555 2136(database)N +854(and)X +992(log)X +1116(on)X +1218(the)X +1338(same)X +1525(disk)X +1680(\(1)X +1769(Disk\))X +1969(and)X +2107(in)X +2191(the)X +2311(second,)X +2576(we)X +2692(ran)X +2817(with)X +2981(the)X +3101(database)X +3400(and)X +3538(log)X +3661(on)X +3762(separate)X +4047(disks)X +4232(\(2)X +555 2226(Disk\).)N +800(In)X +894(the)X +1019(last)X +1157(test,)X +1315(we)X +1436(wanted)X +1695(to)X +1784(create)X +2004(a)X +2067(CPU)X +2249(bound)X +2476(environment,)X +2928(so)X +3026(we)X +3146(used)X +3319(a)X +3381(database)X +3684(small)X +3883(enough)X +4145(to)X +4233(\256t)X +555 2316(completely)N +941(in)X +1033(the)X +1161(cache)X +1375(and)X +1521(issued)X +1751(read-only)X +2089(transactions.)X +2541(The)X +2695(results)X +2933(in)X +3024(\256gure)X +3240(\256ve)X +3389(express)X +3659(the)X +3786(kernel)X +4016(call)X +4161(syn-)X +555 2406(chronization)N +980(performance)X +1411(as)X +1502(a)X +1562(percentage)X +1935(of)X +2026(the)X +2148(spinlock)X +2443(performance.)X +2914(For)X +3049(example,)X +3365(in)X +3451(the)X +3573(1)X +3637(disk)X +3794(case,)X +3977(the)X +4098(kernel)X +555 2496(call)N +697(implementation)X +1225(achieved)X +1537(4.4)X +1662(TPS)X +1824(\(transactions)X +2259(per)X +2387(second\))X +2662(while)X +2865(the)X +2988(semaphore)X +3361(implementation)X +3888(achieved)X +4199(4.6)X +555 2586(TPS,)N +735(and)X +874(the)X +995(relative)X +1259(performance)X +1689(of)X +1779(the)X +1900(kernel)X +2123(synchronization)X +2657(is)X +2732(96%)X +2901(that)X +3043(of)X +3132(the)X +3252(spinlock)X +3545(\(100)X +3714(*)X +3776(4.4)X +3898(/)X +3942(4.6\).)X +4111(There)X +555 2676(are)N +674(two)X +814(striking)X +1078(observations)X +1503(from)X +1679(these)X +1864(results:)X +10 f +635 2799(g)N +1 f +755(even)X +927(when)X +1121(the)X +1239(system)X +1481(is)X +1554(disk)X +1707(bound,)X +1947(the)X +2065(CPU)X +2240(cost)X +2389(of)X +2476(synchronization)X +3008(is)X +3081(noticeable,)X +3451(and)X +10 f +635 2922(g)N +1 f +755(when)X +949(we)X +1063(are)X +1182(CPU)X +1357(bound,)X +1597(the)X +1715(difference)X +2062(is)X +2135(dramatic)X +2436(\(67%\).)X +3 f +555 3108(4.4.)N +715(Transaction)X +1148(Protected)X +1499(Access)X +1747(Methods)X +1 f +755 3231(The)N +903(B-tree)X +1127(and)X +1266(\256xed)X +1449(length)X +1671(recno)X +1872(\(record)X +2127(number\))X +2421(access)X +2649(methods)X +2942(have)X +3116(been)X +3290(modi\256ed)X +3596(to)X +3680(provide)X +3947(transaction)X +555 3321(protection.)N +941(Whereas)X +1244(the)X +1363(previously)X +1722(published)X +2054(interface)X +2357(to)X +2440(the)X +2559(access)X +2786(routines)X +3065(had)X +3202(separate)X +3487(open)X +3664(calls)X +3832(for)X +3946(each)X +4114(of)X +4201(the)X +10 f +555 3507(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 Dt +2978 5036 MXY + 2978 5036 lineto + 2978 4662 lineto + 3093 4662 lineto + 3093 5036 lineto + 2978 5036 lineto +closepath 21 2978 4662 3093 5036 Dp +2518 MX + 2518 5036 lineto + 2518 3960 lineto + 2633 3960 lineto + 2633 5036 lineto + 2518 5036 lineto +closepath 3 2518 3960 2633 5036 Dp +2059 MX + 2059 5036 lineto + 2059 3946 lineto + 2174 3946 lineto + 2174 5036 lineto + 2059 5036 lineto +closepath 1 2059 3946 2174 5036 Dp +3 f +7 s +2912 5141(Read-only)N +1426 3767(of)N +1487(Spinlock)X +1710(Throughput)X +1480 3710(Throughput)N +1786(as)X +1850(a)X +1892(%)X +11 s +1670 4843(20)N +1670 4614(40)N +1670 4384(60)N +1670 4155(80)N +1648 3925(100)N +7 s +2041 5141(1)N +2083(Disk)X +2490(2)X +2532(Disks)X +5 Dt +1829 5036 MXY +1494 0 Dl +4 Ds +1 Dt +1829 4806 MXY +1494 0 Dl +1829 4577 MXY +1494 0 Dl +1829 4347 MXY +1494 0 Dl +1829 4118 MXY +1494 0 Dl +1829 3888 MXY +1494 0 Dl +3 Dt +-1 Ds +8 s +555 5360(Figure)N +753(5:)X +823(Kernel)X +1028(Overhead)X +1315(for)X +1413(System)X +1625(Call)X +1756(Synchronization.)X +1 f +2254(The)X +2370(performance)X +2708(of)X +2778(the)X +2873(kernel)X +3049(call)X +3158(synchronization)X +3583(is)X +3643(expressed)X +3911(as)X +3980(a)X +4024(percentage)X +555 5450(of)N +625(the)X +720(spinlock)X +954(synchronization)X +1379(performance.)X +1749(In)X +1819(disk)X +1943(bound)X +2120(cases)X +2271(\(1)X +2341(Disk)X +2479(and)X +2588(2)X +2637(Disks\),)X +2837(we)X +2928(see)X +3026(that)X +3139(4-6%)X +3294(of)X +3364(the)X +3459(performance)X +3797(is)X +3857(lost)X +3966(due)X +4074(to)X +4140(kernel)X +555 5540(calls)N +688(while)X +846(in)X +912(the)X +1006(CPU)X +1147(bound)X +1323(case,)X +1464(we)X +1554(have)X +1690(lost)X +1799(67%)X +1932(of)X +2001(the)X +2095(performance)X +2432(due)X +2540(to)X +2606(kernel)X +2781(calls.)X + +10 p +%%Page: 10 10 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(access)N +781(methods,)X +1092(we)X +1206(now)X +1364(have)X +1536(an)X +1632(integrated)X +1973(open)X +2149(call)X +2285(with)X +2447(the)X +2565(following)X +2896(calling)X +3134(conventions:)X +7 f +715 753(DB)N +859(*dbopen)X +1243(\(const)X +1579(char)X +1819(*file,)X +2155(int)X +2347(flags,)X +2683(int)X +2875(mode,)X +3163(DBTYPE)X +3499(type,)X +1291 843(int)N +1483(dbflags,)X +1915(const)X +2203(void)X +2443(*openinfo\))X +1 f +555 966(where)N +2 f +774(\256le)X +1 f +894(is)X +969(the)X +1089(name)X +1285(of)X +1374(the)X +1494(\256le)X +1618(being)X +1818(opened,)X +2 f +2092(\257ags)X +1 f +2265(and)X +2 f +2402(mode)X +1 f +2597(are)X +2717(the)X +2836(standard)X +3129(arguments)X +3484(to)X +3 f +3567(open)X +1 f +3731(\(2\),)X +2 f +3866(type)X +1 f +4021(is)X +4095(one)X +4232(of)X +555 1056(the)N +680(access)X +913(method)X +1180(types,)X +2 f +1396(db\257ags)X +1 f +1654(indicates)X +1966(the)X +2091(mode)X +2296(of)X +2390(the)X +2515(buffer)X +2739(pool)X +2907(and)X +3049(transaction)X +3427(protection,)X +3798(and)X +2 f +3940(openinfo)X +1 f +4246(is)X +555 1146(the)N +681(access)X +915(method)X +1183(speci\256c)X +1456(information.)X +1902(Currently,)X +2257(the)X +2383(possible)X +2673(values)X +2906(for)X +2 f +3028(db\257ags)X +1 f +3287(are)X +3414(DB_SHARED)X +3912(and)X +4055(DB_TP)X +555 1236(indicating)N +895(that)X +1035(buffers)X +1283(should)X +1516(be)X +1612(kept)X +1770(in)X +1852(a)X +1908(shared)X +2138(buffer)X +2355(pool)X +2517(and)X +2653(that)X +2793(the)X +2911(\256le)X +3033(should)X +3266(be)X +3362(transaction)X +3734(protected.)X +755 1359(The)N +900(modi\256cations)X +1355(required)X +1643(to)X +1725(add)X +1861(transaction)X +2233(protection)X +2578(to)X +2660(an)X +2756(access)X +2982(method)X +3242(are)X +3361(quite)X +3541(simple)X +3774(and)X +3910(localized.)X +715 1482(1.)N +795(Replace)X +1074(\256le)X +2 f +1196(open)X +1 f +1372(with)X +2 f +1534(buf_open)X +1 f +1832(.)X +715 1572(2.)N +795(Replace)X +1074(\256le)X +2 f +1196(read)X +1 f +1363(and)X +2 f +1499(write)X +1 f +1683(calls)X +1850(with)X +2012(buffer)X +2229(manager)X +2526(calls)X +2693(\()X +2 f +2720(buf_get)X +1 f +(,)S +2 f +3000(buf_unpin)X +1 f +3324(\).)X +715 1662(3.)N +795(Precede)X +1070(buffer)X +1287(manager)X +1584(calls)X +1751(with)X +1913(an)X +2009(appropriate)X +2395(\(read)X +2581(or)X +2668(write\))X +2880(lock)X +3038(call.)X +715 1752(4.)N +795(Before)X +1034(updates,)X +1319(issue)X +1499(a)X +1555(logging)X +1819(operation.)X +715 1842(5.)N +795(After)X +985(data)X +1139(have)X +1311(been)X +1483(accessed,)X +1805(release)X +2049(the)X +2167(buffer)X +2384(manager)X +2681(pin.)X +715 1932(6.)N +795(Provide)X +1064(undo/redo)X +1409(code)X +1581(for)X +1695(each)X +1863(type)X +2021(of)X +2108(log)X +2230(record)X +2456(de\256ned.)X +555 2071(The)N +702(following)X +1035(code)X +1209(fragments)X +1552(show)X +1743(how)X +1903(to)X +1987(transaction)X +2361(protect)X +2606(several)X +2856(updates)X +3123(to)X +3206(a)X +3263(B-tree.)X +7 s +3484 2039(3)N +10 s +3533 2071(In)N +3621(the)X +3740(unprotected)X +4140(case,)X +555 2161(an)N +652(open)X +829(call)X +966(is)X +1040(followed)X +1346(by)X +1447(a)X +1504(read)X +1664(call)X +1801(to)X +1884(obtain)X +2105(the)X +2224(meta-data)X +2562(for)X +2677(the)X +2796(B-tree.)X +3058(Instead,)X +3331(we)X +3446(issue)X +3627(an)X +3724(open)X +3901(to)X +3984(the)X +4102(buffer)X +555 2251(manager)N +852(to)X +934(obtain)X +1154(a)X +1210(\256le)X +1332(id)X +1414(and)X +1550(a)X +1606(buffer)X +1823(request)X +2075(to)X +2157(obtain)X +2377(the)X +2495(meta-data)X +2832(as)X +2919(shown)X +3148(below.)X +7 f +715 2374(char)N +955(*path;)X +715 2464(int)N +907(fid,)X +1147(flags,)X +1483(len,)X +1723(mode;)X +715 2644(/*)N +859(Obtain)X +1195(a)X +1291(file)X +1531(id)X +1675(with)X +1915(which)X +2203(to)X +2347(access)X +2683(the)X +2875(buffer)X +3211(pool)X +3451(*/)X +715 2734(fid)N +907(=)X +1003(buf_open\(path,)X +1723(flags,)X +2059(mode\);)X +715 2914(/*)N +859(Read)X +1099(the)X +1291(meta)X +1531(data)X +1771(\(page)X +2059(0\))X +2203(for)X +2395(the)X +2587(B-tree)X +2923(*/)X +715 3004(if)N +859(\(tp_lock\(fid,)X +1531(0,)X +1675(READ_LOCK\)\))X +1003 3094(return)N +1339(error;)X +715 3184(meta_data_ptr)N +1387(=)X +1483(buf_get\(fid,)X +2107(0,)X +2251(BF_PIN,)X +2635(&len\);)X +1 f +555 3307(The)N +714(BF_PIN)X +1014(argument)X +1350(to)X +2 f +1445(buf_get)X +1 f +1718(indicates)X +2036(that)X +2189(we)X +2316(wish)X +2500(to)X +2595(leave)X +2798(this)X +2946(page)X +3131(pinned)X +3382(in)X +3477(memory)X +3777(so)X +3881(that)X +4034(it)X +4111(is)X +4197(not)X +555 3397(swapped)N +862(out)X +990(while)X +1194(we)X +1314(are)X +1439(accessing)X +1772(it.)X +1881(The)X +2031(last)X +2167(argument)X +2495(to)X +2 f +2582(buf_get)X +1 f +2847(returns)X +3095(the)X +3218(number)X +3488(of)X +3580(bytes)X +3774(on)X +3879(the)X +4002(page)X +4179(that)X +555 3487(were)N +732(valid)X +912(so)X +1003(that)X +1143(the)X +1261(access)X +1487(method)X +1747(may)X +1905(initialize)X +2205(the)X +2323(page)X +2495(if)X +2564(necessary.)X +755 3610(Next,)N +955(consider)X +1251(inserting)X +1555(a)X +1615(record)X +1845(on)X +1949(a)X +2009(particular)X +2341(page)X +2517(of)X +2608(a)X +2668(B-tree.)X +2932(In)X +3022(the)X +3143(unprotected)X +3545(case,)X +3727(we)X +3844(read)X +4006(the)X +4127(page,)X +555 3700(call)N +2 f +693(_bt_insertat)X +1 f +1079(,)X +1121(and)X +1258(write)X +1444(the)X +1563(page.)X +1776(Instead,)X +2049(we)X +2164(lock)X +2323(the)X +2442(page,)X +2635(request)X +2888(the)X +3007(buffer,)X +3245(log)X +3368(the)X +3487(change,)X +3756(modify)X +4008(the)X +4127(page,)X +555 3790(and)N +691(release)X +935(the)X +1053(buffer.)X +7 f +715 3913(int)N +907(fid,)X +1147(len,)X +1387(pageno;)X +1867(/*)X +2011(Identifies)X +2539(the)X +2731(buffer)X +3067(*/)X +715 4003(int)N +907(index;)X +1867(/*)X +2011(Location)X +2443(at)X +2587(which)X +2875(to)X +3019(insert)X +3355(the)X +3547(new)X +3739(pair)X +3979(*/)X +715 4093(DBT)N +907(*keyp,)X +1243(*datap;)X +1867(/*)X +2011(Key/Data)X +2443(pair)X +2683(to)X +2827(be)X +2971(inserted)X +3403(*/)X +715 4183(DATUM)N +1003(*d;)X +1867(/*)X +2011(Key/data)X +2443(structure)X +2923(to)X +3067(insert)X +3403(*/)X +715 4363(/*)N +859(Lock)X +1099(and)X +1291(request)X +1675(the)X +1867(buffer)X +2203(*/)X +715 4453(if)N +859(\(tp_lock\(fid,)X +1531(pageno,)X +1915(WRITE_LOCK\)\))X +1003 4543(return)N +1339(error;)X +715 4633(buffer_ptr)N +1243(=)X +1339(buf_get\(fid,)X +1963(pageno,)X +2347(BF_PIN,)X +2731(&len\);)X +715 4813(/*)N +859(Log)X +1051(and)X +1243(perform)X +1627(the)X +1819(update)X +2155(*/)X +715 4903(log_insdel\(BTREE_INSERT,)N +1915(fid,)X +2155(pageno,)X +2539(keyp,)X +2827(datap\);)X +715 4993(_bt_insertat\(buffer_ptr,)N +1915(d,)X +2059(index\);)X +715 5083(buf_unpin\(buffer_ptr\);)N +1 f +555 5206(Succinctly,)N +942(the)X +1068(algorithm)X +1407(for)X +1529(turning)X +1788(unprotected)X +2195(code)X +2375(into)X +2527(protected)X +2854(code)X +3034(is)X +3115(to)X +3205(replace)X +3466(read)X +3633(operations)X +3995(with)X +2 f +4165(lock)X +1 f +555 5296(and)N +2 f +691(buf_get)X +1 f +951(operations)X +1305(and)X +1441(write)X +1626(operations)X +1980(with)X +2 f +2142(log)X +1 f +2264(and)X +2 f +2400(buf_unpin)X +1 f +2744(operations.)X +8 s +10 f +555 5458(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5536(3)N +8 s +766 5561(The)N +884(following)X +1152(code)X +1291(fragments)X +1565(are)X +1661(examples,)X +1937(but)X +2038(do)X +2120(not)X +2220(de\256ne)X +2394(the)X +2490(\256nal)X +2622(interface.)X +2894(The)X +3011(\256nal)X +3143(interface)X +3383(will)X +3501(be)X +3579(determined)X +3884(after)X +4018(LIBTP)X +4214(has)X +555 5633(been)N +691(fully)X +828(integrated)X +1099(with)X +1229(the)X +1323(most)X +1464(recent)X +3 f +1635(db)X +1 f +1707(\(3\))X +1797(release)X +1989(from)X +2129(the)X +2223(Computer)X +2495(Systems)X +2725(Research)X +2974(Group)X +3153(at)X +3215(University)X +3501(of)X +3570(California,)X +3861(Berkeley.)X + +11 p +%%Page: 11 11 +8 s 8 xH 0 xS 1 f +10 s +3 f +555 630(5.)N +655(Performance)X +1 f +755 753(In)N +845(this)X +983(section,)X +1253(we)X +1370(present)X +1625(the)X +1746(results)X +1978(of)X +2067(two)X +2209(very)X +2374(different)X +2673(benchmarks.)X +3103(The)X +3250(\256rst)X +3396(is)X +3471(an)X +3569(online)X +3791(transaction)X +4165(pro-)X +555 843(cessing)N +824(benchmark,)X +1234(similar)X +1489(to)X +1584(the)X +1715(standard)X +2020(TPCB,)X +2272(but)X +2407(has)X +2547(been)X +2732(adapted)X +3015(to)X +3110(run)X +3250(in)X +3345(a)X +3414(desktop)X +3696(environment.)X +4174(The)X +555 933(second)N +798(emulates)X +1103(a)X +1159(computer-aided)X +1683(design)X +1912(environment)X +2337(and)X +2473(provides)X +2769(more)X +2954(complex)X +3250(query)X +3453(processing.)X +3 f +555 1119(5.1.)N +715(Transaction)X +1148(Processing)X +1533(Benchmark)X +1 f +755 1242(For)N +887(this)X +1023(section,)X +1291(all)X +1392(performance)X +1820(numbers)X +2117(shown)X +2346(except)X +2576(for)X +2690(the)X +2808(commercial)X +3207(database)X +3504(system)X +3746(were)X +3923(obtained)X +4219(on)X +555 1332(a)N +614(DECstation)X +1009(5000/200)X +1333(with)X +1497(32MBytes)X +1852(of)X +1941(memory)X +2230(running)X +2501(Ultrix)X +2714(V4.0,)X +2914(accessing)X +3244(a)X +3302(DEC)X +3484(RZ57)X +3688(1GByte)X +3959(disk)X +4114(drive.)X +555 1422(The)N +720(commercial)X +1139(relational)X +1482(database)X +1799(system)X +2061(tests)X +2242(were)X +2438(run)X +2584(on)X +2703(a)X +2778(comparable)X +3192(machine,)X +3523(a)X +3598(Sparcstation)X +4033(1+)X +4157(with)X +555 1512(32MBytes)N +915(memory)X +1209(and)X +1352(a)X +1415(1GByte)X +1691(external)X +1976(disk)X +2135(drive.)X +2366(The)X +2517(database,)X +2840(binaries)X +3120(and)X +3262(log)X +3390(resided)X +3648(on)X +3754(the)X +3878(same)X +4069(device.)X +555 1602(Reported)N +869(times)X +1062(are)X +1181(the)X +1299(means)X +1524(of)X +1611(\256ve)X +1751(tests)X +1913(and)X +2049(have)X +2221(standard)X +2513(deviations)X +2862(within)X +3086(two)X +3226(percent)X +3483(of)X +3570(the)X +3688(mean.)X +755 1725(The)N +905(test)X +1041(database)X +1343(was)X +1493(con\256gured)X +1861(according)X +2203(to)X +2290(the)X +2413(TPCB)X +2637(scaling)X +2889(rules)X +3070(for)X +3189(a)X +3250(10)X +3355(transaction)X +3732(per)X +3860(second)X +4108(\(TPS\))X +555 1815(system)N +817(with)X +999(1,000,000)X +1359(account)X +1649(records,)X +1946(100)X +2106(teller)X +2311(records,)X +2607(and)X +2762(10)X +2881(branch)X +3139(records.)X +3455(Where)X +3709(TPS)X +3885(numbers)X +4200(are)X +555 1905(reported,)N +865(we)X +981(are)X +1102(running)X +1373(a)X +1431(modi\256ed)X +1737(version)X +1995(of)X +2084(the)X +2203(industry)X +2486(standard)X +2779(transaction)X +3152(processing)X +3516(benchmark,)X +3914(TPCB.)X +4174(The)X +555 1995(TPCB)N +780(benchmark)X +1163(simulates)X +1491(a)X +1553(withdrawal)X +1940(performed)X +2301(by)X +2407(a)X +2469(hypothetical)X +2891(teller)X +3082(at)X +3166(a)X +3228(hypothetical)X +3650(bank.)X +3872(The)X +4022(database)X +555 2085(consists)N +831(of)X +921(relations)X +1220(\(\256les\))X +1430(for)X +1547(accounts,)X +1871(branches,)X +2200(tellers,)X +2439(and)X +2578(history.)X +2863(For)X +2997(each)X +3168(transaction,)X +3563(the)X +3684(account,)X +3976(teller,)X +4183(and)X +555 2175(branch)N +795(balances)X +1093(must)X +1269(be)X +1366(updated)X +1641(to)X +1724(re\257ect)X +1946(the)X +2065(withdrawal)X +2447(and)X +2584(a)X +2640(history)X +2882(record)X +3108(is)X +3181(written)X +3428(which)X +3644(contains)X +3931(the)X +4049(account)X +555 2265(id,)N +657(branch)X +896(id,)X +998(teller)X +1183(id,)X +1285(and)X +1421(the)X +1539(amount)X +1799(of)X +1886(the)X +2004(withdrawal)X +2385([TPCB90].)X +755 2388(Our)N +914(implementation)X +1450(of)X +1551(the)X +1683(benchmark)X +2074(differs)X +2317(from)X +2506(the)X +2637(speci\256cation)X +3075(in)X +3170(several)X +3431(aspects.)X +3736(The)X +3894(speci\256cation)X +555 2478(requires)N +840(that)X +985(the)X +1108(database)X +1410(keep)X +1587(redundant)X +1933(logs)X +2091(on)X +2196(different)X +2498(devices,)X +2784(but)X +2911(we)X +3030(use)X +3162(a)X +3223(single)X +3439(log.)X +3606(Furthermore,)X +4052(all)X +4157(tests)X +555 2568(were)N +734(run)X +863(on)X +965(a)X +1023(single,)X +1256(centralized)X +1631(system)X +1875(so)X +1968(there)X +2151(is)X +2226(no)X +2328(notion)X +2553(of)X +2641(remote)X +2885(accesses.)X +3219(Finally,)X +3486(we)X +3601(calculated)X +3948(throughput)X +555 2658(by)N +662(dividing)X +955(the)X +1080(total)X +1249(elapsed)X +1517(time)X +1686(by)X +1793(the)X +1918(number)X +2190(of)X +2284(transactions)X +2694(processed)X +3038(rather)X +3253(than)X +3418(by)X +3525(computing)X +3894(the)X +4018(response)X +555 2748(time)N +717(for)X +831(each)X +999(transaction.)X +755 2871(The)N +912(performance)X +1351(comparisons)X +1788(focus)X +1993(on)X +2104(traditional)X +2464(Unix)X +2655(techniques)X +3029(\(unprotected,)X +3486(using)X +3 f +3690(\257ock)X +1 f +3854(\(2\))X +3979(and)X +4126(using)X +3 f +555 2961(fsync)N +1 f +733(\(2\)\))X +884(and)X +1030(a)X +1096(commercial)X +1504(relational)X +1836(database)X +2142(system.)X +2433(Well-behaved)X +2913(applications)X +3329(using)X +3 f +3531(\257ock)X +1 f +3695(\(2\))X +3818(are)X +3946(guaranteed)X +555 3051(that)N +704(concurrent)X +1077(processes')X +1441(updates)X +1715(do)X +1824(not)X +1955(interact)X +2225(with)X +2396(one)X +2541(another,)X +2831(but)X +2962(no)X +3070(guarantees)X +3442(about)X +3648(atomicity)X +3978(are)X +4105(made.)X +555 3141(That)N +731(is,)X +833(if)X +911(the)X +1038(system)X +1289(crashes)X +1555(in)X +1646(mid-transaction,)X +2198(only)X +2369(parts)X +2554(of)X +2649(that)X +2797(transaction)X +3177(will)X +3329(be)X +3433(re\257ected)X +3738(in)X +3828(the)X +3954 0.3125(after-crash)AX +555 3231(state)N +725(of)X +815(the)X +936(database.)X +1276(The)X +1424(use)X +1554(of)X +3 f +1643(fsync)X +1 f +1821(\(2\))X +1937(at)X +2017(transaction)X +2391(commit)X +2657(time)X +2821(provides)X +3119(guarantees)X +3485(of)X +3574(durability)X +3907(after)X +4077(system)X +555 3321(failure.)N +825(However,)X +1160(there)X +1341(is)X +1414(no)X +1514(mechanism)X +1899(to)X +1981(perform)X +2260(transaction)X +2632(abort.)X +3 f +555 3507(5.1.1.)N +775(Single-User)X +1191(Tests)X +1 f +755 3630(These)N +978(tests)X +1151(compare)X +1459(LIBTP)X +1712(in)X +1804(a)X +1870(variety)X +2123(of)X +2220(con\256gurations)X +2708(to)X +2800(traditional)X +3159(UNIX)X +3390(solutions)X +3708(and)X +3854(a)X +3920(commercial)X +555 3720(relational)N +884(database)X +1187(system)X +1435(\(RDBMS\).)X +1814(To)X +1929(demonstrate)X +2347(the)X +2471(server)X +2694(architecture)X +3100(we)X +3220(built)X +3392(a)X +3454(front)X +3636(end)X +3777(test)X +3913(process)X +4179(that)X +555 3810(uses)N +732(TCL)X +922([OUST90])X +1304(to)X +1405(parse)X +1614(database)X +1930(access)X +2175(commands)X +2561(and)X +2716(call)X +2870(the)X +3006(database)X +3321(access)X +3565(routines.)X +3901(In)X +4006(one)X +4160(case)X +555 3900(\(SERVER\),)N +956(frontend)X +1249(and)X +1386(backend)X +1675(processes)X +2004(were)X +2181(created)X +2434(which)X +2650(communicated)X +3142(via)X +3260(an)X +3356(IP)X +3447(socket.)X +3712(In)X +3799(the)X +3917(second)X +4160(case)X +555 3990(\(TCL\),)N +802(a)X +860(single)X +1073(process)X +1336(read)X +1497(queries)X +1751(from)X +1929(standard)X +2223(input,)X +2429(parsed)X +2660(them,)X +2861(and)X +2998(called)X +3211(the)X +3330(database)X +3628(access)X +3855(routines.)X +4174(The)X +555 4080(performance)N +987(difference)X +1338(between)X +1630(the)X +1752(TCL)X +1927(and)X +2067(SERVER)X +2397(tests)X +2563(quanti\256es)X +2898(the)X +3020(communication)X +3542(overhead)X +3861(of)X +3952(the)X +4074(socket.)X +555 4170(The)N +732(RDBMS)X +1063(implementation)X +1617(used)X +1816(embedded)X +2198(SQL)X +2401(in)X +2515(C)X +2620(with)X +2814(stored)X +3062(database)X +3391(procedures.)X +3835(Therefore,)X +4224(its)X +555 4260(con\256guration)N +1003(is)X +1076(a)X +1132(hybrid)X +1361(of)X +1448(the)X +1566(single)X +1777(process)X +2038(architecture)X +2438(and)X +2574(the)X +2692(server)X +2909(architecture.)X +3349(The)X +3494(graph)X +3697(in)X +3779(\256gure)X +3986(six)X +4099(shows)X +555 4350(a)N +611(comparison)X +1005(of)X +1092(the)X +1210(following)X +1541(six)X +1654(con\256gurations:)X +1126 4506(LIBTP)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(single)X +2671(application.)X +1126 4596(TCL)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(single)X +2671(application,)X +3067(requires)X +3346(query)X +3549(parsing.)X +1126 4686(SERVER)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(server)X +2677(con\256guration,)X +3144(requires)X +3423(query)X +3626(parsing.)X +1126 4776(NOTP)N +1552(Uses)X +1728(no)X +1828(locking,)X +2108(logging,)X +2392(or)X +2479(concurrency)X +2897(control.)X +1126 4866(FLOCK)N +1552(Uses)X +3 f +1728(\257ock)X +1 f +1892(\(2\))X +2006(for)X +2120(concurrency)X +2538(control)X +2785(and)X +2921(nothing)X +3185(for)X +3299(durability.)X +1126 4956(FSYNC)N +1552(Uses)X +3 f +1728(fsync)X +1 f +1906(\(2\))X +2020(for)X +2134(durability)X +2465(and)X +2601(nothing)X +2865(for)X +2979(concurrency)X +3397(control.)X +1126 5046(RDBMS)N +1552(Uses)X +1728(a)X +1784(commercial)X +2183(relational)X +2506(database)X +2803(system.)X +755 5235(The)N +902(results)X +1133(show)X +1324(that)X +1466(LIBTP,)X +1730(both)X +1894(in)X +1978(the)X +2098(procedural)X +2464(and)X +2602(parsed)X +2834(environments,)X +3312(is)X +3387(competitive)X +3787(with)X +3951(a)X +4009(commer-)X +555 5325(cial)N +692(system)X +935(\(comparing)X +1326(LIBTP,)X +1589(TCL,)X +1781(and)X +1917(RDBMS\).)X +2263(Compared)X +2617(to)X +2699(existing)X +2972(UNIX)X +3193(solutions,)X +3521(LIBTP)X +3763(is)X +3836(approximately)X +555 5415(15%)N +738(slower)X +988(than)X +1162(using)X +3 f +1371(\257ock)X +1 f +1535(\(2\))X +1665(or)X +1768(no)X +1884(protection)X +2245(but)X +2383(over)X +2562(80%)X +2745(better)X +2964(than)X +3137(using)X +3 f +3345(fsync)X +1 f +3523(\(2\))X +3652(\(comparing)X +4057(LIBTP,)X +555 5505(FLOCK,)N +857(NOTP,)X +1106(and)X +1242(FSYNC\).)X + +12 p +%%Page: 12 12 +10 s 10 xH 0 xS 1 f +3 f +8 s +3500 2184(RDBMS)N +1 Dt +3553 2085 MXY + 3553 2085 lineto + 3676 2085 lineto + 3676 1351 lineto + 3553 1351 lineto + 3553 2085 lineto +closepath 16 3553 1351 3676 2085 Dp +2018 2184(SERVER)N +1720 1168 MXY +0 917 Dl +122 0 Dl +0 -917 Dl +-122 0 Dl +1715 2184(TCL)N +2087 1534 MXY + 2087 1534 lineto + 2209 1534 lineto + 2209 2085 lineto + 2087 2085 lineto + 2087 1534 lineto +closepath 12 2087 1534 2209 2085 Dp +3187 MX + 3187 1534 lineto + 3309 1534 lineto + 3309 2085 lineto + 3187 2085 lineto + 3187 1534 lineto +closepath 19 3187 1534 3309 2085 Dp +3142 2184(FSYNC)N +2425(NOTP)X +2453 955 MXY + 2453 955 lineto + 2576 955 lineto + 2576 2085 lineto + 2453 2085 lineto + 2453 955 lineto +closepath 21 2453 955 2576 2085 Dp +2820 1000 MXY + 2820 1000 lineto + 2942 1000 lineto + 2942 2085 lineto + 2820 2085 lineto + 2820 1000 lineto +closepath 14 2820 1000 2942 2085 Dp +5 Dt +1231 2085 MXY +2567 0 Dl +4 Ds +1 Dt +1231 1840 MXY +2567 0 Dl +1231 1596 MXY +2567 0 Dl +1231 1351 MXY +2567 0 Dl +1231 1108 MXY +2567 0 Dl +1231 863 MXY +2567 0 Dl +11 s +1087 1877(2)N +1087 1633(4)N +1087 1388(6)N +1087 1145(8)N +1065 900(10)N +1028 763(TPS)N +-1 Ds +1353 2085 MXY + 1353 2085 lineto + 1353 1151 lineto + 1476 1151 lineto + 1476 2085 lineto + 1353 2085 lineto +closepath 3 1353 1151 1476 2085 Dp +8 s +1318 2184(LIBTP)N +2767(FLOCK)X +3 Dt +-1 Ds +10 s +1597 2399(Figure)N +1844(6:)X +1931(Single-User)X +2347(Performance)X +2814(Comparison.)X +1 f +10 f +555 2579(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 2855(5.1.2.)N +775(Multi-User)X +1174(Tests)X +1 f +755 2978(While)N +975(the)X +1097(single-user)X +1473(tests)X +1639(form)X +1819(a)X +1878(basis)X +2061(for)X +2178(comparing)X +2544(LIBTP)X +2789(to)X +2874(other)X +3062(systems,)X +3358(our)X +3488(goal)X +3649(in)X +3734(multi-user)X +4086(testing)X +555 3068(was)N +714(to)X +810(analyze)X +1089(its)X +1197(scalability.)X +1579(To)X +1701(this)X +1849(end,)X +2018(we)X +2145(have)X +2330(run)X +2470(the)X +2601(benchmark)X +2991(in)X +3086(three)X +3280(modes,)X +3542(the)X +3673(normal)X +3933(disk)X +4099(bound)X +555 3158(con\256guration)N +1010(\(\256gure)X +1252(seven\),)X +1510(a)X +1573(CPU)X +1755(bound)X +1982(con\256guration)X +2436(\(\256gure)X +2677(eight,)X +2884(READ-ONLY\),)X +3426(and)X +3569(lock)X +3734(contention)X +4099(bound)X +555 3248(\(\256gure)N +796(eight,)X +1003(NO_FSYNC\).)X +1510(Since)X +1715(the)X +1840(normal)X +2094(con\256guration)X +2548(is)X +2628(completely)X +3011(disk)X +3171(bound)X +3398(\(each)X +3600(transaction)X +3978(requires)X +4263(a)X +555 3354(random)N +823(read,)X +1005(a)X +1064(random)X +1332(write,)X +1540(and)X +1679(a)X +1738(sequential)X +2086(write)X +7 s +2251 3322(4)N +10 s +3354(\))Y +2329(we)X +2446(expect)X +2679(to)X +2764(see)X +2890(little)X +3059(performance)X +3489(improvement)X +3939(as)X +4028(the)X +4148(mul-)X +555 3444(tiprogramming)N +1064(level)X +1249(increases.)X +1613(In)X +1709(fact,)X +1879(\256gure)X +2095(seven)X +2307(reveals)X +2564(that)X +2713(we)X +2836(are)X +2964(able)X +3127(to)X +3218(overlap)X +3487(CPU)X +3670(and)X +3814(disk)X +3975(utilization)X +555 3534(slightly)N +825(producing)X +1181(approximately)X +1674(a)X +1740(10%)X +1917(performance)X +2354(improvement)X +2811(with)X +2983(two)X +3133(processes.)X +3511(After)X +3711(that)X +3861(point,)X +4075(perfor-)X +555 3624(mance)N +785(drops)X +983(off,)X +1117(and)X +1253(at)X +1331(a)X +1387(multi-programming)X +2038(level)X +2214(of)X +2301(4,)X +2381(we)X +2495(are)X +2614(performing)X +2995(worse)X +3207(than)X +3365(in)X +3447(the)X +3565(single)X +3776(process)X +4037(case.)X +755 3747(Similar)N +1021(behavior)X +1333(was)X +1489(reported)X +1787(on)X +1897(the)X +2025(commercial)X +2434(relational)X +2767(database)X +3074(system)X +3326(using)X +3529(the)X +3657(same)X +3852(con\256guration.)X +555 3837(The)N +707(important)X +1045(conclusion)X +1419(to)X +1508(draw)X +1696(from)X +1879(this)X +2021(is)X +2101(that)X +2248(you)X +2395(cannot)X +2636(attain)X +2841(good)X +3028(multi-user)X +3384(scaling)X +3638(on)X +3745(a)X +3808(badly)X +4013(balanced)X +555 3927(system.)N +839(If)X +915(multi-user)X +1266(performance)X +1695(on)X +1797(applications)X +2205(of)X +2293(this)X +2429(sort)X +2570(is)X +2644(important,)X +2996(one)X +3133(must)X +3309(have)X +3482(a)X +3539(separate)X +3824(logging)X +4089(device)X +555 4017(and)N +697(horizontally)X +1110(partition)X +1407(the)X +1531(database)X +1834(to)X +1921(allow)X +2124(a)X +2185(suf\256ciently)X +2570(high)X +2737(degree)X +2977(of)X +3069(multiprogramming)X +3698(that)X +3843(group)X +4055(commit)X +555 4107(can)N +687(amortize)X +988(the)X +1106(cost)X +1255(of)X +1342(log)X +1464(\257ushing.)X +755 4230(By)N +871(using)X +1067(a)X +1126(very)X +1292(small)X +1488(database)X +1788(\(one)X +1954(that)X +2097(can)X +2232(be)X +2331(entirely)X +2599(cached)X +2846(in)X +2930(main)X +3112(memory\))X +3428(and)X +3566(read-only)X +3896(transactions,)X +555 4320(we)N +670(generated)X +1004(a)X +1061(CPU)X +1236(bound)X +1456(environment.)X +1921(By)X +2034(using)X +2227(the)X +2345(same)X +2530(small)X +2723(database,)X +3040(the)X +3158(complete)X +3472(TPCB)X +3691(transaction,)X +4083(and)X +4219(no)X +3 f +555 4410(fsync)N +1 f +733(\(2\))X +862(on)X +977(the)X +1110(log)X +1247(at)X +1340(commit,)X +1639(we)X +1768(created)X +2036(a)X +2107(lock)X +2280(contention)X +2652(bound)X +2886(environment.)X +3365(The)X +3524(small)X +3731(database)X +4042(used)X +4223(an)X +555 4500(account)N +828(\256le)X +953(containing)X +1314(only)X +1479(1000)X +1662(records)X +1922(rather)X +2133(than)X +2294(the)X +2415(full)X +2549(1,000,000)X +2891(records)X +3150(and)X +3288(ran)X +3413(enough)X +3671(transactions)X +4076(to)X +4160(read)X +555 4590(the)N +677(entire)X +883(database)X +1183(into)X +1330(the)X +1451(buffer)X +1671(pool)X +1836(\(2000\))X +2073(before)X +2302(beginning)X +2645(measurements.)X +3147(The)X +3295(read-only)X +3626(transaction)X +4001(consisted)X +555 4680(of)N +646(three)X +831(database)X +1132(reads)X +1326(\(from)X +1533(the)X +1655(1000)X +1839(record)X +2069(account)X +2343(\256le,)X +2489(the)X +2611(100)X +2754(record)X +2983(teller)X +3171(\256le,)X +3316(and)X +3455(the)X +3576(10)X +3679(record)X +3908(branch)X +4150(\256le\).)X +555 4770(Since)N +759(no)X +865(data)X +1025(were)X +1208(modi\256ed)X +1518(and)X +1660(no)X +1766(history)X +2014(records)X +2277(were)X +2460(written,)X +2733(no)X +2839(log)X +2966(records)X +3228(were)X +3410(written.)X +3702(For)X +3838(the)X +3961(contention)X +555 4860(bound)N +780(con\256guration,)X +1252(we)X +1371(used)X +1543(the)X +1666(normal)X +1918(TPCB)X +2142(transaction)X +2519(\(against)X +2798(the)X +2920(small)X +3117(database\))X +3445(and)X +3585(disabled)X +3876(the)X +3998(log)X +4124(\257ush.)X +555 4950(Figure)N +784(eight)X +964(shows)X +1184(both)X +1346(of)X +1433(these)X +1618(results.)X +755 5073(The)N +902(read-only)X +1231(test)X +1363(indicates)X +1669(that)X +1810(we)X +1925(barely)X +2147(scale)X +2329(at)X +2408(all)X +2509(in)X +2592(the)X +2711(CPU)X +2887(bound)X +3108(case.)X +3308(The)X +3454(explanation)X +3849(for)X +3964(that)X +4105(is)X +4179(that)X +555 5163(even)N +735(with)X +905(a)X +969(single)X +1188(process,)X +1477(we)X +1599(are)X +1726(able)X +1888(to)X +1978(drive)X +2171(the)X +2297(CPU)X +2480(utilization)X +2832(to)X +2922(96%.)X +3137(As)X +3254(a)X +3317(result,)X +3542(that)X +3689(gives)X +3885(us)X +3983(very)X +4153(little)X +555 5253(room)N +753(for)X +876(improvement,)X +1352(and)X +1497(it)X +1570(takes)X +1764(a)X +1829(multiprogramming)X +2462(level)X +2647(of)X +2743(four)X +2906(to)X +2997(approach)X +3321(100%)X +3537(CPU)X +3721(saturation.)X +4106(In)X +4201(the)X +555 5343(case)N +718(where)X +939(we)X +1057(do)X +1161(perform)X +1444(writes,)X +1684(we)X +1802(are)X +1925(interested)X +2261(in)X +2347(detecting)X +2665(when)X +2863(lock)X +3025(contention)X +3387(becomes)X +3691(a)X +3750(dominant)X +4075(perfor-)X +555 5433(mance)N +787(factor.)X +1037(Contention)X +1414(will)X +1560(cause)X +1761(two)X +1903(phenomena;)X +2317(we)X +2433(will)X +2579(see)X +2704(transactions)X +3109(queueing)X +3425(behind)X +3665(frequently)X +4017(accessed)X +555 5523(data,)N +731(and)X +869(we)X +985(will)X +1131(see)X +1256(transaction)X +1629(abort)X +1815(rates)X +1988(increasing)X +2339(due)X +2476(to)X +2559(deadlock.)X +2910(Given)X +3127(that)X +3268(the)X +3387(branch)X +3627(\256le)X +3750(contains)X +4038(only)X +4201(ten)X +8 s +10 f +555 5595(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5673(4)N +8 s +763 5698(Although)N +1021(the)X +1115(log)X +1213(is)X +1272(written)X +1469(sequentially,)X +1810(we)X +1900(do)X +1980(not)X +2078(get)X +2172(the)X +2266(bene\256t)X +2456(of)X +2525(sequentiality)X +2868(since)X +3015(the)X +3109(log)X +3207(and)X +3315(database)X +3550(reside)X +3718(on)X +3798(the)X +3892(same)X +4039(disk.)X + +13 p +%%Page: 13 13 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +3187 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3286 2028 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3384 1926 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3483 1910 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3581 1910 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3680 1832 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3778 1909 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3877 1883 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3975 1679 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +4074 1487 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +5 Dt +3187 2060 MXY +99 -24 Dl +98 -101 Dl +99 -16 Dl +98 0 Dl +99 -78 Dl +98 77 Dl +99 -26 Dl +98 -204 Dl +99 -192 Dl +3 f +6 s +4088 1516(SMALL)N +3 Dt +3187 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3286 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3384 2041 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3483 1990 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3581 1843 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +3680 1578 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3778 1496 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3877 1430 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3975 1269 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +4074 1070 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1 Dt +3187 2060 MXY +99 0 Dl +98 -10 Dl +99 -51 Dl +98 -147 Dl +99 -265 Dl +98 -82 Dl +99 -66 Dl +98 -161 Dl +99 -199 Dl +4088 1099(LARGE)N +5 Dt +3089 2060 MXY +985 0 Dl +3089 MX +0 -1174 Dl +4 Ds +1 Dt +3581 2060 MXY +0 -1174 Dl +4074 2060 MXY +0 -1174 Dl +3089 1825 MXY +985 0 Dl +9 s +2993 1855(25)N +3089 1591 MXY +985 0 Dl +2993 1621(50)N +3089 1356 MXY +985 0 Dl +2993 1386(75)N +3089 1121 MXY +985 0 Dl +2957 1151(100)N +3089 886 MXY +985 0 Dl +2957 916(125)N +3281 2199(Multiprogramming)N +3071 2152(0)N +3569(5)X +4038(10)X +2859 787(Aborts)N +3089(per)X +3211(500)X +2901 847(transactions)N +-1 Ds +3 Dt +2037 1342 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2125 1358 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2213 1341 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2301 1191 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2388 1124 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-17 0 Dl +2476 1157 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2564 1157 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2652 1161 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2740 1153 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2828 1150 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +5 Dt +2037 1351 MXY +88 16 Dl +88 -17 Dl +88 -150 Dl +87 -67 Dl +88 33 Dl +88 0 Dl +88 4 Dl +88 -8 Dl +88 -3 Dl +6 s +2685 1234(READ-ONLY)N +3 Dt +2037 1464 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2125 1640 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2213 1854 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2301 1872 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2388 1871 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-17 0 Dl +2476 1933 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2564 1914 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2652 1903 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2740 1980 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2828 2004 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +1 Dt +2037 1473 MXY +88 176 Dl +88 214 Dl +88 18 Dl +87 -2 Dl +88 63 Dl +88 -19 Dl +88 -11 Dl +88 77 Dl +88 24 Dl +2759 1997(NO-FSYNC)N +5 Dt +1949 2060 MXY +879 0 Dl +1949 MX +0 -1174 Dl +4 Ds +1 Dt +2388 2060 MXY +0 -1174 Dl +2828 2060 MXY +0 -1174 Dl +1949 1825 MXY +879 0 Dl +9 s +1842 1855(40)N +1949 1591 MXY +879 0 Dl +1842 1621(80)N +1949 1356 MXY +879 0 Dl +1806 1386(120)N +1949 1121 MXY +879 0 Dl +1806 1151(160)N +1949 886 MXY +879 0 Dl +1806 916(200)N +2088 2199(Multiprogramming)N +1844 863(in)N +1922(TPS)X +1761 792(Throughput)N +1931 2121(0)N +2370 2133(5)N +2792(10)X +6 s +1679 1833(LIBTP)N +-1 Ds +3 Dt +837 1019 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +929 878 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1021 939 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1113 1043 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1205 1314 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1297 1567 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1389 1665 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1481 1699 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1573 1828 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1665 1804 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +5 Dt +837 1027 MXY +92 -141 Dl +92 62 Dl +92 104 Dl +92 271 Dl +92 253 Dl +92 98 Dl +92 34 Dl +92 129 Dl +92 -24 Dl +745 2060 MXY +920 0 Dl +745 MX +0 -1174 Dl +4 Ds +1 Dt +1205 2060 MXY +0 -1174 Dl +1665 2060 MXY +0 -1174 Dl +745 1766 MXY +920 0 Dl +9 s +673 1796(3)N +745 1473 MXY +920 0 Dl +673 1503(5)N +745 1180 MXY +920 0 Dl +673 1210(8)N +745 886 MXY +920 0 Dl +637 916(10)N +905 2199(Multiprogramming)N +622 851(in)N +700(TPS)X +575 792(Throughput)N +733 2152(0)N +1196(5)X +1629(10)X +3 Dt +-1 Ds +8 s +655 2441(Figure)N +872(7:)X +960(Multi-user)X +1286(Performance.)X +1 f +655 2531(Since)N +825(the)X +931(con\256guration)X +1300(is)X +1371(completely)X +655 2621(disk)N +790(bound,)X +994(we)X +1096(see)X +1204(only)X +1345(a)X +1400(small)X +1566(im-)X +655 2711(provement)N +964(by)X +1064(adding)X +1274(a)X +1337(second)X +1549(pro-)X +655 2801(cess.)N +849(Adding)X +1081(any)X +1213(more)X +1383(concurrent)X +655 2891(processes)N +935(causes)X +1137(performance)X +1493(degra-)X +655 2981(dation.)N +3 f +1927 2441(Figure)N +2149(8:)X +2243(Multi-user)X +2574(Performance)X +1927 2531(on)N +2021(a)X +2079(small)X +2251(database.)X +1 f +2551(With)X +2704(one)X +2821(pro-)X +1927 2621(cess,)N +2075(we)X +2174(are)X +2276(driving)X +2486(the)X +2589(CPU)X +2739(at)X +2810(96%)X +1927 2711(utilization)N +2215(leaving)X +2430(little)X +2575(room)X +2737(for)X +2838(im-)X +1927 2801(provement)N +2238(as)X +2328(the)X +2443(multiprogramming)X +1927 2891(level)N +2091(increases.)X +2396(In)X +2489(the)X +2607(NO-FSYNC)X +1927 2981(case,)N +2076(lock)X +2209(contention)X +2502(degrades)X +2751(perfor-)X +1927 3071(mance)N +2117(as)X +2194(soon)X +2339(as)X +2416(a)X +2468(second)X +2669(process)X +2884(is)X +1927 3161(added.)N +3 f +3199 2441(Figure)N +3405(9:)X +3482(Abort)X +3669(rates)X +3827(on)X +3919(the)X +4028(TPCB)X +3199 2531(Benchmark.)N +1 f +3589(The)X +3726(abort)X +3895(rate)X +4028(climbs)X +3199 2621(more)N +3366(quickly)X +3594(for)X +3704(the)X +3818(large)X +3980(database)X +3199 2711(test)N +3324(since)X +3491(processes)X +3771(are)X +3884(descheduled)X +3199 2801(more)N +3409(frequently,)X +3766(allowing)X +4068(more)X +3199 2891(processes)N +3459(to)X +3525(vie)X +3619(for)X +3709(the)X +3803(same)X +3950(locks.)X +10 s +10 f +555 3284(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 3560(records,)N +835(we)X +952(expect)X +1185(contention)X +1546(to)X +1631(become)X +1904(a)X +1963(factor)X +2174(quickly)X +2437(and)X +2576(the)X +2697(NO-FSYNC)X +3120(line)X +3263(in)X +3348(\256gure)X +3557(eight)X +3739(demonstrates)X +4184(this)X +555 3650(dramatically.)N +1022(Each)X +1209(additional)X +1555(process)X +1822(causes)X +2058(both)X +2226(more)X +2417(waiting)X +2682(and)X +2823(more)X +3013(deadlocking.)X +3470(Figure)X +3704(nine)X +3867(shows)X +4092(that)X +4237(in)X +555 3740(the)N +681(small)X +882(database)X +1187(case)X +1353(\(SMALL\),)X +1725(waiting)X +1992(is)X +2072(the)X +2197(dominant)X +2526(cause)X +2732(of)X +2826(declining)X +3151(performance)X +3585(\(the)X +3737(number)X +4009(of)X +4103(aborts)X +555 3830(increases)N +878(less)X +1026(steeply)X +1281(than)X +1447(the)X +1573(performance)X +2008(drops)X +2214(off)X +2336(in)X +2426(\256gure)X +2641(eight\),)X +2876(while)X +3082(in)X +3172(the)X +3298(large)X +3487(database)X +3792(case)X +3958(\(LARGE\),)X +555 3920(deadlocking)N +967(contributes)X +1343(more)X +1528(to)X +1610(the)X +1728(declining)X +2046(performance.)X +755 4043(Deadlocks)N +1116(are)X +1237(more)X +1424(likely)X +1628(to)X +1712(occur)X +1913(in)X +1997(the)X +2116(LARGE)X +2404(test)X +2536(than)X +2695(in)X +2778(the)X +2897(SMALL)X +3189(test)X +3321(because)X +3597(there)X +3779(are)X +3899(more)X +4085(oppor-)X +555 4133(tunities)N +814(to)X +900(wait.)X +1082(In)X +1173(the)X +1295(SMALL)X +1590(case,)X +1773(processes)X +2105(never)X +2307(do)X +2410(I/O)X +2540(and)X +2679(are)X +2801(less)X +2944(likely)X +3149(to)X +3234(be)X +3333(descheduled)X +3753(during)X +3985(a)X +4044(transac-)X +555 4223(tion.)N +740(In)X +828(the)X +947(LARGE)X +1235(case,)X +1415(processes)X +1744(will)X +1889(frequently)X +2240(be)X +2337(descheduled)X +2755(since)X +2941(they)X +3100(have)X +3273(to)X +3356(perform)X +3636(I/O.)X +3804(This)X +3967(provides)X +4263(a)X +555 4313(window)N +837(where)X +1058(a)X +1118(second)X +1365(process)X +1630(can)X +1766(request)X +2022(locks)X +2215(on)X +2318(already)X +2578(locked)X +2815(pages,)X +3041(thus)X +3197(increasing)X +3550(the)X +3671(likelihood)X +4018(of)X +4108(build-)X +555 4403(ing)N +677(up)X +777(long)X +939(chains)X +1164(of)X +1251(waiting)X +1511(processes.)X +1879(Eventually,)X +2266(this)X +2401(leads)X +2586(to)X +2668(deadlock.)X +3 f +555 4589(5.2.)N +715(The)X +868(OO1)X +1052(Benchmark)X +1 f +755 4712(The)N +903(TPCB)X +1125(benchmark)X +1505(described)X +1836(in)X +1921(the)X +2042(previous)X +2341(section)X +2591(measures)X +2913(performance)X +3343(under)X +3549(a)X +3608(conventional)X +4044(transac-)X +555 4802(tion)N +706(processing)X +1076(workload.)X +1446(Other)X +1656(application)X +2039(domains,)X +2357(such)X +2531(as)X +2625(computer-aided)X +3156(design,)X +3412(have)X +3591(substantially)X +4022(different)X +555 4892(access)N +786(patterns.)X +1105(In)X +1197(order)X +1392(to)X +1479(measure)X +1772(the)X +1895(performance)X +2327(of)X +2418(LIBTP)X +2664(under)X +2871(workloads)X +3229(of)X +3320(this)X +3459(type,)X +3641(we)X +3759(implemented)X +4201(the)X +555 4982(OO1)N +731(benchmark)X +1108(described)X +1436(in)X +1518([CATT91].)X +755 5105(The)N +908(database)X +1213(models)X +1472(a)X +1535(set)X +1651(of)X +1745(electronics)X +2120(components)X +2534(with)X +2703(connections)X +3113(among)X +3358(them.)X +3585(One)X +3746(table)X +3929(stores)X +4143(parts)X +555 5195(and)N +696(another)X +962(stores)X +1174(connections.)X +1622(There)X +1835(are)X +1959(three)X +2145(connections)X +2552(originating)X +2927(at)X +3009(any)X +3149(given)X +3351(part.)X +3540(Ninety)X +3782(percent)X +4043(of)X +4134(these)X +555 5285(connections)N +960(are)X +1081(to)X +1165(nearby)X +1406(parts)X +1584(\(those)X +1802(with)X +1966(nearby)X +2 f +2207(ids)X +1 f +2300(\))X +2348(to)X +2431(model)X +2652(the)X +2771(spatial)X +3001(locality)X +3262(often)X +3448(exhibited)X +3767(in)X +3850(CAD)X +4040(applica-)X +555 5375(tions.)N +779(Ten)X +933(percent)X +1198(of)X +1293(the)X +1419(connections)X +1830(are)X +1957(randomly)X +2292(distributed)X +2662(among)X +2908(all)X +3016(other)X +3209(parts)X +3393(in)X +3483(the)X +3609(database.)X +3954(Every)X +4174(part)X +555 5465(appears)N +829(exactly)X +1089(three)X +1278(times)X +1479(in)X +1569(the)X +2 f +1695(from)X +1 f +1874(\256eld)X +2043(of)X +2137(a)X +2200(connection)X +2579(record,)X +2832(and)X +2975(zero)X +3141(or)X +3235(more)X +3427(times)X +3627(in)X +3716(the)X +2 f +3841(to)X +1 f +3930(\256eld.)X +4139(Parts)X +555 5555(have)N +2 f +727(x)X +1 f +783(and)X +2 f +919(y)X +1 f +975(locations)X +1284(set)X +1393(randomly)X +1720(in)X +1802(an)X +1898(appropriate)X +2284(range.)X + +14 p +%%Page: 14 14 +10 s 10 xH 0 xS 1 f +3 f +1 f +755 630(The)N +900(intent)X +1102(of)X +1189(OO1)X +1365(is)X +1438(to)X +1520(measure)X +1808(the)X +1926(overall)X +2169(cost)X +2318(of)X +2405(a)X +2461(query)X +2664(mix)X +2808(characteristic)X +3257(of)X +3344(engineering)X +3743(database)X +4040(applica-)X +555 720(tions.)N +770(There)X +978(are)X +1097(three)X +1278(tests:)X +10 f +635 843(g)N +2 f +755(Lookup)X +1 f +1022(generates)X +1353(1,000)X +1560(random)X +1832(part)X +2 f +1984(ids)X +1 f +2077(,)X +2124(fetches)X +2378(the)X +2502(corresponding)X +2987(parts)X +3169(from)X +3351(the)X +3475(database,)X +3798(and)X +3940(calls)X +4113(a)X +4175(null)X +755 933(procedure)N +1097(in)X +1179(the)X +1297(host)X +1450(programming)X +1906(language)X +2216(with)X +2378(the)X +2496(parts')X +2 f +2699(x)X +1 f +2755(and)X +2 f +2891(y)X +1 f +2947(positions.)X +10 f +635 1056(g)N +2 f +755(Traverse)X +1 f +1067(retrieves)X +1371(a)X +1434(random)X +1706(part)X +1858(from)X +2041(the)X +2166(database)X +2470(and)X +2613(follows)X +2880(connections)X +3290(from)X +3473(it)X +3544(to)X +3632(other)X +3823(parts.)X +4045(Each)X +4232(of)X +755 1146(those)N +947(parts)X +1126(is)X +1202(retrieved,)X +1531(and)X +1670(all)X +1773(connections)X +2179(from)X +2358(it)X +2424(followed.)X +2771(This)X +2935(procedure)X +3279(is)X +3354(repeated)X +3649(depth-\256rst)X +4000(for)X +4116(seven)X +755 1236(hops)N +930(from)X +1110(the)X +1232(original)X +1505(part,)X +1674(for)X +1792(a)X +1852(total)X +2018(of)X +2109(3280)X +2293(parts.)X +2513(Backward)X +2862(traversal)X +3162(also)X +3314(exists,)X +3539(and)X +3678(follows)X +3941(all)X +4044(connec-)X +755 1326(tions)N +930(into)X +1074(a)X +1130(given)X +1328(part)X +1473(to)X +1555(their)X +1722(origin.)X +10 f +635 1449(g)N +2 f +755(Insert)X +1 f +962(adds)X +1129(100)X +1269(new)X +1423(parts)X +1599(and)X +1735(their)X +1902(connections.)X +755 1572(The)N +913(benchmark)X +1303(is)X +1389(single-user,)X +1794(but)X +1929(multi-user)X +2291(access)X +2530(controls)X +2821(\(locking)X +3120(and)X +3268(transaction)X +3652(protection\))X +4036(must)X +4223(be)X +555 1662(enforced.)N +898(It)X +968(is)X +1042(designed)X +1348(to)X +1431(be)X +1528(run)X +1656(on)X +1757(a)X +1814(database)X +2112(with)X +2275(20,000)X +2516(parts,)X +2713(and)X +2850(on)X +2951(one)X +3087(with)X +3249(200,000)X +3529(parts.)X +3745(Because)X +4033(we)X +4147(have)X +555 1752(insuf\256cient)N +935(disk)X +1088(space)X +1287(for)X +1401(the)X +1519(larger)X +1727(database,)X +2044(we)X +2158(report)X +2370(results)X +2599(only)X +2761(for)X +2875(the)X +2993(20,000)X +3233(part)X +3378(database.)X +3 f +555 1938(5.2.1.)N +775(Implementation)X +1 f +755 2061(The)N +920(LIBTP)X +1182(implementation)X +1724(of)X +1831(OO1)X +2027(uses)X +2205(the)X +2342(TCL)X +2532([OUST90])X +2914(interface)X +3235(described)X +3582(earlier.)X +3867(The)X +4031(backend)X +555 2151(accepts)N +813(commands)X +1181(over)X +1345(an)X +1442(IP)X +1534(socket)X +1760(and)X +1897(performs)X +2208(the)X +2327(requested)X +2656(database)X +2954(actions.)X +3242(The)X +3387(frontend)X +3679(opens)X +3886(and)X +4022(executes)X +555 2241(a)N +618(TCL)X +796(script.)X +1041(This)X +1210(script)X +1415(contains)X +1709(database)X +2013(accesses)X +2313(interleaved)X +2697(with)X +2866(ordinary)X +3165(program)X +3463(control)X +3716(statements.)X +4120(Data-)X +555 2331(base)N +718(commands)X +1085(are)X +1204(submitted)X +1539(to)X +1621(the)X +1739(backend)X +2027(and)X +2163(results)X +2392(are)X +2511(bound)X +2731(to)X +2813(program)X +3105(variables.)X +755 2454(The)N +903(parts)X +1082(table)X +1261(was)X +1409(stored)X +1628(as)X +1718(a)X +1776(B-tree)X +1999(indexed)X +2275(by)X +2 f +2377(id)X +1 f +2439(.)X +2501(The)X +2648(connection)X +3022(table)X +3200(was)X +3347(stored)X +3565(as)X +3654(a)X +3712(set)X +3823(of)X +3912(\256xed-length)X +555 2544(records)N +824(using)X +1029(the)X +1159(4.4BSD)X +1446(recno)X +1657(access)X +1895(method.)X +2207(In)X +2306(addition,)X +2620(two)X +2771(B-tree)X +3003(indices)X +3261(were)X +3449(maintained)X +3836(on)X +3947(connection)X +555 2634(table)N +732(entries.)X +1007(One)X +1162(index)X +1360(mapped)X +1634(the)X +2 f +1752(from)X +1 f +1923(\256eld)X +2085(to)X +2167(a)X +2223(connection)X +2595(record)X +2821(number,)X +3106(and)X +3242(the)X +3360(other)X +3545(mapped)X +3819(the)X +2 f +3937(to)X +1 f +4019(\256eld)X +4181(to)X +4263(a)X +555 2724(connection)N +932(record)X +1163(number.)X +1473(These)X +1690(indices)X +1941(support)X +2205(fast)X +2345(lookups)X +2622(on)X +2726(connections)X +3133(in)X +3219(both)X +3385(directions.)X +3765(For)X +3900(the)X +4022(traversal)X +555 2814(tests,)N +743(the)X +867(frontend)X +1165(does)X +1338(an)X +1439(index)X +1642(lookup)X +1889(to)X +1976(discover)X +2273(the)X +2396(connected)X +2747(part's)X +2 f +2955(id)X +1 f +3017(,)X +3062(and)X +3203(then)X +3366(does)X +3538(another)X +3804(lookup)X +4051(to)X +4138(fetch)X +555 2904(the)N +673(part)X +818(itself.)X +3 f +555 3090(5.2.2.)N +775(Performance)X +1242(Measurements)X +1766(for)X +1889(OO1)X +1 f +755 3213(We)N +888(compare)X +1186(LIBTP's)X +1487(OO1)X +1664(performance)X +2092(to)X +2174(that)X +2314(reported)X +2602(in)X +2684([CATT91].)X +3087(Those)X +3303(results)X +3532(were)X +3709(collected)X +4019(on)X +4119(a)X +4175(Sun)X +555 3303(3/280)N +759(\(25)X +888(MHz)X +1075(MC68020\))X +1448(with)X +1612(16)X +1714(MBytes)X +1989(of)X +2078(memory)X +2367(and)X +2505(two)X +2647(Hitachi)X +2904(892MByte)X +3267(disks)X +3452(\(15)X +3580(ms)X +3694(average)X +3966(seek)X +4130(time\))X +555 3393(behind)N +793(an)X +889(SMD-4)X +1149(controller.)X +1521(Frontends)X +1861(ran)X +1984(on)X +2084(an)X +2180(8MByte)X +2462(Sun)X +2606(3/260.)X +755 3516(In)N +844(order)X +1036(to)X +1120(measure)X +1410(performance)X +1839(on)X +1941(a)X +1999(machine)X +2293(of)X +2382(roughly)X +2653(equivalent)X +3009(processor)X +3339(power,)X +3582(we)X +3698(ran)X +3822(one)X +3959(set)X +4069(of)X +4157(tests)X +555 3606(on)N +666(a)X +733(standalone)X +1107(MC68030-based)X +1671(HP300)X +1923(\(33MHz)X +2225(MC68030\).)X +2646(The)X +2801(database)X +3108(was)X +3263(stored)X +3489(on)X +3599(a)X +3665(300MByte)X +4037(HP7959)X +555 3696(SCSI)N +744(disk)X +898(\(17)X +1026(ms)X +1139(average)X +1410(seek)X +1573(time\).)X +1802(Since)X +2000(this)X +2135(machine)X +2427(is)X +2500(not)X +2622(connected)X +2968(to)X +3050(a)X +3106(network,)X +3409(we)X +3523(ran)X +3646(local)X +3822(tests)X +3984(where)X +4201(the)X +555 3786(frontend)N +855(and)X +999(backend)X +1295(run)X +1430(on)X +1538(the)X +1664(same)X +1856(machine.)X +2195(We)X +2334(compare)X +2638(these)X +2830(measurements)X +3316(with)X +3485(Cattell's)X +3783(local)X +3966(Sun)X +4117(3/280)X +555 3876(numbers.)N +755 3999(Because)N +1051(the)X +1177(benchmark)X +1562(requires)X +1849(remote)X +2100(access,)X +2354(we)X +2476(ran)X +2607(another)X +2876(set)X +2993(of)X +3088(tests)X +3258(on)X +3365(a)X +3428(DECstation)X +3828(5000/200)X +4157(with)X +555 4089(32M)N +732(of)X +825(memory)X +1118(running)X +1393(Ultrix)X +1610(V4.0)X +1794(and)X +1936(a)X +1998(DEC)X +2184(1GByte)X +2459(RZ57)X +2666(SCSI)X +2859(disk.)X +3057(We)X +3194(compare)X +3496(the)X +3619(local)X +3800(performance)X +4232(of)X +555 4179(OO1)N +734(on)X +837(the)X +958(DECstation)X +1354(to)X +1439(its)X +1536(remote)X +1781(performance.)X +2250(For)X +2383(the)X +2503(remote)X +2748(case,)X +2929(we)X +3045(ran)X +3170(the)X +3290(frontend)X +3584(on)X +3686(a)X +3744(DECstation)X +4139(3100)X +555 4269(with)N +717(16)X +817(MBytes)X +1090(of)X +1177(main)X +1357(memory.)X +755 4392(The)N +900(databases)X +1228(tested)X +1435(in)X +1517([CATT91])X +1880(are)X +10 f +635 4515(g)N +1 f +755(INDEX,)X +1045(a)X +1101(highly-optimized)X +1672(access)X +1898(method)X +2158(package)X +2442(developed)X +2792(at)X +2870(Sun)X +3014(Microsystems.)X +10 f +635 4638(g)N +1 f +755(OODBMS,)X +1137(a)X +1193(beta)X +1347(release)X +1591(of)X +1678(a)X +1734(commercial)X +2133(object-oriented)X +2639(database)X +2936(management)X +3366(system.)X +10 f +635 4761(g)N +1 f +755(RDBMS,)X +1076(a)X +1133(UNIX-based)X +1565(commercial)X +1965(relational)X +2289(data)X +2444(manager)X +2742(at)X +2821(production)X +3189(release.)X +3474(The)X +3620(OO1)X +3797(implementation)X +755 4851(used)N +922(embedded)X +1272(SQL)X +1443(in)X +1525(C.)X +1638(Stored)X +1867(procedures)X +2240(were)X +2417(de\256ned)X +2673(to)X +2755(reduce)X +2990(client-server)X +3412(traf\256c.)X +755 4974(Table)N +974(two)X +1130(shows)X +1366(the)X +1500(measurements)X +1995(from)X +2187([CATT91])X +2566(and)X +2718(LIBTP)X +2976(for)X +3106(a)X +3178(local)X +3370(test)X +3517(on)X +3632(the)X +3765(MC680x0-based)X +555 5064(hardware.)N +915(All)X +1037(caches)X +1272(are)X +1391(cleared)X +1644(before)X +1870(each)X +2038(test.)X +2209(All)X +2331(times)X +2524(are)X +2643(in)X +2725(seconds.)X +755 5187(Table)N +960(two)X +1102(shows)X +1324(that)X +1466(LIBTP)X +1710(outperforms)X +2123(the)X +2242(commercial)X +2642(relational)X +2966(system,)X +3229(but)X +3352(is)X +3426(slower)X +3661(than)X +3820(OODBMS)X +4183(and)X +555 5277(INDEX.)N +872(Since)X +1077(the)X +1202(caches)X +1444(were)X +1628(cleared)X +1888(at)X +1973(the)X +2098(start)X +2263(of)X +2356(each)X +2530(test,)X +2687(disk)X +2846(throughput)X +3223(is)X +3302(critical)X +3551(in)X +3639(this)X +3780(test.)X +3957(The)X +4108(single)X +555 5367(SCSI)N +749(HP)X +877(drive)X +1068(used)X +1241(by)X +1347(LIBTP)X +1595(is)X +1674(approximately)X +2163(13%)X +2336(slower)X +2576(than)X +2739(the)X +2862(disks)X +3051(used)X +3223(in)X +3310([CATT91])X +3678(which)X +3899(accounts)X +4205(for)X +555 5457(part)N +700(of)X +787(the)X +905(difference.)X +755 5580(OODBMS)N +1118(and)X +1255(INDEX)X +1525(outperform)X +1906(LIBTP)X +2148(most)X +2323(dramatically)X +2744(on)X +2844(traversal.)X +3181(This)X +3343(is)X +3416(because)X +3691(we)X +3805(use)X +3932(index)X +4130(look-)X +555 5670(ups)N +689(to)X +774(\256nd)X +921(connections,)X +1347(whereas)X +1634(the)X +1755(other)X +1942(two)X +2084(systems)X +2359(use)X +2488(a)X +2546(link)X +2692(access)X +2920(method.)X +3222(The)X +3369(index)X +3569(requires)X +3850(us)X +3943(to)X +4027(examine)X + +15 p +%%Page: 15 15 +10 s 10 xH 0 xS 1 f +3 f +1 f +10 f +555 679(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +2 f +606 769(Measure)N +1 f +1019(INDEX)X +1389(OODBMS)X +1851(RDBMS)X +2250(LIBTP)X +10 f +555 771(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +555 787(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +1 f +595 869(Lookup)N +1114(5.4)X +1490(12.9)X +1950(27)X +2291(27.2)X +595 959(Traversal)N +1074(13)X +1530(9.8)X +1950(90)X +2291(47.3)X +595 1049(Insert)N +1114(7.4)X +1530(1.5)X +1950(22)X +2331(9.7)X +10 f +555 1059(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +555(c)X +999(c)Y +919(c)Y +839(c)Y +759(c)Y +959 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +1329 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +1791 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2190 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2512 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2618 679(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +2829 769(Measure)N +3401(Cache)X +3726(Local)X +4028(Remote)X +1 f +10 f +2618 771(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2618 787(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 869(Lookup)N +3401(cold)X +3747(15.7)X +4078(20.6)X +3401 959(warm)N +3787(7.8)X +4078(12.4)X +10 f +2618 969(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1059(Forward)N +2950(traversal)X +3401(cold)X +3747(28.4)X +4078(52.6)X +3401 1149(warm)N +3747(23.5)X +4078(47.4)X +10 f +2618 1159(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1249(Backward)N +3004(traversal)X +3401(cold)X +3747(24.2)X +4078(47.4)X +3401 1339(warm)N +3747(24.3)X +4078(47.6)X +10 f +2618 1349(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1439(Insert)N +3401(cold)X +3787(7.5)X +4078(10.3)X +3401 1529(warm)N +3787(6.7)X +4078(10.9)X +10 f +2618 1539(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2618(c)X +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3341 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3666 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3968 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +4309 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3 f +587 1785(Table)N +823(2:)X +931(Local)X +1163(MC680x0)X +1538(Performance)X +2026(of)X +2133(Several)X +587 1875(Systems)N +883(on)X +987(OO1.)X +2667 1785(Table)N +2909(3:)X +3023(Local)X +3260(vs.)X +3397(Remote)X +3707(Performance)X +4200(of)X +2667 1875(LIBTP)N +2926(on)X +3030(OO1.)X +1 f +10 f +555 1998(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 2274(two)N +696(disk)X +850(pages,)X +1074(but)X +1197(the)X +1316(links)X +1492(require)X +1741(only)X +1904(one,)X +2061(regardless)X +2408(of)X +2496(database)X +2794(size.)X +2980(Cattell)X +3214(reports)X +3458(that)X +3599(lookups)X +3873(using)X +4067(B-trees)X +555 2364(instead)N +808(of)X +901(links)X +1082(makes)X +1313(traversal)X +1616(take)X +1776(twice)X +1976(as)X +2069(long)X +2237(in)X +2325(INDEX.)X +2641(Adding)X +2907(a)X +2969(link)X +3119(access)X +3351(method)X +3617(to)X +3 f +3704(db)X +1 f +3792(\(3\))X +3911(or)X +4003(using)X +4201(the)X +555 2454(existing)N +828(hash)X +995(method)X +1255(would)X +1475(apparently)X +1834(be)X +1930(a)X +1986(good)X +2166(idea.)X +755 2577(Both)N +936(OODBMS)X +1304(and)X +1446(INDEX)X +1722(issue)X +1908 0.1944(coarser-granularity)AX +2545(locks)X +2739(than)X +2902(LIBTP.)X +3189(This)X +3356(limits)X +3562(concurrency)X +3985(for)X +4104(multi-)X +555 2667(user)N +711(applications,)X +1140(but)X +1264(helps)X +1455(single-user)X +1829(applications.)X +2278(In)X +2367(addition,)X +2671(the)X +2791(fact)X +2934(that)X +3076(LIBTP)X +3319(releases)X +3595(B-tree)X +3817(locks)X +4007(early)X +4189(is)X +4263(a)X +555 2757(drawback)N +896(in)X +986(OO1.)X +1210(Since)X +1416(there)X +1605(is)X +1686(no)X +1793(concurrency)X +2218(in)X +2307(the)X +2432(benchmark,)X +2836(high-concurrency)X +3430(strategies)X +3760(only)X +3929(show)X +4125(up)X +4232(as)X +555 2847(increased)N +882(locking)X +1145(overhead.)X +1503(Finally,)X +1772(the)X +1892(architecture)X +2294(of)X +2383(the)X +2503(LIBTP)X +2747(implementation)X +3271(was)X +3418(substantially)X +3844(different)X +4143(from)X +555 2937(that)N +702(of)X +796(either)X +1006(OODBMS)X +1375(or)X +1469(INDEX.)X +1786(Both)X +1968(of)X +2062(those)X +2258(systems)X +2538(do)X +2645(the)X +2770(searches)X +3070(in)X +3159(the)X +3284(user's)X +3503(address)X +3771(space,)X +3997(and)X +4139(issue)X +555 3027(requests)N +844(for)X +964(pages)X +1173(to)X +1260(the)X +1383(server)X +1605(process.)X +1911(Pages)X +2123(are)X +2247(cached)X +2496(in)X +2583(the)X +2706(client,)X +2929(and)X +3070(many)X +3273(queries)X +3530(can)X +3667(be)X +3768(satis\256ed)X +4055(without)X +555 3117(contacting)N +910(the)X +1029(server)X +1247(at)X +1326(all.)X +1467(LIBTP)X +1710(submits)X +1979(all)X +2080(the)X +2199(queries)X +2452(to)X +2535(the)X +2653(server)X +2870(process,)X +3151(and)X +3287(receives)X +3571(database)X +3868(records)X +4125(back;)X +555 3207(it)N +619(does)X +786(no)X +886(client)X +1084(caching.)X +755 3330(The)N +911(RDBMS)X +1221(architecture)X +1632(is)X +1716(much)X +1925(closer)X +2148(to)X +2241(that)X +2392(of)X +2490(LIBTP.)X +2783(A)X +2872(server)X +3100(process)X +3372(receives)X +3667(queries)X +3930(and)X +4076(returns)X +555 3420(results)N +786(to)X +870(a)X +928(client.)X +1168(The)X +1315(timing)X +1545(results)X +1776(in)X +1860(table)X +2038(two)X +2180(clearly)X +2421(show)X +2612(that)X +2754(the)X +2874(conventional)X +3309(database)X +3607(client/server)X +4025(model)X +4246(is)X +555 3510(expensive.)N +941(LIBTP)X +1188(outperforms)X +1605(the)X +1728(RDBMS)X +2032(on)X +2136(traversal)X +2437(and)X +2577(insertion.)X +2921(We)X +3057(speculate)X +3380(that)X +3524(this)X +3663(is)X +3740(due)X +3880(in)X +3966(part)X +4115(to)X +4201(the)X +555 3600(overhead)N +870(of)X +957(query)X +1160(parsing,)X +1436(optimization,)X +1880(and)X +2016(repeated)X +2309(interpretation)X +2761(of)X +2848(the)X +2966(plan)X +3124(tree)X +3265(in)X +3347(the)X +3465(RDBMS')X +3791(query)X +3994(executor.)X +755 3723(Table)N +962(three)X +1147(shows)X +1371(the)X +1492(differences)X +1873(between)X +2164(local)X +2343(and)X +2482(remote)X +2728(execution)X +3063(of)X +3153(LIBTP's)X +3456(OO1)X +3635(implementation)X +4160(on)X +4263(a)X +555 3813(DECstation.)N +989(We)X +1122(measured)X +1451(performance)X +1879(with)X +2042(a)X +2099(populated)X +2436(\(warm\))X +2694(cache)X +2899(and)X +3036(an)X +3133(empty)X +3354(\(cold\))X +3567(cache.)X +3812(Reported)X +4126(times)X +555 3903(are)N +681(the)X +806(means)X +1037(of)X +1130(twenty)X +1374(tests,)X +1562(and)X +1704(are)X +1829(in)X +1917(seconds.)X +2237(Standard)X +2548(deviations)X +2903(were)X +3086(within)X +3316(seven)X +3525(percent)X +3788(of)X +3881(the)X +4005(mean)X +4205(for)X +555 3993(remote,)N +818(and)X +954(two)X +1094(percent)X +1351(of)X +1438(the)X +1556(mean)X +1750(for)X +1864(local.)X +755 4116(The)N +914(20ms)X +1121(overhead)X +1450(of)X +1551(TCP/IP)X +1824(on)X +1938(an)X +2048(Ethernet)X +2354(entirely)X +2633(accounts)X +2948(for)X +3076(the)X +3207(difference)X +3567(in)X +3662(speed.)X +3918(The)X +4076(remote)X +555 4206(traversal)N +857(times)X +1055(are)X +1179(nearly)X +1405(double)X +1648(the)X +1771(local)X +1952(times)X +2150(because)X +2430(we)X +2549(do)X +2653(index)X +2855(lookups)X +3132(and)X +3272(part)X +3421(fetches)X +3673(in)X +3759(separate)X +4047(queries.)X +555 4296(It)N +629(would)X +854(make)X +1053(sense)X +1252(to)X +1339(do)X +1444(indexed)X +1723(searches)X +2021(on)X +2126(the)X +2248(server,)X +2489(but)X +2615(we)X +2733(were)X +2914(unwilling)X +3244(to)X +3330(hard-code)X +3676(knowledge)X +4052(of)X +4143(OO1)X +555 4386(indices)N +803(into)X +948(our)X +1075(LIBTP)X +1317(TCL)X +1488(server.)X +1745(Cold)X +1920(and)X +2056(warm)X +2259(insertion)X +2559(times)X +2752(are)X +2871(identical)X +3167(since)X +3352(insertions)X +3683(do)X +3783(not)X +3905(bene\256t)X +4143(from)X +555 4476(caching.)N +755 4599(One)N +915(interesting)X +1279(difference)X +1632(shown)X +1867(by)X +1973(table)X +2155(three)X +2342(is)X +2421(the)X +2545(cost)X +2700(of)X +2793(forward)X +3074(versus)X +3305(backward)X +3644(traversal.)X +3987(When)X +4205(we)X +555 4689(built)N +725(the)X +847(database,)X +1168(we)X +1285(inserted)X +1562(parts)X +1741(in)X +1826(part)X +2 f +1974(id)X +1 f +2059(order.)X +2292(We)X +2427(built)X +2596(the)X +2717(indices)X +2967(at)X +3048(the)X +3169(same)X +3357(time.)X +3562(Therefore,)X +3923(the)X +4044(forward)X +555 4779(index)N +757(had)X +897(keys)X +1068(inserted)X +1346(in)X +1432(order,)X +1646(while)X +1848(the)X +1970(backward)X +2307(index)X +2509(had)X +2649(keys)X +2820(inserted)X +3098(more)X +3286(randomly.)X +3656(In-order)X +3943(insertion)X +4246(is)X +555 4885(pessimal)N +858(for)X +975(B-tree)X +1199(indices,)X +1469(so)X +1563(the)X +1684(forward)X +1962(index)X +2163(is)X +2239(much)X +2440(larger)X +2651(than)X +2812(the)X +2933(backward)X +3269(one)X +7 s +3385 4853(5)N +10 s +4885(.)Y +3476(This)X +3640(larger)X +3850(size)X +3997(shows)X +4219(up)X +555 4975(as)N +642(extra)X +823(disk)X +976(reads)X +1166(in)X +1248(the)X +1366(cold)X +1524(benchmark.)X +3 f +555 5161(6.)N +655(Conclusions)X +1 f +755 5284(LIBTP)N +1006(provides)X +1311(the)X +1438(basic)X +1632(building)X +1927(blocks)X +2165(to)X +2256(support)X +2525(transaction)X +2906(protection.)X +3300(In)X +3396(comparison)X +3799(with)X +3970(traditional)X +555 5374(Unix)N +746(libraries)X +1040(and)X +1187(commercial)X +1597(systems,)X +1900(it)X +1974(offers)X +2192(a)X +2258(variety)X +2511(of)X +2608(tradeoffs.)X +2964(Using)X +3185(complete)X +3509(transaction)X +3891(protection)X +4246(is)X +555 5464(more)N +747(complicated)X +1166(than)X +1331(simply)X +1575(adding)X +3 f +1820(fsync)X +1 f +1998(\(2\))X +2119(and)X +3 f +2262(\257ock)X +1 f +2426(\(2\))X +2547(calls)X +2721(to)X +2810(code,)X +3008(but)X +3136(it)X +3206(is)X +3285(faster)X +3490(in)X +3578(some)X +3773(cases)X +3969(and)X +4111(offers)X +8 s +10 f +555 5536(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5614(5)N +8 s +763 5639(The)N +878(next)X +1004(release)X +1196(of)X +1265(the)X +1359(4.4BSD)X +1580(access)X +1758(method)X +1966(will)X +2082(automatically)X +2446(detect)X +2614(and)X +2722(compensate)X +3039(for)X +3129(in-order)X +3350(insertion,)X +3606(eliminating)X +3914(this)X +4023(problem.)X + +16 p +%%Page: 16 16 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(stricter)N +801(guarantees)X +1168(\(atomicity,)X +1540(consistency,)X +1957(isolation,)X +2275(and)X +2414(durability\).)X +2815(If)X +2892(the)X +3013(data)X +3170(to)X +3255(be)X +3354(protected)X +3676(are)X +3798(already)X +4058(format-)X +555 720(ted)N +675(\()X +2 f +702(i.e.)X +1 f +821(use)X +949(one)X +1086(of)X +1174(the)X +1293(database)X +1591(access)X +1818(methods\),)X +2157(then)X +2316(adding)X +2555(transaction)X +2928(protection)X +3274(requires)X +3554(no)X +3655(additional)X +3996(complex-)X +555 810(ity,)N +679(but)X +801(incurs)X +1017(a)X +1073(performance)X +1500(penalty)X +1756(of)X +1843(approximately)X +2326(15%.)X +755 933(In)N +844(comparison)X +1240(with)X +1404(commercial)X +1805(database)X +2104(systems,)X +2399(the)X +2519(tradeoffs)X +2827(are)X +2948(more)X +3135(complex.)X +3473(LIBTP)X +3717(does)X +3886(not)X +4009(currently)X +555 1023(support)N +825(a)X +891(standard)X +1193(query)X +1406(language.)X +1766(The)X +1921(TCL-based)X +2312(server)X +2539(process)X +2810(allows)X +3049(a)X +3115(certain)X +3364(ease)X +3533(of)X +3630(use)X +3767(which)X +3993(would)X +4223(be)X +555 1113(enhanced)N +882(with)X +1047(a)X +1106(more)X +1294(user-friendly)X +1732(interface)X +2037(\()X +2 f +2064(e.g.)X +1 f +2203(a)X +2261(windows)X +2572(based)X +2777(query-by-form)X +3272(application\),)X +3697(for)X +3813(which)X +4031(we)X +4147(have)X +555 1203(a)N +620(working)X +916(prototype.)X +1292(When)X +1513(accesses)X +1815(do)X +1924(not)X +2055(require)X +2312(sophisticated)X +2758(query)X +2969(processing,)X +3360(the)X +3486(TCL)X +3665(interface)X +3975(is)X +4056(an)X +4160(ade-)X +555 1293(quate)N +756(solution.)X +1080(What)X +1281(LIBTP)X +1529(fails)X +1693(to)X +1781(provide)X +2052(in)X +2140(functionality,)X +2595(it)X +2665(makes)X +2896(up)X +3002(for)X +3122(in)X +3210(performance)X +3643(and)X +3785(\257exibility.)X +4161(Any)X +555 1383(application)N +931(may)X +1089(make)X +1283(use)X +1410(of)X +1497(its)X +1592(record)X +1818(interface)X +2120(or)X +2207(the)X +2325(more)X +2510(primitive)X +2823(log,)X +2965(lock,)X +3143(and)X +3279(buffer)X +3496(calls.)X +755 1506(Future)N +987(work)X +1175(will)X +1322(focus)X +1519(on)X +1621(overcoming)X +2026(some)X +2217(of)X +2306(the)X +2426(areas)X +2614(in)X +2698(which)X +2916(LIBTP)X +3160(is)X +3235(currently)X +3547(de\256cient)X +3845(and)X +3983(extending)X +555 1596(its)N +652(transaction)X +1026(model.)X +1288(The)X +1435(addition)X +1719(of)X +1808(an)X +1905(SQL)X +2077(parser)X +2295(and)X +2432(forms)X +2640(front)X +2817(end)X +2954(will)X +3099(improve)X +3387(the)X +3506(system's)X +3807(ease)X +3967(of)X +4055(use)X +4183(and)X +555 1686(make)N +750(it)X +815(more)X +1001(competitive)X +1400(with)X +1563(commercial)X +1963(systems.)X +2277(In)X +2365(the)X +2484(long)X +2647(term,)X +2835(we)X +2950(would)X +3170(like)X +3310(to)X +3392(add)X +3528(generalized)X +3919(hierarchical)X +555 1776(locking,)N +836(nested)X +1062(transactions,)X +1486(parallel)X +1748(transactions,)X +2171(passing)X +2431(of)X +2518(transactions)X +2921(between)X +3209(processes,)X +3557(and)X +3693(distributed)X +4055(commit)X +555 1866(handling.)N +900(In)X +992(the)X +1115(short)X +1300(term,)X +1492(the)X +1614(next)X +1776(step)X +1929(is)X +2006(to)X +2092(integrate)X +2397(LIBTP)X +2643(with)X +2809(the)X +2931(most)X +3110(recent)X +3331(release)X +3579(of)X +3670(the)X +3792(database)X +4093(access)X +555 1956(routines)N +833(and)X +969(make)X +1163(it)X +1227(freely)X +1435(available)X +1745(via)X +1863(anonymous)X +2252(ftp.)X +3 f +555 2142(7.)N +655(Acknowledgements)X +1 f +755 2265(We)N +888(would)X +1109(like)X +1250(to)X +1332(thank)X +1530(John)X +1701(Wilkes)X +1948(and)X +2084(Carl)X +2242(Staelin)X +2484(of)X +2571(Hewlett-Packard)X +3131(Laboratories)X +3557(and)X +3693(Jon)X +3824(Krueger.)X +4148(John)X +555 2355(and)N +694(Carl)X +855(provided)X +1162(us)X +1255(with)X +1419(an)X +1517(extra)X +1700(disk)X +1855(for)X +1971(the)X +2091(HP)X +2215(testbed)X +2464(less)X +2606(than)X +2766(24)X +2868(hours)X +3068(after)X +3238(we)X +3354(requested)X +3684(it.)X +3770(Jon)X +3903(spent)X +4094(count-)X +555 2445(less)N +699(hours)X +901(helping)X +1164(us)X +1258(understand)X +1633(the)X +1754(intricacies)X +2107(of)X +2197(commercial)X +2599(database)X +2899(products)X +3198(and)X +3337(their)X +3507(behavior)X +3811(under)X +4017(a)X +4076(variety)X +555 2535(of)N +642(system)X +884(con\256gurations.)X +3 f +555 2721(8.)N +655(References)X +1 f +555 2901([ANDR89])N +942(Andrade,)X +1265(J.,)X +1361(Carges,)X +1629(M.,)X +1765(Kovach,)X +2060(K.,)X +2183(``Building)X +2541(an)X +2642(On-Line)X +2939(Transaction)X +3343(Processing)X +3715(System)X +3975(On)X +4098(UNIX)X +727 2991(System)N +982(V'',)X +2 f +1134(CommUNIXations)X +1 f +1725(,)X +1765 0.2188(November/December)AX +2477(1989.)X +555 3171([BAY77])N +878(Bayer,)X +1110(R.,)X +1223(Schkolnick,)X +1623(M.,)X +1754(``Concurrency)X +2243(of)X +2330(Operations)X +2702(on)X +2802(B-Trees'',)X +2 f +3155(Acta)X +3322(Informatica)X +1 f +3700(,)X +3740(1977.)X +555 3351([BERN80])N +936(Bernstein,)X +1297(P.,)X +1415(Goodman,)X +1785(N.,)X +1917(``Timestamp)X +2365(Based)X +2595(Algorithms)X +2992(for)X +3119(Concurrency)X +3567(Control)X +3844(in)X +3939(Distributed)X +727 3441(Database)N +1042(Systems'',)X +2 f +1402(Proceedings)X +1823(6th)X +1945(International)X +2387(Conference)X +2777(on)X +2877(Very)X +3049(Large)X +3260(Data)X +3440(Bases)X +1 f +3627(,)X +3667(October)X +3946(1980.)X +555 3621([BSD91])N +864(DB\(3\),)X +2 f +1109(4.4BSD)X +1376(Unix)X +1552(Programmer's)X +2044(Manual)X +2313(Reference)X +2655(Guide)X +1 f +2851(,)X +2891(University)X +3249(of)X +3336(California,)X +3701(Berkeley,)X +4031(1991.)X +555 3801([CATT91])N +923(Cattell,)X +1181(R.G.G.,)X +1455(``An)X +1632(Engineering)X +2049(Database)X +2369(Benchmark'',)X +2 f +2838(The)X +2983(Benchmark)X +3373(Handbook)X +3731(for)X +3848(Database)X +4179(and)X +727 3891(Transaction)N +1133(Processing)X +1509(Systems)X +1 f +1763(,)X +1803(J.)X +1874(Gray,)X +2075(editor,)X +2302(Morgan)X +2576(Kaufman)X +2895(1991.)X +555 4071([CHEN91])N +929(Cheng,)X +1180(E.,)X +1291(Chang,)X +1542(E.,)X +1653(Klein,)X +1872(J.,)X +1964(Lee,)X +2126(D.,)X +2245(Lu,)X +2375(E.,)X +2485(Lutgardo,)X +2820(A.,)X +2939(Obermarck,)X +3342(R.,)X +3456(``An)X +3629(Open)X +3824(and)X +3961(Extensible)X +727 4161(Event-Based)N +1157(Transaction)X +1556(Manager'',)X +2 f +1936(Proceedings)X +2357(1991)X +2537(Summer)X +2820(Usenix)X +1 f +3043(,)X +3083(Nashville,)X +3430(TN,)X +3577(June)X +3744(1991.)X +555 4341([CHOU85])N +943(Chou,)X +1163(H.,)X +1288(DeWitt,)X +1570(D.,)X +1694(``An)X +1872(Evaluation)X +2245(of)X +2338(Buffer)X +2574(Management)X +3019(Strategies)X +3361(for)X +3481(Relational)X +3836(Database)X +4157(Sys-)X +727 4431(tems'',)N +2 f +972(Proceedings)X +1393(of)X +1475(the)X +1593(11th)X +1755(International)X +2197(Conference)X +2587(on)X +2687(Very)X +2859(Large)X +3070(Databases)X +1 f +3408(,)X +3448(1985.)X +555 4611([DEWI84])N +925(DeWitt,)X +1207(D.,)X +1331(Katz,)X +1529(R.,)X +1648(Olken,)X +1890(F.,)X +2000(Shapiro,)X +2295(L.,)X +2410(Stonebraker,)X +2843(M.,)X +2979(Wood,)X +3220(D.,)X +3343(``Implementation)X +3929(Techniques)X +727 4701(for)N +841(Main)X +1030(Memory)X +1326(Database)X +1641(Systems'',)X +2 f +2001(Proceedings)X +2422(of)X +2504(SIGMOD)X +1 f +2812(,)X +2852(pp.)X +2972(1-8,)X +3119(June)X +3286(1984.)X +555 4881([GRAY76])N +944(Gray,)X +1153(J.,)X +1252(Lorie,)X +1474(R.,)X +1595(Putzolu,)X +1887(F.,)X +1999(and)X +2143(Traiger,)X +2428(I.,)X +2522(``Granularity)X +2973(of)X +3067(locks)X +3263(and)X +3406(degrees)X +3679(of)X +3773(consistency)X +4174(in)X +4263(a)X +727 4971(large)N +909(shared)X +1140(data)X +1295(base'',)X +2 f +1533(Modeling)X +1861(in)X +1944(Data)X +2125(Base)X +2301(Management)X +2740(Systems)X +1 f +2994(,)X +3034(Elsevier)X +3317(North)X +3524(Holland,)X +3822(New)X +3994(York,)X +4199(pp.)X +727 5061(365-394.)N +555 5241([HAER83])N +931(Haerder,)X +1235(T.)X +1348(Reuter,)X +1606(A.)X +1728(``Principles)X +2126(of)X +2217(Transaction-Oriented)X +2928(Database)X +3246(Recovery'',)X +2 f +3651(Computing)X +4029(Surveys)X +1 f +4279(,)X +727 5331(15\(4\);)N +943(237-318,)X +1250(1983.)X +555 5511([KUNG81])N +943(Kung,)X +1162(H.)X +1261(T.,)X +1371(Richardson,)X +1777(J.,)X +1869(``On)X +2042(Optimistic)X +2400(Methods)X +2701(for)X +2816(Concurrency)X +3252(Control'',)X +2 f +3591(ACM)X +3781(Transactions)X +4219(on)X +727 5601(Database)N +1054(Systems)X +1 f +1328(6\(2\);)X +1504(213-226,)X +1811(1981.)X + +17 p +%%Page: 17 17 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630([LEHM81])N +939(Lehman,)X +1245(P.,)X +1352(Yao,)X +1529(S.,)X +1636(``Ef\256cient)X +1989(Locking)X +2279(for)X +2396(Concurrent)X +2780(Operations)X +3155(on)X +3258(B-trees'',)X +2 f +3587(ACM)X +3779(Transactions)X +4219(on)X +727 720(Database)N +1054(Systems)X +1 f +1308(,)X +1348(6\(4\),)X +1522(December)X +1873(1981.)X +555 900([MOHA91])N +964(Mohan,)X +1241(C.,)X +1364(Pirahesh,)X +1690(H.,)X +1818(``ARIES-RRH:)X +2366(Restricted)X +2721(Repeating)X +3076(of)X +3173(History)X +3442(in)X +3533(the)X +3660(ARIES)X +3920(Transaction)X +727 990(Recovery)N +1055(Method'',)X +2 f +1398(Proceedings)X +1819(7th)X +1941(International)X +2383(Conference)X +2773(on)X +2873(Data)X +3053(Engineering)X +1 f +3449(,)X +3489(Kobe,)X +3703(Japan,)X +3926(April)X +4115(1991.)X +555 1170([NODI90])N +914(Nodine,)X +1194(M.,)X +1328(Zdonik,)X +1602(S.,)X +1709(``Cooperative)X +2178(Transaction)X +2580(Hierarchies:)X +2996(A)X +3077(Transaction)X +3479(Model)X +3711(to)X +3796(Support)X +4072(Design)X +727 1260(Applications'',)N +2 f +1242(Proceedings)X +1675(16th)X +1849(International)X +2303(Conference)X +2704(on)X +2815(Very)X +2998(Large)X +3220(Data)X +3411(Bases)X +1 f +3598(,)X +3649(Brisbane,)X +3985(Australia,)X +727 1350(August)N +978(1990.)X +555 1530([OUST90])N +923(Ousterhout,)X +1324(J.,)X +1420(``Tcl:)X +1648(An)X +1771(Embeddable)X +2197(Command)X +2555(Language'',)X +2 f +2971(Proceedings)X +3396(1990)X +3580(Winter)X +3822(Usenix)X +1 f +4045(,)X +4089(Wash-)X +727 1620(ington,)N +971(D.C.,)X +1162(January)X +1432(1990.)X +555 1800([POSIX91])N +955(``Unapproved)X +1441(Draft)X +1645(for)X +1773(Realtime)X +2096(Extension)X +2450(for)X +2578(Portable)X +2879(Operating)X +3234(Systems'',)X +3608(Draft)X +3812(11,)X +3946(October)X +4239(7,)X +727 1890(1991,)N +927(IEEE)X +1121(Computer)X +1461(Society.)X +555 2070([ROSE91])N +925(Rosenblum,)X +1341(M.,)X +1484(Ousterhout,)X +1892(J.,)X +1995(``The)X +2206(Design)X +2464(and)X +2611(Implementation)X +3149(of)X +3247(a)X +3314(Log-Structured)X +3835(File)X +3990(System'',)X +2 f +727 2160(Proceedings)N +1148(of)X +1230(the)X +1348(13th)X +1510(Symposium)X +1895(on)X +1995(Operating)X +2344(Systems)X +2618(Principles)X +1 f +2947(,)X +2987(1991.)X +555 2340([SELT91])N +904(Seltzer,)X +1171(M.,)X +1306(Stonebraker,)X +1738(M.,)X +1873(``Read)X +2116(Optimized)X +2478(File)X +2626(Systems:)X +2938(A)X +3020(Performance)X +3454(Evaluation'',)X +2 f +3898(Proceedings)X +727 2430(7th)N +849(Annual)X +1100(International)X +1542(Conference)X +1932(on)X +2032(Data)X +2212(Engineering)X +1 f +2608(,)X +2648(Kobe,)X +2862(Japan,)X +3085(April)X +3274(1991.)X +555 2610([SPEC88])N +907(Spector,)X +1200(Rausch,)X +1484(Bruell,)X +1732(``Camelot:)X +2107(A)X +2192(Flexible,)X +2501(Distributed)X +2888(Transaction)X +3294(Processing)X +3668(System'',)X +2 f +4004(Proceed-)X +727 2700(ings)N +880(of)X +962(Spring)X +1195(COMPCON)X +1606(1988)X +1 f +(,)S +1806(February)X +2116(1988.)X +555 2880([SQL86])N +862(American)X +1201(National)X +1499(Standards)X +1836(Institute,)X +2139(``Database)X +2509(Language)X +2847(SQL'',)X +3093(ANSI)X +3301(X3.135-1986)X +3747(\(ISO)X +3924(9075\),)X +4152(May)X +727 2970(1986.)N +555 3150([STON81])N +919(Stonebraker,)X +1348(M.,)X +1480(``Operating)X +1876(System)X +2132(Support)X +2406(for)X +2520(Database)X +2835(Management'',)X +2 f +3348(Communications)X +3910(of)X +3992(the)X +4110(ACM)X +1 f +4279(,)X +727 3240(1981.)N +555 3420([SULL92])N +925(Sullivan,)X +1247(M.,)X +1394(Olson,)X +1641(M.,)X +1788(``An)X +1976(Index)X +2195(Implementation)X +2737(Supporting)X +3127(Fast)X +3295(Recovery)X +3638(for)X +3767(the)X +3900(POSTGRES)X +727 3510(Storage)N +1014(System'',)X +1365(to)X +1469(appear)X +1726(in)X +2 f +1830(Proceedings)X +2272(8th)X +2415(Annual)X +2687(International)X +3150(Conference)X +3561(on)X +3682(Data)X +3883(Engineering)X +1 f +4279(,)X +727 3600(Tempe,)N +990(Arizona,)X +1289(February)X +1599(1992.)X +555 3780([TPCB90])N +914(Transaction)X +1319(Processing)X +1692(Performance)X +2129(Council,)X +2428(``TPC)X +2653(Benchmark)X +3048(B'',)X +3200(Standard)X +3510(Speci\256cation,)X +3973(Waterside)X +727 3870(Associates,)N +1110(Fremont,)X +1421(CA.,)X +1592(1990.)X +555 4050([YOUN91])N +947(Young,)X +1211(M.)X +1328(W.,)X +1470(Thompson,)X +1858(D.)X +1962(S.,)X +2072(Jaffe,)X +2274(E.,)X +2388(``A)X +2525(Modular)X +2826(Architecture)X +3253(for)X +3372(Distributed)X +3757(Transaction)X +4161(Pro-)X +727 4140(cessing'',)N +2 f +1057(Proceedings)X +1478(1991)X +1658(Winter)X +1896(Usenix)X +1 f +2119(,)X +2159(Dallas,)X +2404(TX,)X +2551(January)X +2821(1991.)X +3 f +755 4263(Margo)N +1008(I.)X +1080(Seltzer)X +1 f +1338(is)X +1411(a)X +1467(Ph.D.)X +1669(student)X +1920(in)X +2002(the)X +2120(Department)X +2519(of)X +2606(Electrical)X +2934(Engineering)X +3346(and)X +3482(Computer)X +3822(Sciences)X +4123(at)X +4201(the)X +555 4353(University)N +919(of)X +1012(California,)X +1383(Berkeley.)X +1739(Her)X +1886(research)X +2181(interests)X +2474(include)X +2735(\256le)X +2862(systems,)X +3160(databases,)X +3513(and)X +3654(transaction)X +4031(process-)X +555 4443(ing)N +686(systems.)X +1008(She)X +1157(spent)X +1355(several)X +1612(years)X +1811(working)X +2107(at)X +2194(startup)X +2441(companies)X +2813(designing)X +3153(and)X +3298(implementing)X +3771(\256le)X +3902(systems)X +4183(and)X +555 4533(transaction)N +929(processing)X +1294(software)X +1592(and)X +1729(designing)X +2061(microprocessors.)X +2648(Ms.)X +2791(Seltzer)X +3035(received)X +3329(her)X +3453(AB)X +3585(in)X +3668(Applied)X +3947(Mathemat-)X +555 4623(ics)N +664(from)X +840 0.1953(Harvard/Radcliffe)AX +1445(College)X +1714(in)X +1796(1983.)X +755 4746(In)N +845(her)X +971(spare)X +1163(time,)X +1347(Margo)X +1583(can)X +1717(usually)X +1970(be)X +2068(found)X +2277(preparing)X +2607(massive)X +2887(quantities)X +3220(of)X +3309(food)X +3478(for)X +3594(hungry)X +3843(hordes,)X +4099(study-)X +555 4836(ing)N +677(Japanese,)X +1003(or)X +1090(playing)X +1350(soccer)X +1576(with)X +1738(an)X +1834(exciting)X +2112(Bay)X +2261(Area)X +2438(Women's)X +2770(Soccer)X +3009(team,)X +3205(the)X +3323(Berkeley)X +3633(Bruisers.)X +3 f +755 5049(Michael)N +1056(A.)X +1159(Olson)X +1 f +1383(is)X +1461(a)X +1522(Master's)X +1828(student)X +2084(in)X +2170(the)X +2292(Department)X +2695(of)X +2786(Electrical)X +3118(Engineering)X +3534(and)X +3674(Computer)X +4018(Sciences)X +555 5139(at)N +645(the)X +774(University)X +1143(of)X +1241(California,)X +1617(Berkeley.)X +1978(His)X +2120(primary)X +2405(interests)X +2703(are)X +2833(database)X +3141(systems)X +3425(and)X +3572(mass)X +3763(storage)X +4026(systems.)X +555 5229(Mike)N +759(spent)X +963(two)X +1118(years)X +1323(working)X +1625(for)X +1754(a)X +1825(commercial)X +2239(database)X +2551(system)X +2808(vendor)X +3066(before)X +3307(joining)X +3567(the)X +3699(Postgres)X +4004(Research)X +555 5319(Group)N +780(at)X +858(Berkeley)X +1168(in)X +1250(1988.)X +1470(He)X +1584(received)X +1877(his)X +1990(B.A.)X +2161(in)X +2243(Computer)X +2583(Science)X +2853(from)X +3029(Berkeley)X +3339(in)X +3421(May)X +3588(1991.)X +755 5442(Mike)N +945(only)X +1108(recently)X +1388(transferred)X +1758(into)X +1903(Sin)X +2030(City,)X +2208(but)X +2330(is)X +2403(rapidly)X +2650(adopting)X +2950(local)X +3126(customs)X +3408(and)X +3544(coloration.)X +3929(In)X +4016(his)X +4129(spare)X +555 5532(time,)N +742(he)X +843(organizes)X +1176(informal)X +1477(Friday)X +1711(afternoon)X +2043(study)X +2240(groups)X +2482(to)X +2568(discuss)X +2823(recent)X +3044(technical)X +3358(and)X +3498(economic)X +3834(developments.)X +555 5622(Among)N +815(his)X +928(hobbies)X +1197(are)X +1316(Charles)X +1581(Dickens,)X +1884(Red)X +2033(Rock,)X +2242(and)X +2378(speaking)X +2683(Dutch)X +2899(to)X +2981(anyone)X +3233(who)X +3391(will)X +3535(permit)X +3764(it.)X + +17 p +%%Trailer +xt + +xs + diff --git a/lib/libc/db/hash/extern.h b/lib/libc/db/hash/extern.h index b7ef37a..3167e6d 100644 --- a/lib/libc/db/hash/extern.h +++ b/lib/libc/db/hash/extern.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1991, 1993 + * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)extern.h 8.2 (Berkeley) 2/21/94 + * @(#)extern.h 8.4 (Berkeley) 6/16/94 */ BUFHEAD *__add_ovflpage __P((HTAB *, BUFHEAD *)); @@ -40,26 +40,26 @@ int __big_insert __P((HTAB *, BUFHEAD *, const DBT *, const DBT *)); int __big_keydata __P((HTAB *, BUFHEAD *, DBT *, DBT *, int)); int __big_return __P((HTAB *, BUFHEAD *, int, DBT *, int)); int __big_split __P((HTAB *, BUFHEAD *, BUFHEAD *, BUFHEAD *, - int, u_int, SPLIT_RETURN *)); + int, u_int32_t, SPLIT_RETURN *)); int __buf_free __P((HTAB *, int, int)); void __buf_init __P((HTAB *, int)); -u_int __call_hash __P((HTAB *, char *, int)); +u_int32_t __call_hash __P((HTAB *, char *, int)); int __delpair __P((HTAB *, BUFHEAD *, int)); int __expand_table __P((HTAB *)); int __find_bigpair __P((HTAB *, BUFHEAD *, int, char *, int)); -u_short __find_last_page __P((HTAB *, BUFHEAD **)); +u_int16_t __find_last_page __P((HTAB *, BUFHEAD **)); void __free_ovflpage __P((HTAB *, BUFHEAD *)); -BUFHEAD *__get_buf __P((HTAB *, u_int, BUFHEAD *, int)); -int __get_page __P((HTAB *, char *, u_int, int, int, int)); -int __init_bitmap __P((HTAB *, int, int, int)); -u_int __log2 __P((u_int)); -int __put_page __P((HTAB *, char *, u_int, int, int)); +BUFHEAD *__get_buf __P((HTAB *, u_int32_t, BUFHEAD *, int)); +int __get_page __P((HTAB *, char *, u_int32_t, int, int, int)); +int __ibitmap __P((HTAB *, int, int, int)); +u_int32_t __log2 __P((u_int32_t)); +int __put_page __P((HTAB *, char *, u_int32_t, int, int)); void __reclaim_buf __P((HTAB *, BUFHEAD *)); -int __split_page __P((HTAB *, u_int, u_int)); +int __split_page __P((HTAB *, u_int32_t, u_int32_t)); /* Default hash routine. */ extern u_int32_t (*__default_hash) __P((const void *, size_t)); #ifdef HASH_STATISTICS -extern long hash_accesses, hash_collisions, hash_expansions, hash_overflows; +extern int hash_accesses, hash_collisions, hash_expansions, hash_overflows; #endif diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c index 6a23f3d..4b7b732 100644 --- a/lib/libc/db/hash/hash.c +++ b/lib/libc/db/hash/hash.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hash.c 8.7 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)hash.c 8.9 (Berkeley) 6/16/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -60,13 +60,13 @@ static int alloc_segs __P((HTAB *, int)); static int flush_meta __P((HTAB *)); static int hash_access __P((HTAB *, ACTION, DBT *, DBT *)); static int hash_close __P((DB *)); -static int hash_delete __P((const DB *, const DBT *, u_int)); +static int hash_delete __P((const DB *, const DBT *, u_int32_t)); static int hash_fd __P((const DB *)); -static int hash_get __P((const DB *, const DBT *, DBT *, u_int)); -static int hash_put __P((const DB *, DBT *, const DBT *, u_int)); +static int hash_get __P((const DB *, const DBT *, DBT *, u_int32_t)); +static int hash_put __P((const DB *, DBT *, const DBT *, u_int32_t)); static void *hash_realloc __P((SEGMENT **, int, int)); -static int hash_seq __P((const DB *, DBT *, DBT *, u_int)); -static int hash_sync __P((const DB *, u_int)); +static int hash_seq __P((const DB *, DBT *, DBT *, u_int32_t)); +static int hash_sync __P((const DB *, u_int32_t)); static int hdestroy __P((HTAB *)); static HTAB *init_hash __P((HTAB *, const char *, HASHINFO *)); static int init_htab __P((HTAB *, int)); @@ -86,7 +86,7 @@ static void swap_header_copy __P((HASHHDR *, HASHHDR *)); #define ABNORMAL (1) #ifdef HASH_STATISTICS -long hash_accesses, hash_collisions, hash_expansions, hash_overflows; +int hash_accesses, hash_collisions, hash_expansions, hash_overflows; #endif /************************** INTERFACE ROUTINES ***************************/ @@ -179,7 +179,7 @@ __hash_open(file, flags, mode, info, dflags) (hashp->BSHIFT + BYTE_SHIFT); hashp->nmaps = bpages; - (void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_long *)); + (void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *)); } /* Initialize Buffer Manager */ @@ -366,7 +366,7 @@ init_htab(hashp, nelem) hashp->LAST_FREED = 2; /* First bitmap page is at: splitpoint l2 page offset 1 */ - if (__init_bitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0)) + if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0)) return (-1); hashp->MAX_BUCKET = hashp->LOW_MASK = nbuckets - 1; @@ -451,7 +451,7 @@ hdestroy(hashp) static int hash_sync(dbp, flags) const DB *dbp; - u_int flags; + u_int32_t flags; { HTAB *hashp; @@ -530,7 +530,7 @@ hash_get(dbp, key, data, flag) const DB *dbp; const DBT *key; DBT *data; - u_int flag; + u_int32_t flag; { HTAB *hashp; @@ -547,7 +547,7 @@ hash_put(dbp, key, data, flag) const DB *dbp; DBT *key; const DBT *data; - u_int flag; + u_int32_t flag; { HTAB *hashp; @@ -568,7 +568,7 @@ static int hash_delete(dbp, key, flag) const DB *dbp; const DBT *key; - u_int flag; /* Ignored */ + u_int32_t flag; /* Ignored */ { HTAB *hashp; @@ -595,10 +595,10 @@ hash_access(hashp, action, key, val) { register BUFHEAD *rbufp; BUFHEAD *bufp, *save_bufp; - register u_short *bp; + register u_int16_t *bp; register int n, ndx, off, size; register char *kp; - u_short pageno; + u_int16_t pageno; #ifdef HASH_STATISTICS hash_accesses++; @@ -614,7 +614,7 @@ hash_access(hashp, action, key, val) /* Pin the bucket chain */ rbufp->flags |= BUF_PIN; - for (bp = (u_short *)rbufp->page, n = *bp++, ndx = 1; ndx < n;) + for (bp = (u_int16_t *)rbufp->page, n = *bp++, ndx = 1; ndx < n;) if (bp[1] >= REAL_KEY) { /* Real key/data pair */ if (size == off - *bp && @@ -633,7 +633,7 @@ hash_access(hashp, action, key, val) return (ERROR); } /* FOR LOOP INIT */ - bp = (u_short *)rbufp->page; + bp = (u_int16_t *)rbufp->page; n = *bp++; ndx = 1; off = hashp->BSIZE; @@ -655,7 +655,7 @@ hash_access(hashp, action, key, val) return (ERROR); } /* FOR LOOP INIT */ - bp = (u_short *)rbufp->page; + bp = (u_int16_t *)rbufp->page; n = *bp++; ndx = 1; off = hashp->BSIZE; @@ -689,7 +689,7 @@ found: save_bufp->flags &= ~BUF_PIN; return (ABNORMAL); case HASH_GET: - bp = (u_short *)rbufp->page; + bp = (u_int16_t *)rbufp->page; if (bp[ndx + 1] < REAL_KEY) { if (__big_return(hashp, rbufp, ndx, val, 0)) return (ERROR); @@ -720,12 +720,12 @@ static int hash_seq(dbp, key, data, flag) const DB *dbp; DBT *key, *data; - u_int flag; + u_int32_t flag; { - register u_int bucket; + register u_int32_t bucket; register BUFHEAD *bufp; HTAB *hashp; - u_short *bp, ndx; + u_int16_t *bp, ndx; hashp = (HTAB *)dbp->internal; if (flag && flag != R_FIRST && flag != R_NEXT) { @@ -750,7 +750,7 @@ hash_seq(dbp, key, data, flag) if (!bufp) return (ERROR); hashp->cpage = bufp; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; if (bp[0]) break; } @@ -760,7 +760,7 @@ hash_seq(dbp, key, data, flag) return (ABNORMAL); } } else - bp = (u_short *)hashp->cpage->page; + bp = (u_int16_t *)hashp->cpage->page; #ifdef DEBUG assert(bp); @@ -771,7 +771,7 @@ hash_seq(dbp, key, data, flag) __get_buf(hashp, bp[hashp->cndx], bufp, 0); if (!bufp) return (ERROR); - bp = (u_short *)(bufp->page); + bp = (u_int16_t *)(bufp->page); hashp->cndx = 1; } if (!bp[0]) { @@ -810,7 +810,7 @@ extern int __expand_table(hashp) HTAB *hashp; { - u_int old_bucket, new_bucket; + u_int32_t old_bucket, new_bucket; int dirsize, new_segnum, spare_ndx; #ifdef HASH_STATISTICS @@ -877,7 +877,7 @@ hash_realloc(p_ptr, oldsize, newsize) return (p); } -extern u_int +extern u_int32_t __call_hash(hashp, k, len) HTAB *hashp; char *k; diff --git a/lib/libc/db/hash/hash.h b/lib/libc/db/hash/hash.h index cfbedaa..913e82b 100644 --- a/lib/libc/db/hash/hash.h +++ b/lib/libc/db/hash/hash.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)hash.h 8.2 (Berkeley) 2/21/94 + * @(#)hash.h 8.3 (Berkeley) 5/31/94 */ /* Operations */ @@ -45,12 +45,12 @@ typedef enum { typedef struct _bufhead BUFHEAD; struct _bufhead { - BUFHEAD *prev; /* LRU links */ - BUFHEAD *next; /* LRU links */ - BUFHEAD *ovfl; /* Overflow page buffer header */ - u_int addr; /* Address of this page */ - char *page; /* Actual page data */ - char flags; + BUFHEAD *prev; /* LRU links */ + BUFHEAD *next; /* LRU links */ + BUFHEAD *ovfl; /* Overflow page buffer header */ + u_int32_t addr; /* Address of this page */ + char *page; /* Actual page data */ + char flags; #define BUF_MOD 0x0001 #define BUF_DISK 0x0002 #define BUF_BUCKET 0x0004 @@ -62,51 +62,60 @@ struct _bufhead { typedef BUFHEAD **SEGMENT; /* Hash Table Information */ -typedef struct hashhdr { /* Disk resident portion */ - int magic; /* Magic NO for hash tables */ - int version; /* Version ID */ - long lorder; /* Byte Order */ - int bsize; /* Bucket/Page Size */ - int bshift; /* Bucket shift */ - int dsize; /* Directory Size */ - int ssize; /* Segment Size */ - int sshift; /* Segment shift */ - int ovfl_point; /* Where overflow pages are being allocated */ - int last_freed; /* Last overflow page freed */ - int max_bucket; /* ID of Maximum bucket in use */ - int high_mask; /* Mask to modulo into entire table */ - int low_mask; /* Mask to modulo into lower half of table */ - int ffactor; /* Fill factor */ - int nkeys; /* Number of keys in hash table */ - int hdrpages; /* Size of table header */ - int h_charkey; /* value of hash(CHARKEY) */ -#define NCACHED 32 /* number of bit maps and spare points */ - int spares[NCACHED];/* spare pages for overflow */ - u_short bitmaps[NCACHED]; /* address of overflow page bitmaps */ +typedef struct hashhdr { /* Disk resident portion */ + int magic; /* Magic NO for hash tables */ + int version; /* Version ID */ + u_int32_t lorder; /* Byte Order */ + int bsize; /* Bucket/Page Size */ + int bshift; /* Bucket shift */ + int dsize; /* Directory Size */ + int ssize; /* Segment Size */ + int sshift; /* Segment shift */ + int ovfl_point; /* Where overflow pages are being + * allocated */ + int last_freed; /* Last overflow page freed */ + int max_bucket; /* ID of Maximum bucket in use */ + int high_mask; /* Mask to modulo into entire table */ + int low_mask; /* Mask to modulo into lower half of + * table */ + int ffactor; /* Fill factor */ + int nkeys; /* Number of keys in hash table */ + int hdrpages; /* Size of table header */ + int h_charkey; /* value of hash(CHARKEY) */ +#define NCACHED 32 /* number of bit maps and spare + * points */ + int spares[NCACHED];/* spare pages for overflow */ + u_int16_t bitmaps[NCACHED]; /* address of overflow page + * bitmaps */ } HASHHDR; -typedef struct htab { /* Memory resident data structure */ - HASHHDR hdr; /* Header */ - int nsegs; /* Number of allocated segments */ - int exsegs; /* Number of extra allocated segments */ - u_int32_t /* Hash function */ +typedef struct htab { /* Memory resident data structure */ + HASHHDR hdr; /* Header */ + int nsegs; /* Number of allocated segments */ + int exsegs; /* Number of extra allocated + * segments */ + u_int32_t /* Hash function */ (*hash)__P((const void *, size_t)); - int flags; /* Flag values */ - int fp; /* File pointer */ - char *tmp_buf; /* Temporary Buffer for BIG data */ - char *tmp_key; /* Temporary Buffer for BIG keys */ - BUFHEAD *cpage; /* Current page */ - int cbucket; /* Current bucket */ - int cndx; /* Index of next item on cpage */ - int errno; /* Error Number -- for DBM compatability */ - int new_file; /* Indicates if fd is backing store or no */ - int save_file; /* Indicates whether we need to flush file at - * exit */ - u_long *mapp[NCACHED]; /* Pointers to page maps */ - int nmaps; /* Initial number of bitmaps */ - int nbufs; /* Number of buffers left to allocate */ - BUFHEAD bufhead; /* Header of buffer lru list */ - SEGMENT *dir; /* Hash Bucket directory */ + int flags; /* Flag values */ + int fp; /* File pointer */ + char *tmp_buf; /* Temporary Buffer for BIG data */ + char *tmp_key; /* Temporary Buffer for BIG keys */ + BUFHEAD *cpage; /* Current page */ + int cbucket; /* Current bucket */ + int cndx; /* Index of next item on cpage */ + int errno; /* Error Number -- for DBM + * compatability */ + int new_file; /* Indicates if fd is backing store + * or no */ + int save_file; /* Indicates whether we need to flush + * file at + * exit */ + u_int32_t *mapp[NCACHED]; /* Pointers to page maps */ + int nmaps; /* Initial number of bitmaps */ + int nbufs; /* Number of buffers left to + * allocate */ + BUFHEAD bufhead; /* Header of buffer lru list */ + SEGMENT *dir; /* Hash Bucket directory */ } HTAB; /* @@ -129,14 +138,14 @@ typedef struct htab { /* Memory resident data structure */ #define BYTE_SHIFT 3 #define INT_TO_BYTE 2 #define INT_BYTE_SHIFT 5 -#define ALL_SET ((u_int)0xFFFFFFFF) +#define ALL_SET ((u_int32_t)0xFFFFFFFF) #define ALL_CLEAR 0 -#define PTROF(X) ((BUFHEAD *)((u_int)(X)&~0x3)) -#define ISMOD(X) ((u_int)(X)&0x1) -#define DOMOD(X) ((X) = (char *)((u_int)(X)|0x1)) -#define ISDISK(X) ((u_int)(X)&0x2) -#define DODISK(X) ((X) = (char *)((u_int)(X)|0x2)) +#define PTROF(X) ((BUFHEAD *)((ptrdiff_t)(X)&~0x3)) +#define ISMOD(X) ((u_int32_t)(ptrdiff_t)(X)&0x1) +#define DOMOD(X) ((X) = (char *)((ptrdiff_t)(X)|0x1)) +#define ISDISK(X) ((u_int32_t)(ptrdiff_t)(X)&0x2) +#define DODISK(X) ((X) = (char *)((ptrdiff_t)(X)|0x2)) #define BITS_PER_MAP 32 @@ -156,9 +165,9 @@ typedef struct htab { /* Memory resident data structure */ #define SPLITSHIFT 11 #define SPLITMASK 0x7FF -#define SPLITNUM(N) (((u_int)(N)) >> SPLITSHIFT) +#define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIFT) #define OPAGENUM(N) ((N) & SPLITMASK) -#define OADDR_OF(S,O) ((u_int)((u_int)(S) << SPLITSHIFT) + (O)) +#define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) << SPLITSHIFT) + (O)) #define BUCKET_TO_PAGE(B) \ (B) + hashp->HDRPAGES + ((B) ? hashp->SPARES[__log2((B)+1)-1] : 0) diff --git a/lib/libc/db/hash/hash_bigkey.c b/lib/libc/db/hash/hash_bigkey.c index b747fbc..578314a 100644 --- a/lib/libc/db/hash/hash_bigkey.c +++ b/lib/libc/db/hash/hash_bigkey.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hash_bigkey.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; #endif /* LIBC_SCCS and not lint */ /* @@ -90,13 +90,13 @@ __big_insert(hashp, bufp, key, val) BUFHEAD *bufp; const DBT *key, *val; { - register u_short *p; + register u_int16_t *p; int key_size, n, val_size; - u_short space, move_bytes, off; + u_int16_t space, move_bytes, off; char *cp, *key_data, *val_data; cp = bufp->page; /* Character pointer of p. */ - p = (u_short *)cp; + p = (u_int16_t *)cp; key_data = (char *)key->data; key_size = key->size; @@ -134,7 +134,7 @@ __big_insert(hashp, bufp, key, val) OFFSET(p) = off; } else p[n - 2] = FULL_KEY; - p = (u_short *)bufp->page; + p = (u_int16_t *)bufp->page; cp = bufp->page; bufp->flags |= BUF_MOD; } @@ -164,7 +164,7 @@ __big_insert(hashp, bufp, key, val) if (!bufp) return (-1); cp = bufp->page; - p = (u_short *)cp; + p = (u_int16_t *)cp; } else p[n] = FULL_KEY_DATA; bufp->flags |= BUF_MOD; @@ -189,12 +189,12 @@ __big_delete(hashp, bufp) BUFHEAD *bufp; { register BUFHEAD *last_bfp, *rbufp; - u_short *bp, pageno; + u_int16_t *bp, pageno; int key_done, n; rbufp = bufp; last_bfp = NULL; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; pageno = 0; key_done = 0; @@ -217,7 +217,7 @@ __big_delete(hashp, bufp) last_bfp = rbufp; if (!rbufp) return (-1); /* Error. */ - bp = (u_short *)rbufp->page; + bp = (u_int16_t *)rbufp->page; } /* @@ -232,7 +232,7 @@ __big_delete(hashp, bufp) pageno = bp[n - 1]; /* Now, bp is the first page of the pair. */ - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; if (n > 2) { /* There is an overflow page. */ bp[1] = pageno; @@ -270,13 +270,13 @@ __find_bigpair(hashp, bufp, ndx, key, size) char *key; int size; { - register u_short *bp; + register u_int16_t *bp; register char *p; int ksize; - u_short bytes; + u_int16_t bytes; char *kkey; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; p = bufp->page; ksize = size; kkey = key; @@ -292,7 +292,7 @@ __find_bigpair(hashp, bufp, ndx, key, size) if (!bufp) return (-3); p = bufp->page; - bp = (u_short *)p; + bp = (u_int16_t *)p; ndx = 1; } @@ -314,17 +314,17 @@ __find_bigpair(hashp, bufp, ndx, key, size) * of the pair; 0 if there isn't any (i.e. big pair is the last key in the * bucket) */ -extern u_short +extern u_int16_t __find_last_page(hashp, bpp) HTAB *hashp; BUFHEAD **bpp; { BUFHEAD *bufp; - u_short *bp, pageno; + u_int16_t *bp, pageno; int n; bufp = *bpp; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; for (;;) { n = bp[0]; @@ -341,7 +341,7 @@ __find_last_page(hashp, bpp) bufp = __get_buf(hashp, pageno, bufp, 0); if (!bufp) return (0); /* Need to indicate an error! */ - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; } *bpp = bufp; @@ -364,15 +364,15 @@ __big_return(hashp, bufp, ndx, val, set_current) int set_current; { BUFHEAD *save_p; - u_short *bp, len, off, save_addr; + u_int16_t *bp, len, off, save_addr; char *tp; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; while (bp[ndx + 1] == PARTIAL_KEY) { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; ndx = 1; } @@ -380,7 +380,7 @@ __big_return(hashp, bufp, ndx, val, set_current) bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; save_p = bufp; save_addr = save_p->addr; off = bp[1]; @@ -401,7 +401,7 @@ __big_return(hashp, bufp, ndx, val, set_current) bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; } else { /* The data is all on one page. */ tp = (char *)bp; @@ -420,7 +420,7 @@ __big_return(hashp, bufp, ndx, val, set_current) if (!hashp->cpage) return (-1); hashp->cndx = 1; - if (!((u_short *) + if (!((u_int16_t *) hashp->cpage->page)[0]) { hashp->cbucket++; hashp->cpage = NULL; @@ -452,14 +452,14 @@ collect_data(hashp, bufp, len, set) BUFHEAD *bufp; int len, set; { - register u_short *bp; + register u_int16_t *bp; register char *p; BUFHEAD *xbp; - u_short save_addr; + u_int16_t save_addr; int mylen, totlen; p = bufp->page; - bp = (u_short *)p; + bp = (u_int16_t *)p; mylen = hashp->BSIZE - bp[1]; save_addr = bufp->addr; @@ -479,7 +479,7 @@ collect_data(hashp, bufp, len, set) __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!hashp->cpage) return (-1); - else if (!((u_short *)hashp->cpage->page)[0]) { + else if (!((u_int16_t *)hashp->cpage->page)[0]) { hashp->cbucket++; hashp->cpage = NULL; } @@ -531,10 +531,10 @@ collect_key(hashp, bufp, len, val, set) BUFHEAD *xbp; char *p; int mylen, totlen; - u_short *bp, save_addr; + u_int16_t *bp, save_addr; p = bufp->page; - bp = (u_short *)p; + bp = (u_int16_t *)p; mylen = hashp->BSIZE - bp[1]; save_addr = bufp->addr; @@ -573,15 +573,15 @@ __big_split(hashp, op, np, big_keyp, addr, obucket, ret) /* Pointer to first page containing the big key/data */ BUFHEAD *big_keyp; int addr; /* Address of big_keyp */ - u_int obucket;/* Old Bucket */ + u_int32_t obucket;/* Old Bucket */ SPLIT_RETURN *ret; { register BUFHEAD *tmpp; - register u_short *tp; + register u_int16_t *tp; BUFHEAD *bp; DBT key, val; - u_int change; - u_short free_space, n, off; + u_int32_t change; + u_int16_t free_space, n, off; bp = big_keyp; @@ -613,14 +613,14 @@ __big_split(hashp, op, np, big_keyp, addr, obucket, ret) (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); #endif tmpp->ovfl = bp; /* one of op/np point to big_keyp */ - tp = (u_short *)tmpp->page; + tp = (u_int16_t *)tmpp->page; #ifdef DEBUG assert(FREESPACE(tp) >= OVFLSIZE); #endif n = tp[0]; off = OFFSET(tp); free_space = FREESPACE(tp); - tp[++n] = (u_short)addr; + tp[++n] = (u_int16_t)addr; tp[++n] = OVFLPAGE; tp[0] = n; OFFSET(tp) = off; @@ -636,7 +636,7 @@ __big_split(hashp, op, np, big_keyp, addr, obucket, ret) ret->newp = np; ret->oldp = op; - tp = (u_short *)big_keyp->page; + tp = (u_int16_t *)big_keyp->page; big_keyp->flags |= BUF_MOD; if (tp[0] > 2) { /* diff --git a/lib/libc/db/hash/hash_buf.c b/lib/libc/db/hash/hash_buf.c index 1362c83..92e1f93 100644 --- a/lib/libc/db/hash/hash_buf.c +++ b/lib/libc/db/hash/hash_buf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hash_buf.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)hash_buf.c 8.5 (Berkeley) 7/15/94"; #endif /* LIBC_SCCS and not lint */ /* @@ -57,8 +57,10 @@ static char sccsid[] = "@(#)hash_buf.c 8.2 (Berkeley) 2/21/94"; #include #include +#include #include #include + #ifdef DEBUG #include #endif @@ -68,7 +70,7 @@ static char sccsid[] = "@(#)hash_buf.c 8.2 (Berkeley) 2/21/94"; #include "page.h" #include "extern.h" -static BUFHEAD *newbuf __P((HTAB *, u_int, BUFHEAD *)); +static BUFHEAD *newbuf __P((HTAB *, u_int32_t, BUFHEAD *)); /* Unlink B from its place in the lru */ #define BUF_REMOVE(B) { \ @@ -102,12 +104,12 @@ static BUFHEAD *newbuf __P((HTAB *, u_int, BUFHEAD *)); extern BUFHEAD * __get_buf(hashp, addr, prev_bp, newpage) HTAB *hashp; - u_int addr; + u_int32_t addr; BUFHEAD *prev_bp; int newpage; /* If prev_bp set, indicates a new overflow page. */ { register BUFHEAD *bp; - register u_int is_disk_mask; + register u_int32_t is_disk_mask; register int is_disk, segment_ndx; SEGMENT segp; @@ -140,7 +142,7 @@ __get_buf(hashp, addr, prev_bp, newpage) return (NULL); if (!prev_bp) segp[segment_ndx] = - (BUFHEAD *)((u_int)bp | is_disk_mask); + (BUFHEAD *)((ptrdiff_t)bp | is_disk_mask); } else { BUF_REMOVE(bp); MRU_INSERT(bp); @@ -157,7 +159,7 @@ __get_buf(hashp, addr, prev_bp, newpage) static BUFHEAD * newbuf(hashp, addr, prev_bp) HTAB *hashp; - u_int addr; + u_int32_t addr; BUFHEAD *prev_bp; { register BUFHEAD *bp; /* The buffer we're going to use */ @@ -165,7 +167,7 @@ newbuf(hashp, addr, prev_bp) register BUFHEAD *next_xbp; SEGMENT segp; int segment_ndx; - u_short oaddr, *shortp; + u_int16_t oaddr, *shortp; oaddr = 0; bp = LRU; @@ -177,10 +179,16 @@ newbuf(hashp, addr, prev_bp) /* Allocate a new one */ if ((bp = (BUFHEAD *)malloc(sizeof(BUFHEAD))) == NULL) return (NULL); +#ifdef PURIFY + memset(bp, 0xff, sizeof(BUFHEAD)); +#endif if ((bp->page = (char *)malloc(hashp->BSIZE)) == NULL) { free(bp); return (NULL); } +#ifdef PURIFY + memset(bp->page, 0xff, hashp->BSIZE); +#endif if (hashp->nbufs) hashp->nbufs--; } else { @@ -195,7 +203,7 @@ newbuf(hashp, addr, prev_bp) * Set oaddr before __put_page so that you get it * before bytes are swapped. */ - shortp = (u_short *)bp->page; + shortp = (u_int16_t *)bp->page; if (shortp[0]) oaddr = shortp[shortp[0] - 1]; if ((bp->flags & BUF_MOD) && __put_page(hashp, bp->page, @@ -238,7 +246,7 @@ newbuf(hashp, addr, prev_bp) (oaddr != xbp->addr)) break; - shortp = (u_short *)xbp->page; + shortp = (u_int16_t *)xbp->page; if (shortp[0]) /* set before __put_page */ oaddr = shortp[shortp[0] - 1]; diff --git a/lib/libc/db/hash/hash_log2.c b/lib/libc/db/hash/hash_log2.c index b773707..c8c56bf 100644 --- a/lib/libc/db/hash/hash_log2.c +++ b/lib/libc/db/hash/hash_log2.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,16 +35,18 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hash_log2.c 8.1 (Berkeley) 6/4/93"; +static char sccsid[] = "@(#)hash_log2.c 8.2 (Berkeley) 5/31/94"; #endif /* LIBC_SCCS and not lint */ #include -u_int +#include + +u_int32_t __log2(num) - u_int num; + u_int32_t num; { - register u_int i, limit; + register u_int32_t i, limit; limit = 1; for (i = 0; limit < num; limit = limit << 1, i++); diff --git a/lib/libc/db/hash/hash_page.c b/lib/libc/db/hash/hash_page.c index 2b2f572..e1dfe6b 100644 --- a/lib/libc/db/hash/hash_page.c +++ b/lib/libc/db/hash/hash_page.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hash_page.c 8.4 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)hash_page.c 8.7 (Berkeley) 8/16/94"; #endif /* LIBC_SCCS and not lint */ /* @@ -72,19 +72,19 @@ static char sccsid[] = "@(#)hash_page.c 8.4 (Berkeley) 2/21/94"; #include "page.h" #include "extern.h" -static u_long *fetch_bitmap __P((HTAB *, int)); -static u_long first_free __P((u_long)); +static u_int32_t *fetch_bitmap __P((HTAB *, int)); +static u_int32_t first_free __P((u_int32_t)); static int open_temp __P((HTAB *)); -static u_short overflow_page __P((HTAB *)); +static u_int16_t overflow_page __P((HTAB *)); static void putpair __P((char *, const DBT *, const DBT *)); -static void squeeze_key __P((u_short *, const DBT *, const DBT *)); +static void squeeze_key __P((u_int16_t *, const DBT *, const DBT *)); static int ugly_split - __P((HTAB *, u_int, BUFHEAD *, BUFHEAD *, int, int)); + __P((HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int)); #define PAGE_INIT(P) { \ - ((u_short *)(P))[0] = 0; \ - ((u_short *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_short); \ - ((u_short *)(P))[2] = hashp->BSIZE; \ + ((u_int16_t *)(P))[0] = 0; \ + ((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \ + ((u_int16_t *)(P))[2] = hashp->BSIZE; \ } /* @@ -97,9 +97,9 @@ putpair(p, key, val) char *p; const DBT *key, *val; { - register u_short *bp, n, off; + register u_int16_t *bp, n, off; - bp = (u_short *)p; + bp = (u_int16_t *)p; /* Enter the key first. */ n = bp[0]; @@ -115,7 +115,7 @@ putpair(p, key, val) /* Adjust page info. */ bp[0] = n; - bp[n + 1] = off - ((n + 3) * sizeof(u_short)); + bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t)); bp[n + 2] = off; } @@ -130,11 +130,11 @@ __delpair(hashp, bufp, ndx) BUFHEAD *bufp; register int ndx; { - register u_short *bp, newoff; + register u_int16_t *bp, newoff; register int n; - u_short pairlen; + u_int16_t pairlen; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; n = bp[0]; if (bp[ndx + 1] < REAL_KEY) @@ -165,7 +165,7 @@ __delpair(hashp, bufp, ndx) } /* Finally adjust the page data */ bp[n] = OFFSET(bp) + pairlen; - bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_short); + bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t); bp[0] = n - 2; hashp->NKEYS--; @@ -180,18 +180,18 @@ __delpair(hashp, bufp, ndx) extern int __split_page(hashp, obucket, nbucket) HTAB *hashp; - u_int obucket, nbucket; + u_int32_t obucket, nbucket; { register BUFHEAD *new_bufp, *old_bufp; - register u_short *ino; + register u_int16_t *ino; register char *np; DBT key, val; int n, ndx, retval; - u_short copyto, diff, off, moved; + u_int16_t copyto, diff, off, moved; char *op; - copyto = (u_short)hashp->BSIZE; - off = (u_short)hashp->BSIZE; + copyto = (u_int16_t)hashp->BSIZE; + off = (u_int16_t)hashp->BSIZE; old_bufp = __get_buf(hashp, obucket, NULL, 0); if (old_bufp == NULL) return (-1); @@ -202,7 +202,7 @@ __split_page(hashp, obucket, nbucket) old_bufp->flags |= (BUF_MOD | BUF_PIN); new_bufp->flags |= (BUF_MOD | BUF_PIN); - ino = (u_short *)(op = old_bufp->page); + ino = (u_int16_t *)(op = old_bufp->page); np = new_bufp->page; moved = 0; @@ -244,13 +244,13 @@ __split_page(hashp, obucket, nbucket) /* Now clean up the page */ ino[0] -= moved; - FREESPACE(ino) = copyto - sizeof(u_short) * (ino[0] + 3); + FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3); OFFSET(ino) = copyto; #ifdef DEBUG3 (void)fprintf(stderr, "split %d/%d\n", - ((u_short *)np)[0] / 2, - ((u_short *)op)[0] / 2); + ((u_int16_t *)np)[0] / 2, + ((u_int16_t *)op)[0] / 2); #endif /* unpin both pages */ old_bufp->flags &= ~BUF_PIN; @@ -276,28 +276,28 @@ __split_page(hashp, obucket, nbucket) static int ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) HTAB *hashp; - u_int obucket; /* Same as __split_page. */ + u_int32_t obucket; /* Same as __split_page. */ BUFHEAD *old_bufp, *new_bufp; int copyto; /* First byte on page which contains key/data values. */ int moved; /* Number of pairs moved to new page. */ { register BUFHEAD *bufp; /* Buffer header for ino */ - register u_short *ino; /* Page keys come off of */ - register u_short *np; /* New page */ - register u_short *op; /* Page keys go on to if they aren't moving */ + register u_int16_t *ino; /* Page keys come off of */ + register u_int16_t *np; /* New page */ + register u_int16_t *op; /* Page keys go on to if they aren't moving */ BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */ DBT key, val; SPLIT_RETURN ret; - u_short n, off, ov_addr, scopyto; + u_int16_t n, off, ov_addr, scopyto; char *cino; /* Character value of ino */ bufp = old_bufp; - ino = (u_short *)old_bufp->page; - np = (u_short *)new_bufp->page; - op = (u_short *)old_bufp->page; + ino = (u_int16_t *)old_bufp->page; + np = (u_int16_t *)new_bufp->page; + op = (u_int16_t *)old_bufp->page; last_bfp = NULL; - scopyto = (u_short)copyto; /* ANSI */ + scopyto = (u_int16_t)copyto; /* ANSI */ n = ino[0] - 1; while (n < ino[0]) { @@ -308,16 +308,16 @@ ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) old_bufp = ret.oldp; if (!old_bufp) return (-1); - op = (u_short *)old_bufp->page; + op = (u_int16_t *)old_bufp->page; new_bufp = ret.newp; if (!new_bufp) return (-1); - np = (u_short *)new_bufp->page; + np = (u_int16_t *)new_bufp->page; bufp = ret.nextp; if (!bufp) return (0); cino = (char *)bufp->page; - ino = (u_short *)cino; + ino = (u_int16_t *)cino; last_bfp = ret.nextp; } else if (ino[n + 1] == OVFLPAGE) { ov_addr = ino[n]; @@ -327,14 +327,14 @@ ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) */ ino[0] -= (moved + 2); FREESPACE(ino) = - scopyto - sizeof(u_short) * (ino[0] + 3); + scopyto - sizeof(u_int16_t) * (ino[0] + 3); OFFSET(ino) = scopyto; bufp = __get_buf(hashp, ov_addr, bufp, 0); if (!bufp) return (-1); - ino = (u_short *)bufp->page; + ino = (u_int16_t *)bufp->page; n = 1; scopyto = hashp->BSIZE; moved = 0; @@ -362,7 +362,7 @@ ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) __add_ovflpage(hashp, old_bufp); if (!old_bufp) return (-1); - op = (u_short *)old_bufp->page; + op = (u_int16_t *)old_bufp->page; putpair((char *)op, &key, &val); } old_bufp->flags |= BUF_MOD; @@ -375,7 +375,7 @@ ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) __add_ovflpage(hashp, new_bufp); if (!new_bufp) return (-1); - np = (u_short *)new_bufp->page; + np = (u_int16_t *)new_bufp->page; putpair((char *)np, &key, &val); } new_bufp->flags |= BUF_MOD; @@ -400,10 +400,10 @@ __addel(hashp, bufp, key, val) BUFHEAD *bufp; const DBT *key, *val; { - register u_short *bp, *sop; + register u_int16_t *bp, *sop; int do_expand; - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; do_expand = 0; while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY)) /* Exception case */ @@ -415,7 +415,7 @@ __addel(hashp, bufp, key, val) bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; } else /* Try to squeeze key on this page */ if (FREESPACE(bp) > PAIRSIZE(key, val)) { @@ -425,7 +425,7 @@ __addel(hashp, bufp, key, val) bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); - bp = (u_short *)bufp->page; + bp = (u_int16_t *)bufp->page; } if (PAIRFITS(bp, key, val)) @@ -435,7 +435,7 @@ __addel(hashp, bufp, key, val) bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); - sop = (u_short *)bufp->page; + sop = (u_int16_t *)bufp->page; if (PAIRFITS(sop, key, val)) putpair((char *)sop, key, val); @@ -466,12 +466,12 @@ __add_ovflpage(hashp, bufp) HTAB *hashp; BUFHEAD *bufp; { - register u_short *sp; - u_short ndx, ovfl_num; + register u_int16_t *sp; + u_int16_t ndx, ovfl_num; #ifdef DEBUG1 int tmp1, tmp2; #endif - sp = (u_short *)bufp->page; + sp = (u_int16_t *)bufp->page; /* Check if we are dynamically determining the fill factor */ if (hashp->FFACTOR == DEF_FFACTOR) { @@ -518,12 +518,12 @@ extern int __get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap) HTAB *hashp; char *p; - u_int bucket; + u_int32_t bucket; int is_bucket, is_disk, is_bitmap; { register int fd, page, size; int rsize; - u_short *bp; + u_int16_t *bp; fd = hashp->fp; size = hashp->BSIZE; @@ -539,7 +539,7 @@ __get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap) if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) || ((rsize = read(fd, p, size)) == -1)) return (-1); - bp = (u_short *)p; + bp = (u_int16_t *)p; if (!rsize) bp[0] = 0; /* We hit the EOF, so initialize a new page */ else @@ -556,7 +556,7 @@ __get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap) if (is_bitmap) { max = hashp->BSIZE >> 2; /* divide by 4 */ for (i = 0; i < max; i++) - M_32_SWAP(((long *)p)[i]); + M_32_SWAP(((int *)p)[i]); } else { M_16_SWAP(bp[0]); max = bp[0] + 2; @@ -578,7 +578,7 @@ extern int __put_page(hashp, p, bucket, is_bucket, is_bitmap) HTAB *hashp; char *p; - u_int bucket; + u_int32_t bucket; int is_bucket, is_bitmap; { register int fd, page, size; @@ -596,11 +596,11 @@ __put_page(hashp, p, bucket, is_bucket, is_bitmap) if (is_bitmap) { max = hashp->BSIZE >> 2; /* divide by 4 */ for (i = 0; i < max; i++) - M_32_SWAP(((long *)p)[i]); + M_32_SWAP(((int *)p)[i]); } else { - max = ((u_short *)p)[0] + 2; + max = ((u_int16_t *)p)[0] + 2; for (i = 0; i <= max; i++) - M_16_SWAP(((u_short *)p)[i]); + M_16_SWAP(((u_int16_t *)p)[i]); } } if (is_bucket) @@ -624,14 +624,14 @@ __put_page(hashp, p, bucket, is_bucket, is_bitmap) * once they are read in. */ extern int -__init_bitmap(hashp, pnum, nbits, ndx) +__ibitmap(hashp, pnum, nbits, ndx) HTAB *hashp; int pnum, nbits, ndx; { - u_long *ip; + u_int32_t *ip; int clearbytes, clearints; - if ((ip = (u_long *)malloc(hashp->BSIZE)) == NULL) + if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) return (1); hashp->nmaps++; clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1; @@ -641,16 +641,16 @@ __init_bitmap(hashp, pnum, nbits, ndx) hashp->BSIZE - clearbytes); ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); SETBIT(ip, 0); - hashp->BITMAPS[ndx] = (u_short)pnum; + hashp->BITMAPS[ndx] = (u_int16_t)pnum; hashp->mapp[ndx] = ip; return (0); } -static u_long +static u_int32_t first_free(map) - u_long map; + u_int32_t map; { - register u_long i, mask; + register u_int32_t i, mask; mask = 0x1; for (i = 0; i < BITS_PER_MAP; i++) { @@ -661,13 +661,13 @@ first_free(map) return (i); } -static u_short +static u_int16_t overflow_page(hashp) HTAB *hashp; { - register u_long *freep; + register u_int32_t *freep; register int max_free, offset, splitnum; - u_short addr; + u_int16_t addr; int bit, first_page, free_bit, free_page, i, in_use_bits, j; #ifdef DEBUG2 int tmp1, tmp2; @@ -681,9 +681,9 @@ overflow_page(hashp) /* Look through all the free maps to find the first free block */ first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT); for ( i = first_page; i <= free_page; i++ ) { - if (!(freep = (u_long *)hashp->mapp[i]) && + if (!(freep = (u_int32_t *)hashp->mapp[i]) && !(freep = fetch_bitmap(hashp, i))) - return (NULL); + return (0); if (i == free_page) in_use_bits = free_bit; else @@ -713,7 +713,7 @@ overflow_page(hashp) if (offset > SPLITMASK) { if (++splitnum >= NCACHED) { (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); - return (NULL); + return (0); } hashp->OVFL_POINT = splitnum; hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; @@ -726,7 +726,7 @@ overflow_page(hashp) free_page++; if (free_page >= NCACHED) { (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); - return (NULL); + return (0); } /* * This is tricky. The 1 indicates that you want the new page @@ -739,9 +739,9 @@ overflow_page(hashp) * don't have to if we tell init_bitmap not to leave it clear * in the first place. */ - if (__init_bitmap(hashp, (int)OADDR_OF(splitnum, offset), - 1, free_page)) - return (NULL); + if (__ibitmap(hashp, + (int)OADDR_OF(splitnum, offset), 1, free_page)) + return (0); hashp->SPARES[splitnum]++; #ifdef DEBUG2 free_bit = 2; @@ -751,7 +751,7 @@ overflow_page(hashp) if (++splitnum >= NCACHED) { (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); - return (NULL); + return (0); } hashp->OVFL_POINT = splitnum; hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; @@ -795,7 +795,7 @@ found: for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); offset = (i ? bit - hashp->SPARES[i - 1] : bit); if (offset >= SPLITMASK) - return (NULL); /* Out of overflow pages */ + return (0); /* Out of overflow pages */ addr = OADDR_OF(i, offset); #ifdef DEBUG2 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", @@ -814,16 +814,16 @@ __free_ovflpage(hashp, obufp) HTAB *hashp; BUFHEAD *obufp; { - register u_short addr; - u_long *freep; + register u_int16_t addr; + u_int32_t *freep; int bit_address, free_page, free_bit; - u_short ndx; + u_int16_t ndx; addr = obufp->addr; #ifdef DEBUG1 (void)fprintf(stderr, "Freeing %d\n", addr); #endif - ndx = (((u_short)addr) >> SPLITSHIFT); + ndx = (((u_int16_t)addr) >> SPLITSHIFT); bit_address = (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; if (bit_address < hashp->LAST_FREED) @@ -879,11 +879,11 @@ open_temp(hashp) */ static void squeeze_key(sp, key, val) - u_short *sp; + u_int16_t *sp; const DBT *key, *val; { register char *p; - u_short free_space, n, off, pageno; + u_int16_t free_space, n, off, pageno; p = (char *)sp; n = sp[0]; @@ -904,14 +904,14 @@ squeeze_key(sp, key, val) OFFSET(sp) = off; } -static u_long * +static u_int32_t * fetch_bitmap(hashp, ndx) HTAB *hashp; int ndx; { if (ndx >= hashp->nmaps) return (NULL); - if ((hashp->mapp[ndx] = (u_long *)malloc(hashp->BSIZE)) == NULL) + if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) return (NULL); if (__get_page(hashp, (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) { diff --git a/lib/libc/db/hash/hsearch.c b/lib/libc/db/hash/hsearch.c index e4914b1..cc8f7a4 100644 --- a/lib/libc/db/hash/hsearch.c +++ b/lib/libc/db/hash/hsearch.c @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hsearch.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)hsearch.c 8.4 (Berkeley) 7/21/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -58,7 +58,7 @@ hcreate(nel) info.nelem = nel; info.bsize = 256; info.ffactor = 8; - info.cachesize = NULL; + info.cachesize = 0; info.hash = NULL; info.lorder = 0; dbp = (DB *)__hash_open(NULL, O_CREAT | O_RDWR, 0600, &info, 0); diff --git a/lib/libc/db/hash/ndbm.c b/lib/libc/db/hash/ndbm.c index 89b9916..2cbbe91 100644 --- a/lib/libc/db/hash/ndbm.c +++ b/lib/libc/db/hash/ndbm.c @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)ndbm.c 8.2 (Berkeley) 9/11/93"; +static char sccsid[] = "@(#)ndbm.c 8.4 (Berkeley) 7/21/94"; #endif /* LIBC_SCCS and not lint */ /* @@ -45,10 +45,10 @@ static char sccsid[] = "@(#)ndbm.c 8.2 (Berkeley) 9/11/93"; #include -#include #include #include +#include #include "hash.h" /* @@ -67,7 +67,7 @@ dbm_open(file, flags, mode) info.bsize = 4096; info.ffactor = 40; info.nelem = 1; - info.cachesize = NULL; + info.cachesize = 0; info.hash = NULL; info.lorder = 0; (void)strcpy(path, file); diff --git a/lib/libc/db/hash/page.h b/lib/libc/db/hash/page.h index dae82ae..0fc0d5a 100644 --- a/lib/libc/db/hash/page.h +++ b/lib/libc/db/hash/page.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)page.h 8.1 (Berkeley) 6/6/93 + * @(#)page.h 8.2 (Berkeley) 5/31/94 */ /* @@ -73,20 +73,20 @@ * You might as well do this up front. */ -#define PAIRSIZE(K,D) (2*sizeof(u_short) + (K)->size + (D)->size) -#define BIGOVERHEAD (4*sizeof(u_short)) -#define KEYSIZE(K) (4*sizeof(u_short) + (K)->size); -#define OVFLSIZE (2*sizeof(u_short)) +#define PAIRSIZE(K,D) (2*sizeof(u_int16_t) + (K)->size + (D)->size) +#define BIGOVERHEAD (4*sizeof(u_int16_t)) +#define KEYSIZE(K) (4*sizeof(u_int16_t) + (K)->size); +#define OVFLSIZE (2*sizeof(u_int16_t)) #define FREESPACE(P) ((P)[(P)[0]+1]) #define OFFSET(P) ((P)[(P)[0]+2]) #define PAIRFITS(P,K,D) \ (((P)[2] >= REAL_KEY) && \ (PAIRSIZE((K),(D)) + OVFLSIZE) <= FREESPACE((P))) -#define PAGE_META(N) (((N)+3) * sizeof(u_short)) +#define PAGE_META(N) (((N)+3) * sizeof(u_int16_t)) typedef struct { BUFHEAD *newp; BUFHEAD *oldp; BUFHEAD *nextp; - u_short next_addr; + u_int16_t next_addr; } SPLIT_RETURN; diff --git a/lib/libc/db/man/btree.3 b/lib/libc/db/man/btree.3 index a51e1e5..8284b21 100644 --- a/lib/libc/db/man/btree.3 +++ b/lib/libc/db/man/btree.3 @@ -29,9 +29,9 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" @(#)btree.3 8.3 (Berkeley) 2/21/94 +.\" @(#)btree.3 8.4 (Berkeley) 8/18/94 .\" -.TH BTREE 3 "February 21, 1994" +.TH BTREE 3 "August 18, 1994" .\".UC 7 .SH NAME btree \- btree database access method @@ -207,6 +207,13 @@ O lg base N where base is the average fill factor. Often, inserting ordered data into btrees results in a low fill factor. This implementation has been modified to make ordered insertion the best case, resulting in a much better than normal page fill factor. +.SH ERRORS +The +.I btree +access method routines may fail and set +.I errno +for any of the errors specified for the library routine +.IR dbopen (3). .SH "SEE ALSO" .IR dbopen (3), .IR hash (3), diff --git a/lib/libc/db/man/hash.3 b/lib/libc/db/man/hash.3 index 3ae00a2..4367031 100644 --- a/lib/libc/db/man/hash.3 +++ b/lib/libc/db/man/hash.3 @@ -29,9 +29,9 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" @(#)hash.3 8.5 (Berkeley) 2/21/94 +.\" @(#)hash.3 8.6 (Berkeley) 8/18/94 .\" -.TH HASH 3 "February 21, 1994" +.TH HASH 3 "August 18, 1994" .UC 7 .SH NAME hash \- hash database access method @@ -137,6 +137,13 @@ and .IR ndbm (3) are provided, however these interfaces are not compatible with previous file formats. +.SH ERRORS +The +.I hash +access method routines may fail and set +.I errno +for any of the errors specified for the library routine +.IR dbopen (3). .SH "SEE ALSO" .IR btree (3), .IR dbopen (3), diff --git a/lib/libc/db/man/recno.3 b/lib/libc/db/man/recno.3 index a40e6c9..516bef0 100644 --- a/lib/libc/db/man/recno.3 +++ b/lib/libc/db/man/recno.3 @@ -29,9 +29,9 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" @(#)recno.3 8.3 (Berkeley) 2/21/94 +.\" @(#)recno.3 8.5 (Berkeley) 8/18/94 .\" -.TH RECNO 3 "February 21, 1994" +.TH RECNO 3 "August 18, 1994" .UC 7 .SH NAME recno \- record number database access method @@ -97,6 +97,9 @@ The structure element specifies the length of the record, and the structure element .I bval is used as the pad character. +Any records, inserted into the database, that are less than +.I reclen +bytes long are automatically padded. .TP R_NOKEY In the interface specified by @@ -176,6 +179,11 @@ The .I size field of the key should be the size of that type. .PP +Because there can be no meta-data associated with the underlying +recno access method files, any changes made to the default values +(e.g. fixed record length or byte separator value) must be explicitly +specified each time the file is opened. +.PP In the interface specified by .IR dbopen , using the @@ -183,11 +191,23 @@ using the interface to create a new record will cause the creation of multiple, empty records if the record number is more than one greater than the largest record currently in the database. +.SH ERRORS +The +.I recno +access method routines may fail and set +.I errno +for any of the errors specified for the library routine +.IR dbopen (3) +or the following: +.TP +[EINVAL] +An attempt was made to add a record to a fixed-length database that +was too large to fit. .SH "SEE ALSO" +.IR btree (3) .IR dbopen (3), .IR hash (3), .IR mpool (3), -.IR recno (3) .sp .IR "Document Processing in a Relational Database System" , Michael Stonebraker, Heidi Stettner, Joseph Kalash, Antonin Guttman, diff --git a/lib/libc/db/mpool/mpool.c b/lib/libc/db/mpool/mpool.c index 72627bd..a61041e 100644 --- a/lib/libc/db/mpool/mpool.c +++ b/lib/libc/db/mpool/mpool.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,10 +32,11 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include +#include #include #include @@ -45,31 +46,21 @@ static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94"; #include #include + #define __MPOOLINTERFACE_PRIVATE -#include "mpool.h" +#include static BKT *mpool_bkt __P((MPOOL *)); static BKT *mpool_look __P((MPOOL *, pgno_t)); static int mpool_write __P((MPOOL *, BKT *)); -#ifdef DEBUG -static void __mpoolerr __P((const char *fmt, ...)); -#endif /* - * MPOOL_OPEN -- initialize a memory pool. - * - * Parameters: - * key: Shared buffer key. - * fd: File descriptor. - * pagesize: File page size. - * maxcache: Max number of cached pages. - * - * Returns: - * MPOOL pointer, NULL on error. + * mpool_open -- + * Initialize a memory pool. */ MPOOL * mpool_open(key, fd, pagesize, maxcache) - DBT *key; + void *key; int fd; pgno_t pagesize, maxcache; { @@ -77,49 +68,35 @@ mpool_open(key, fd, pagesize, maxcache) MPOOL *mp; int entry; + /* + * Get information about the file. + * + * XXX + * We don't currently handle pipes, although we should. + */ if (fstat(fd, &sb)) return (NULL); - /* XXX - * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so - * that stat(2) returns true for ISSOCK on pipes. Until then, this is - * fairly close. - */ if (!S_ISREG(sb.st_mode)) { errno = ESPIPE; return (NULL); } - if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL) + /* Allocate and initialize the MPOOL cookie. */ + if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) return (NULL); - mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; - mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; + CIRCLEQ_INIT(&mp->lqh); for (entry = 0; entry < HASHSIZE; ++entry) - mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = - mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = - (BKT *)&mp->hashtable[entry]; - mp->curcache = 0; + CIRCLEQ_INIT(&mp->hqh[entry]); mp->maxcache = maxcache; - mp->pagesize = pagesize; mp->npages = sb.st_size / pagesize; + mp->pagesize = pagesize; mp->fd = fd; - mp->pgcookie = NULL; - mp->pgin = mp->pgout = NULL; - -#ifdef STATISTICS - mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = - mp->pageget = mp->pagenew = mp->pageput = mp->pageread = - mp->pagewrite = 0; -#endif return (mp); } /* - * MPOOL_FILTER -- initialize input/output filters. - * - * Parameters: - * pgin: Page in conversion routine. - * pgout: Page out conversion routine. - * pgcookie: Cookie for page in/out routines. + * mpool_filter -- + * Initialize input/output filters. */ void mpool_filter(mp, pgin, pgout, pgcookie) @@ -134,124 +111,128 @@ mpool_filter(mp, pgin, pgout, pgcookie) } /* - * MPOOL_NEW -- get a new page - * - * Parameters: - * mp: mpool cookie - * pgnoadddr: place to store new page number - * Returns: - * RET_ERROR, RET_SUCCESS + * mpool_new -- + * Get a new page of memory. */ void * mpool_new(mp, pgnoaddr) MPOOL *mp; pgno_t *pgnoaddr; { - BKT *b; - BKTHDR *hp; + struct _hqh *head; + BKT *bp; + if (mp->npages == MAX_PAGE_NUMBER) { + (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); + abort(); + } #ifdef STATISTICS ++mp->pagenew; #endif /* - * Get a BKT from the cache. Assign a new page number, attach it to - * the hash and lru chains and return. + * Get a BKT from the cache. Assign a new page number, attach + * it to the head of the hash chain, the tail of the lru chain, + * and return. */ - if ((b = mpool_bkt(mp)) == NULL) + if ((bp = mpool_bkt(mp)) == NULL) return (NULL); - *pgnoaddr = b->pgno = mp->npages++; - b->flags = MPOOL_PINNED; - inshash(b, b->pgno); - inschain(b, &mp->lru); - return (b->page); + *pgnoaddr = bp->pgno = mp->npages++; + bp->flags = MPOOL_PINNED; + + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_INSERT_HEAD(head, bp, hq); + CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); + return (bp->page); } /* - * MPOOL_GET -- get a page from the pool - * - * Parameters: - * mp: mpool cookie - * pgno: page number - * flags: not used - * - * Returns: - * RET_ERROR, RET_SUCCESS + * mpool_get + * Get a page. */ void * mpool_get(mp, pgno, flags) MPOOL *mp; pgno_t pgno; - u_int flags; /* XXX not used? */ + u_int flags; /* XXX not used? */ { - BKT *b; - BKTHDR *hp; + struct _hqh *head; + BKT *bp; off_t off; int nr; - /* - * If asking for a specific page that is already in the cache, find - * it and return it. - */ - if (b = mpool_look(mp, pgno)) { + /* Check for attempt to retrieve a non-existent page. */ + if (pgno >= mp->npages) { + errno = EINVAL; + return (NULL); + } + #ifdef STATISTICS - ++mp->pageget; + ++mp->pageget; #endif + + /* Check for a page that is cached. */ + if ((bp = mpool_look(mp, pgno)) != NULL) { #ifdef DEBUG - if (b->flags & MPOOL_PINNED) - __mpoolerr("mpool_get: page %d already pinned", - b->pgno); + if (bp->flags & MPOOL_PINNED) { + (void)fprintf(stderr, + "mpool_get: page %d already pinned\n", bp->pgno); + abort(); + } #endif - rmchain(b); - inschain(b, &mp->lru); - b->flags |= MPOOL_PINNED; - return (b->page); - } - - /* Not allowed to retrieve a non-existent page. */ - if (pgno >= mp->npages) { - errno = EINVAL; - return (NULL); + /* + * Move the page to the head of the hash chain and the tail + * of the lru chain. + */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_REMOVE(head, bp, hq); + CIRCLEQ_INSERT_HEAD(head, bp, hq); + CIRCLEQ_REMOVE(&mp->lqh, bp, q); + CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); + + /* Return a pinned page. */ + bp->flags |= MPOOL_PINNED; + return (bp->page); } /* Get a page from the cache. */ - if ((b = mpool_bkt(mp)) == NULL) + if ((bp = mpool_bkt(mp)) == NULL) return (NULL); - b->pgno = pgno; - b->flags = MPOOL_PINNED; + /* Read in the contents. */ #ifdef STATISTICS ++mp->pageread; #endif - /* Read in the contents. */ off = mp->pagesize * pgno; if (lseek(mp->fd, off, SEEK_SET) != off) return (NULL); - if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { + if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) { if (nr >= 0) errno = EFTYPE; return (NULL); } - if (mp->pgin) - (mp->pgin)(mp->pgcookie, b->pgno, b->page); - inshash(b, b->pgno); - inschain(b, &mp->lru); -#ifdef STATISTICS - ++mp->pageget; -#endif - return (b->page); + /* Set the page number, pin the page. */ + bp->pgno = pgno; + bp->flags = MPOOL_PINNED; + + /* + * Add the page to the head of the hash chain and the tail + * of the lru chain. + */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_INSERT_HEAD(head, bp, hq); + CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); + + /* Run through the user's filter. */ + if (mp->pgin != NULL) + (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); + + return (bp->page); } /* - * MPOOL_PUT -- return a page to the pool - * - * Parameters: - * mp: mpool cookie - * page: page pointer - * pgno: page number - * - * Returns: - * RET_ERROR, RET_SUCCESS + * mpool_put + * Return a page. */ int mpool_put(mp, page, flags) @@ -259,193 +240,172 @@ mpool_put(mp, page, flags) void *page; u_int flags; { - BKT *baddr; -#ifdef DEBUG - BKT *b; -#endif + BKT *bp; #ifdef STATISTICS ++mp->pageput; #endif - baddr = (BKT *)((char *)page - sizeof(BKT)); + bp = (BKT *)((char *)page - sizeof(BKT)); #ifdef DEBUG - if (!(baddr->flags & MPOOL_PINNED)) - __mpoolerr("mpool_put: page %d not pinned", b->pgno); - for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { - if (b == (BKT *)&mp->lru) - __mpoolerr("mpool_put: %0x: bad address", baddr); - if (b == baddr) - break; + if (!(bp->flags & MPOOL_PINNED)) { + (void)fprintf(stderr, + "mpool_put: page %d not pinned\n", bp->pgno); + abort(); } #endif - baddr->flags &= ~MPOOL_PINNED; - baddr->flags |= flags & MPOOL_DIRTY; + bp->flags &= ~MPOOL_PINNED; + bp->flags |= flags & MPOOL_DIRTY; return (RET_SUCCESS); } /* - * MPOOL_CLOSE -- close the buffer pool - * - * Parameters: - * mp: mpool cookie - * - * Returns: - * RET_ERROR, RET_SUCCESS + * mpool_close + * Close the buffer pool. */ int mpool_close(mp) MPOOL *mp; { - BKT *b, *next; + BKT *bp; /* Free up any space allocated to the lru pages. */ - for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { - next = b->cprev; - free(b); + while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) { + CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q); + free(bp); } + + /* Free the MPOOL cookie. */ free(mp); return (RET_SUCCESS); } /* - * MPOOL_SYNC -- sync the file to disk. - * - * Parameters: - * mp: mpool cookie - * - * Returns: - * RET_ERROR, RET_SUCCESS + * mpool_sync + * Sync the pool to disk. */ int mpool_sync(mp) MPOOL *mp; { - BKT *b; + BKT *bp; - for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) - if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) + /* Walk the lru chain, flushing any dirty pages to disk. */ + for (bp = mp->lqh.cqh_first; + bp != (void *)&mp->lqh; bp = bp->q.cqe_next) + if (bp->flags & MPOOL_DIRTY && + mpool_write(mp, bp) == RET_ERROR) return (RET_ERROR); + + /* Sync the file descriptor. */ return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); } /* - * MPOOL_BKT -- get/create a BKT from the cache - * - * Parameters: - * mp: mpool cookie - * - * Returns: - * NULL on failure and a pointer to the BKT on success + * mpool_bkt + * Get a page from the cache (or create one). */ static BKT * mpool_bkt(mp) MPOOL *mp; { - BKT *b; + struct _hqh *head; + BKT *bp; + /* If under the max cached, always create a new page. */ if (mp->curcache < mp->maxcache) goto new; /* - * If the cache is maxxed out, search the lru list for a buffer we - * can flush. If we find one, write it if necessary and take it off - * any lists. If we don't find anything we grow the cache anyway. + * If the cache is max'd out, walk the lru list for a buffer we + * can flush. If we find one, write it (if necessary) and take it + * off any lists. If we don't find anything we grow the cache anyway. * The cache never shrinks. */ - for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) - if (!(b->flags & MPOOL_PINNED)) { - if (b->flags & MPOOL_DIRTY && - mpool_write(mp, b) == RET_ERROR) + for (bp = mp->lqh.cqh_first; + bp != (void *)&mp->lqh; bp = bp->q.cqe_next) + if (!(bp->flags & MPOOL_PINNED)) { + /* Flush if dirty. */ + if (bp->flags & MPOOL_DIRTY && + mpool_write(mp, bp) == RET_ERROR) return (NULL); - rmhash(b); - rmchain(b); #ifdef STATISTICS ++mp->pageflush; #endif + /* Remove from the hash and lru queues. */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_REMOVE(head, bp, hq); + CIRCLEQ_REMOVE(&mp->lqh, bp, q); #ifdef DEBUG - { - void *spage; - spage = b->page; - memset(b, 0xff, sizeof(BKT) + mp->pagesize); - b->page = spage; + { void *spage; + spage = bp->page; + memset(bp, 0xff, sizeof(BKT) + mp->pagesize); + bp->page = spage; } #endif - return (b); + return (bp); } -new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) +new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) return (NULL); #ifdef STATISTICS ++mp->pagealloc; #endif -#ifdef DEBUG - memset(b, 0xff, sizeof(BKT) + mp->pagesize); +#if defined(DEBUG) || defined(PURIFY) + memset(bp, 0xff, sizeof(BKT) + mp->pagesize); #endif - b->page = (char *)b + sizeof(BKT); + bp->page = (char *)bp + sizeof(BKT); ++mp->curcache; - return (b); + return (bp); } /* - * MPOOL_WRITE -- sync a page to disk - * - * Parameters: - * mp: mpool cookie - * - * Returns: - * RET_ERROR, RET_SUCCESS + * mpool_write + * Write a page to disk. */ static int -mpool_write(mp, b) +mpool_write(mp, bp) MPOOL *mp; - BKT *b; + BKT *bp; { off_t off; - if (mp->pgout) - (mp->pgout)(mp->pgcookie, b->pgno, b->page); - #ifdef STATISTICS ++mp->pagewrite; #endif - off = mp->pagesize * b->pgno; + + /* Run through the user's filter. */ + if (mp->pgout) + (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); + + off = mp->pagesize * bp->pgno; if (lseek(mp->fd, off, SEEK_SET) != off) return (RET_ERROR); - if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) + if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize) return (RET_ERROR); - b->flags &= ~MPOOL_DIRTY; + + bp->flags &= ~MPOOL_DIRTY; return (RET_SUCCESS); } /* - * MPOOL_LOOK -- lookup a page - * - * Parameters: - * mp: mpool cookie - * pgno: page number - * - * Returns: - * NULL on failure and a pointer to the BKT on success + * mpool_look + * Lookup a page in the cache. */ static BKT * mpool_look(mp, pgno) MPOOL *mp; pgno_t pgno; { - register BKT *b; - register BKTHDR *tb; + struct _hqh *head; + BKT *bp; - /* XXX - * If find the buffer, put it first on the hash chain so can - * find it again quickly. - */ - tb = &mp->hashtable[HASHKEY(pgno)]; - for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) - if (b->pgno == pgno) { + head = &mp->hqh[HASHKEY(pgno)]; + for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next) + if (bp->pgno == pgno) { #ifdef STATISTICS ++mp->cachehit; #endif - return (b); + return (bp); } #ifdef STATISTICS ++mp->cachemiss; @@ -455,16 +415,14 @@ mpool_look(mp, pgno) #ifdef STATISTICS /* - * MPOOL_STAT -- cache statistics - * - * Parameters: - * mp: mpool cookie + * mpool_stat + * Print out cache statistics. */ void mpool_stat(mp) MPOOL *mp; { - BKT *b; + BKT *bp; int cnt; char *sep; @@ -486,11 +444,12 @@ mpool_stat(mp) sep = ""; cnt = 0; - for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { - (void)fprintf(stderr, "%s%d", sep, b->pgno); - if (b->flags & MPOOL_DIRTY) + for (bp = mp->lqh.cqh_first; + bp != (void *)&mp->lqh; bp = bp->q.cqe_next) { + (void)fprintf(stderr, "%s%d", sep, bp->pgno); + if (bp->flags & MPOOL_DIRTY) (void)fprintf(stderr, "d"); - if (b->flags & MPOOL_PINNED) + if (bp->flags & MPOOL_PINNED) (void)fprintf(stderr, "P"); if (++cnt == 10) { sep = "\n"; @@ -502,33 +461,3 @@ mpool_stat(mp) (void)fprintf(stderr, "\n"); } #endif - -#ifdef DEBUG -#if __STDC__ -#include -#else -#include -#endif - -static void -#if __STDC__ -__mpoolerr(const char *fmt, ...) -#else -__mpoolerr(fmt, va_alist) - char *fmt; - va_dcl -#endif -{ - va_list ap; -#if __STDC__ - va_start(ap, fmt); -#else - va_start(ap); -#endif - (void)vfprintf(stderr, fmt, ap); - va_end(ap); - (void)fprintf(stderr, "\n"); - abort(); - /* NOTREACHED */ -} -#endif diff --git a/lib/libc/db/mpool/mpool.libtp b/lib/libc/db/mpool/mpool.libtp new file mode 100644 index 0000000..8c0fc27 --- /dev/null +++ b/lib/libc/db/mpool/mpool.libtp @@ -0,0 +1,746 @@ +/****************************************************************************** + +VERSION $Id: buf.c,v 1.26 92/01/09 09:15:26 margo Exp $ +PACKAGE: User Level Shared Memory Manager + +DESCRIPTION: + This package provides a buffer pool interface implemented as + a collection of file pages mapped into shared memory. + + Based on Mark's buffer manager + +ROUTINES: + External + buf_alloc + buf_flags + buf_get + buf_init + buf_last + buf_open + buf_pin + buf_sync + buf_unpin + Internal + bf_assign_buf + bf_fid_to_fd + bf_newbuf + bf_put_page + + +******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include "list.h" +#include "user.h" +#include "txn_sys.h" +#include "buf.h" +#include "semkeys.h" +#include "error.h" + +/* + we need to translate between some type of file id that the user + process passes and a file descriptor. For now, it's a nop. +*/ +#define GET_MASTER get_sem ( buf_spinlock ) +#define RELEASE_MASTER release_sem ( buf_spinlock ) + +#define LRUID *buf_lru +#define LRUP (bufhdr_table+*buf_lru) +#define MRU bufhdr_table[*buf_lru].lru.prev + +/* Global indicator that you have started reusing buffers */ +int do_statistics = 0; +/* + Process Statics (pointers into shared memory) +*/ +static BUF_T *buf_table = 0; +static BUFHDR_T *bufhdr_table; +static int *buf_hash_table; +static int *buf_lru; /* LRU is the free list */ +static int buf_spinlock; +static FINFO_T *buf_fids; +static int *buf_sp; /* Pointer to string free space */ +static char *buf_strings; + +/* Process Local FID->FD table */ +static int fds[NUM_FILE_ENTRIES]; + +/* Static routines */ +static BUFHDR_T *bf_assign_buf(); +static int bf_fid_to_fd(); +static BUFHDR_T *bf_newbuf(); +static int bf_put_page(); + +/* + Return 0 on success + 1 on failure +*/ +extern int +buf_init ( ) +{ + ADDR_T buf_region; + BUFHDR_T *bhp; + int i; + int ref_count; + int *spinlockp; + + /* + Initialize Process local structures + */ + for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) { + fds[i] = -1; + } + + buf_region = attach_region ( BUF_REGION_NAME, BUF_REGION_NUM, + BUF_REGION_SIZE, &ref_count ); + if ( !buf_region ) { + return (1); + } + error_log3 ( "Buf Region: ADDR: %d ID: %d SIZE: %d\n", buf_region, + BUF_REGION_NUM, BUF_REGION_SIZE ); + + buf_table = (BUF_T *)buf_region; + bufhdr_table = (BUFHDR_T *)(buf_table + NUM_BUFS); + buf_hash_table = (int *)(bufhdr_table + NUM_BUFS); + buf_lru = buf_hash_table + NUMTABLE_ENTRIES; + spinlockp = buf_lru + 1; + buf_fids = (FINFO_T *)(spinlockp+1); + buf_sp = (int *)(buf_fids + NUM_FILE_ENTRIES); + buf_strings = (char *)(buf_sp + 1); + + /* Create locking spinlock (gets creating holding the lock) */ + buf_spinlock = create_sem ( BUF_SPIN_NAME, BUF_SPIN_NUM, ref_count <= 1 ); + if ( buf_spinlock < 0 ) { + return(1); + } + if ( ref_count <= 1 ) { + *spinlockp = buf_spinlock; + + /* Now initialize the buffer manager */ + + /* 1. Free list */ + *buf_lru = 0; + + /* 2. Buffer headers */ + for ( i = 0, bhp = bufhdr_table; i < NUM_BUFS; bhp++, i++ ) { + bhp->lru.next = i+1; + bhp->lru.prev = i-1; + bhp->flags = 0; /* All Flags off */ + bhp->refcount = 0; + bhp->wait_proc = -1; /* No sleepers */ + LISTPE_INIT ( hash, bhp, i ); /* Hash chains */ + } + bufhdr_table[0].lru.prev = NUM_BUFS-1; + bufhdr_table[NUM_BUFS-1].lru.next = 0; + + /* 3. Hash Table */ + for ( i = 0; i < NUMTABLE_ENTRIES; i++ ) { + buf_hash_table[i] = NUM_BUFS; + } + + /* 4. File ID Table */ + for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) { + buf_fids[i].offset = -1; + buf_fids[i].npages = -1; + buf_fids[i].refcount = 0; + } + + /* 5. Free String Pointer */ + *buf_sp = (FILE_NAME_LEN*NUM_FILE_ENTRIES); + if (RELEASE_MASTER) { + return(1); + } + error_log0 ( "Initialized buffer region\n" ); + } + return (0); +} + +extern void +buf_exit() +{ + int ref; + int i; + + /* Flush Buffer Pool on Exit */ + for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) { + if ( fds[i] != -1 ) { + close ( fds[i] ); + } + } + if ( buf_table ) { + detach_region ( buf_table, BUF_REGION_NUM, BUF_REGION_SIZE, &ref ); + } + return; +} + +/* + We need an empty buffer. Find the LRU unpinned NON-Dirty page. +*/ +static BUFHDR_T * +bf_newbuf() +{ + int fd; + int lruid; + int nbytes; + int ndx; + BUFHDR_T *bhp; + + lruid = LRUID; + for ( bhp = LRUP; + bhp->flags & (BUF_PINNED|BUF_IO_IN_PROGRESS); + bhp = LISTP_NEXTP (bufhdr_table, lru, bhp ) ) { + + if ( bhp->lru.next == lruid ) { + /* OUT OF BUFFERS */ + error_log1 ( "All buffers are pinned. %s\n", + "Unable to grant buffer request" ); + return(NULL); + } + } + /* BHP can be used */ + if ( bhp->flags & BUF_DIRTY ) { + do_statistics = 1; + /* + MIS Check for log flushed appropriately + */ + fd = bf_fid_to_fd(bhp->id.file_id); + if ( fd == -1 ) { + error_log1 ("Invalid fid %d\n", bhp->id.file_id); + return(NULL); + } + if ( bf_put_page(fd, bhp) < 0 ) { + return(NULL); + } + } + /* Update Hash Pointers */ + ndx = BUF_HASH ( bhp->id.file_id, bhp->id.obj_id ); + LISTP_REMOVE(bufhdr_table, hash, bhp); + if ( buf_hash_table[ndx] == (bhp-bufhdr_table) ) { + if ( bhp->hash.next != (bhp-bufhdr_table) ) { + buf_hash_table[ndx] = bhp->hash.next; + } else { + buf_hash_table[ndx] = NUM_BUFS; + } + } + INIT_BUF(bhp); + + return(bhp); +} +/* + buf_alloc + + Add a page to a file and return a buffer for it. + +*/ +ADDR_T +buf_alloc ( fid, new_pageno ) +int fid; +int *new_pageno; +{ + BUFHDR_T *bhp; + int fd; + int len; + int ndx; + OBJ_T fobj; + + if (GET_MASTER) { + return(NULL); + } + if ( buf_fids[fid].npages == -1 ) { + /* initialize npages field */ + fd = bf_fid_to_fd ( fid ); + } + assert (fid < NUM_FILE_ENTRIES); + + *new_pageno = buf_fids[fid].npages; + if ( *new_pageno == -1 ) { + RELEASE_MASTER; + return ( NULL ); + } + buf_fids[fid].npages++; + ndx = BUF_HASH ( fid, *new_pageno ); + fobj.file_id = fid; + fobj.obj_id = *new_pageno; + bhp = bf_assign_buf ( ndx, &fobj, BF_PIN|BF_DIRTY|BF_EMPTY, &len ); + if ( RELEASE_MASTER ) { + /* Memory leak */ + return(NULL); + } + if ( bhp ) { + return ((ADDR_T)(buf_table+(bhp-bufhdr_table))); + } else { + return ( NULL ); + } +} + + +/* + Buffer Flags + BF_DIRTY Mark page as dirty + BF_EMPTY Don't initialize page, just get buffer + BF_PIN Retrieve with pin + +MIS +Might want to add a flag that sets an LSN for this buffer is the +DIRTY flag is set + +Eventually, you may want a flag that indicates the I/O and lock +request should be shipped off together, but not for now. +*/ +extern ADDR_T +buf_get ( file_id, page_id, flags, len ) +int file_id; +int page_id; +u_long flags; +int *len; /* Number of bytes read into buffer */ +{ + BUFHDR_T *bhp; + int bufid; + int fd; + int ndx; + int next_bufid; + int stat; + OBJ_T fobj; + + ndx = BUF_HASH ( file_id, page_id ); + fobj.file_id = (long) file_id; + fobj.obj_id = (long) page_id; + if ( GET_MASTER ) { + return(NULL); + } + /* + This could be a for loop, but we lose speed + by making all the cases general purpose so we + optimize for the no-collision case. + */ + bufid = buf_hash_table[ndx]; + if ( bufid < NUM_BUFS ) { + for ( bhp = bufhdr_table+bufid; + !OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID); + bhp = LISTP_NEXTP ( bufhdr_table, hash, bhp ) ) { + + if ( bhp->hash.next == bufid ) { + goto not_found; + } + } +/* found */ + if ( flags & BF_PIN ) { + bhp->flags |= BUF_PINNED; + bhp->refcount++; +#ifdef PIN_DEBUG + fprintf(stderr, "buf_get: %X PINNED (%d)\n", + buf_table + (bhp-bufhdr_table), bhp->refcount); +#endif + } + if ( flags & BF_DIRTY ) { + bhp->flags |= BUF_DIRTY; + } + + while ( bhp->flags & BUF_IO_IN_PROGRESS ) { + /* MIS -- eventually err check here */ +#ifdef DEBUG + printf("About to sleep on %d (me: %d\n)\n", bhp->wait_proc, + my_txnp - txn_table); +#endif +#ifdef WAIT_STATS + buf_waits++; +#endif + stat = proc_sleep_on ( &(bhp->wait_proc), buf_spinlock ); + if ( stat ) { + /* Memory leak */ + return(NULL); + } + if (!( bhp->flags & BUF_IO_IN_PROGRESS) && + (!OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID))) { + if (RELEASE_MASTER) + return(NULL); + return(buf_get ( file_id, page_id, flags, len )); + } + } + MAKE_MRU( bhp ); + *len = BUFSIZE; + } else { +not_found: + /* If you get here, the page isn't in the hash table */ + bhp = bf_assign_buf ( ndx, &fobj, flags, len ); + } + /* Common code between found and not found */ + + if ( bhp && bhp->flags & BUF_NEWPAGE ) { + *len = 0; + } + if (RELEASE_MASTER){ + /* Memory leak */ + return(NULL); + } + if ( bhp ) { + return ((ADDR_T)(buf_table+(bhp-bufhdr_table))); + } else { + return ( NULL ); + } +} + +/* + MIS - do I want to add file links to buffer pool? +*/ +extern int +buf_sync ( fid, close ) +int fid; +int close; /* should we dec refcount and possibly + invalidate all the buffers */ +{ + int i; + int fd; + int invalidate; + BUFHDR_T *bhp; + + if ( (fd = bf_fid_to_fd ( fid )) < 0 ) { + return(1); + } + if (GET_MASTER) { + return(1); + } + invalidate = (buf_fids[fid].refcount == 1 && close); + if ( invalidate ) + for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) { + if (bhp->id.file_id == fid) { + if ((bhp->flags & BF_DIRTY) && (bf_put_page( fd, bhp ) < 0)) { + return(1); + } + bhp->id.file_id = -1; + } + } + if (invalidate || close) + buf_fids[fid].refcount--; + + if (RELEASE_MASTER) { + return(1); + } + return(0); + + +} + +extern int +buf_flags ( addr, set_flags, unset_flags ) +ADDR_T addr; +u_long set_flags; +u_long unset_flags; +{ + int bufid; + BUFHDR_T *bhp; + +#ifdef PIN_DEBUG + fprintf(stderr, "buf_flags: %X setting %s%s%s%s%s releasing %s%s%s%s%s\n", + addr, + set_flags&BUF_DIRTY ? "DIRTY " : "", + set_flags&BUF_VALID ? "VALID " : "", + set_flags&BUF_PINNED ? "PINNED " : "", + set_flags&BUF_IO_ERROR ? "IO_ERROR " : "", + set_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "", + set_flags&BUF_NEWPAGE ? "NEWPAGE " : "", + unset_flags&BUF_DIRTY ? "DIRTY " : "", + unset_flags&BUF_VALID ? "VALID " : "", + unset_flags&BUF_PINNED ? "PINNED " : "", + unset_flags&BUF_IO_ERROR ? "IO_ERROR " : "", + unset_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "", + unset_flags&BUF_NEWPAGE ? "NEWPAGE " : "" ); +#endif + if (!ADDR_OK(addr)) { + error_log1 ( "buf_pin: Invalid Buffer Address %x\n", addr ); + return(1); + } + bufid = ((BUF_T *)addr) - buf_table; + assert ( bufid < NUM_BUFS); + bhp = &bufhdr_table[bufid]; + if (GET_MASTER) { + return(1); + } + bhp->flags |= set_flags; + if ( set_flags & BUF_PINNED ) { + bhp->refcount++; + } + if ( set_flags & BUF_DIRTY ) { + unset_flags |= BUF_NEWPAGE; + } + + if ( unset_flags & BUF_PINNED ) { + bhp->refcount--; + if ( bhp->refcount ) { + /* Turn off pin bit so it doesn't get unset */ + unset_flags &= ~BUF_PINNED; + } + } + bhp->flags &= ~unset_flags; + MAKE_MRU(bhp); + if (RELEASE_MASTER) { + return(1); + } + return(0); +} + +/* + Take a string name and produce an fid. + + returns -1 on error + + MIS -- this is a potential problem -- you keep actual names + here -- what if people run from different directories? +*/ +extern int +buf_name_lookup ( fname ) +char *fname; +{ + int i; + int fid; + int ndx; + + fid = -1; + if (GET_MASTER) { + return(-1); + } + for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) { + if ( buf_fids[i].offset == -1 ) { + fid = i; + } else { + if (!strcmp (fname, buf_strings+buf_fids[i].offset)) { + if (RELEASE_MASTER) { + return(-1); + } + buf_fids[i].refcount++; + return(i); + } + } + } + if ( fid == -1 ) { + error_log0 ( "No more file ID's\n" ); + } else { + ndx = *buf_sp - strlen(fname) - 1; + if ( ndx < 0 ) { + error_log0 ( "Out of string space\n" ); + fid = -1; + } else { + *buf_sp = ndx; + strcpy ( buf_strings+ndx, fname ); + buf_fids[fid].offset = ndx; + } + buf_fids[fid].refcount = 1; + } + if (RELEASE_MASTER) { + return(-1); + } + return(fid); +} + +static int +bf_fid_to_fd ( fid ) +int fid; +{ + struct stat sbuf; + + assert ( (fid < NUM_FILE_ENTRIES) && (buf_fids[fid].offset != -1) ); + if ( fds[fid] != -1 ) { + return(fds[fid]); + + } + fds[fid] = open ( buf_strings+buf_fids[fid].offset, O_RDWR|O_CREAT, + 0666 ); + if ( fds[fid] < 0 ) { + error_log3 ( "Error Opening File %s FID: %d FD: %d. Errno = %d\n", + buf_strings+buf_fids[fid].offset, fid, fds[fid], + errno ); + return(-1); + } + error_log3 ( "Opening File %s FID: %d FD: %d\n", + buf_strings+buf_fids[fid].offset, fid, fds[fid] ); + if ( buf_fids[fid].npages == -1 ) { + /* Initialize the npages field */ + if ( fstat ( fds[fid], &sbuf ) ) { + error_log3 ( "Error Fstating %s FID: %d. Errno = %d\n", + buf_strings+buf_fids[fid].offset, fid, errno ); + } else { + buf_fids[fid].npages = ( sbuf.st_size / BUFSIZE ); + } + } + + return ( fds[fid] ); +} + +static int +bf_put_page ( fd, bhp ) +int fd; +BUFHDR_T *bhp; +{ + int nbytes; + + assert ( (bhp-bufhdr_table) < NUM_BUFS ); + if ( lseek ( fd, bhp->id.obj_id << BUFSHIFT, L_SET ) < 0 ) { + return(-1); + } + bhp->flags |= BUF_IO_IN_PROGRESS; + if (RELEASE_MASTER) { + return(-1); + } + nbytes = write(fd, buf_table[bhp-bufhdr_table], BUFSIZE); + if (GET_MASTER) { + return(-2); + } + if ( nbytes < 0 ) { + error_log1 ("Write failed with error code %d\n", errno); + return(-1); + } else if ( nbytes != BUFSIZE ) { + error_log1 ("Short write: %d bytes of %d\n", nbytes, BUFSIZE ); + } + bhp->flags &= ~(BUF_DIRTY|BUF_IO_IN_PROGRESS); + return (0); +} + +static BUFHDR_T * +bf_assign_buf ( ndx, obj, flags, len ) +int ndx; +OBJ_T *obj; +u_long flags; +int *len; /* Number of bytes read */ +{ + BUFHDR_T *bhp; + int fd; + + assert ( obj->file_id < NUM_FILE_ENTRIES ); + bhp = bf_newbuf(); + if ( !bhp ) { + return(NULL); + } + OBJ_ASSIGN ( (*obj), bhp->id ); + if ( buf_hash_table[ndx] >= NUM_BUFS ) { + buf_hash_table[ndx] = bhp-bufhdr_table; + } else { + LISTPE_INSERT ( bufhdr_table, hash, bhp, buf_hash_table[ndx] ); + } + + bhp->flags |= BUF_VALID; + if ( flags & BF_PIN ) { + bhp->flags |= BUF_PINNED; + bhp->refcount++; +#ifdef PIN_DEBUG + fprintf(stderr, "bf_assign_buf: %X PINNED (%d)\n", + buf_table + (bhp-bufhdr_table), bhp->refcount); +#endif + } + fd = bf_fid_to_fd(obj->file_id); + if ( fd == -1 ) { + error_log1 ("Invalid fid %d\n", obj->file_id); + bhp->flags |= ~BUF_IO_ERROR; + return(NULL); + } + if ( obj->obj_id >= buf_fids[obj->file_id].npages) { + buf_fids[obj->file_id].npages = obj->obj_id+1; + *len = 0; + } else if ( flags & BF_EMPTY ) { + *len = 0; + } else { + bhp->flags |= BUF_IO_IN_PROGRESS; + if (RELEASE_MASTER) { + return(NULL); + } + if ( lseek ( fd, obj->obj_id << BUFSHIFT, L_SET ) < -1 ) { + error_log2 ("Unable to perform seek on file: %d to page %d", + obj->file_id, obj->obj_id ); + bhp->flags &= ~BUF_IO_IN_PROGRESS; + bhp->flags |= ~BUF_IO_ERROR; + return(NULL); + } + *len = read(fd, buf_table[bhp-bufhdr_table], BUFSIZE); + if ( *len < 0 ) { + error_log2 ("Unable to perform read on file: %d to page %d", + obj->file_id, obj->obj_id ); + bhp->flags &= ~BUF_IO_IN_PROGRESS; + bhp->flags |= ~BUF_IO_ERROR; + return(NULL); + } + if (GET_MASTER) { + return(NULL); + } + bhp->flags &= ~BUF_IO_IN_PROGRESS; + if ( bhp->wait_proc != -1 ) { + /* wake up waiter and anyone waiting on it */ +#ifdef DEBUG + printf("Waking transaction %d due to completed I/O\n", + bhp->wait_proc); +#endif + proc_wake_id ( bhp->wait_proc ); + bhp->wait_proc = -1; + } + MAKE_MRU(bhp); + } + + if ( flags & BF_DIRTY ) { + bhp->flags |= BUF_DIRTY; + } else if ( *len < BUFSIZE ) { + bhp->flags |= BUF_NEWPAGE; + } + return ( bhp ); +} + +int +buf_last ( fid ) +int fid; +{ + int val; + + if (GET_MASTER) { + return(-1); + } + assert ( fid < NUM_FILE_ENTRIES ); + if ( buf_fids[fid].npages == -1 ) { + /* initialize npages field */ + (void) bf_fid_to_fd ( fid ); + } + val = buf_fids[fid].npages; + if ( val ) { + val--; /* Convert to page number */ + } + if (RELEASE_MASTER) { + return(-1); + } + return(val); +} + +#ifdef DEBUG +extern void +buf_dump ( id, all ) +int id; +int all; +{ + int i; + BUFHDR_T *bhp; + + printf ( "LRU + %d\n", *buf_lru ); + if ( all ) { + printf("ID\tFID\tPID\tLNEXT\tLPREV\tHNEXT\tHPREV\tSLEEP\tFLAG\tREFS\n"); + for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) { + printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", i, + bhp->id.file_id, bhp->id.obj_id, + bhp->lru.next, bhp->lru.prev, + bhp->hash.next, bhp->hash.prev, + bhp->wait_proc, bhp->flags, bhp->refcount ); + } + } else { + if ( id >= NUM_BUFS ) { + printf ( "Buffer ID (%d) too high\n", id ); + return; + } + bhp = bufhdr_table+id; + printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", i, + bhp->id.file_id, bhp->id.obj_id, + bhp->lru.next, bhp->lru.prev, + bhp->hash.next, bhp->hash.prev, + bhp->wait_proc, bhp->flags, bhp->refcount ); + } + return; +} +#endif + diff --git a/lib/libc/db/recno/extern.h b/lib/libc/db/recno/extern.h index 2188131..feed434 100644 --- a/lib/libc/db/recno/extern.h +++ b/lib/libc/db/recno/extern.h @@ -30,14 +30,14 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)extern.h 8.2 (Berkeley) 2/21/94 + * @(#)extern.h 8.3 (Berkeley) 6/4/94 */ #include "../btree/extern.h" int __rec_close __P((DB *)); int __rec_delete __P((const DB *, const DBT *, u_int)); -int __rec_dleaf __P((BTREE *, PAGE *, indx_t)); +int __rec_dleaf __P((BTREE *, PAGE *, u_int32_t)); int __rec_fd __P((const DB *)); int __rec_fmap __P((BTREE *, recno_t)); int __rec_fout __P((BTREE *)); diff --git a/lib/libc/db/recno/rec_close.c b/lib/libc/db/recno/rec_close.c index a96d4f1..16fb0b4 100644 --- a/lib/libc/db/recno/rec_close.c +++ b/lib/libc/db/recno/rec_close.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_close.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)rec_close.c 8.6 (Berkeley) 8/18/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -76,11 +76,11 @@ __rec_close(dbp) /* Committed to closing. */ status = RET_SUCCESS; - if (ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize)) + if (F_ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize)) status = RET_ERROR; - if (!ISSET(t, R_INMEM)) - if (ISSET(t, R_CLOSEFP)) { + if (!F_ISSET(t, R_INMEM)) + if (F_ISSET(t, R_CLOSEFP)) { if (fclose(t->bt_rfp)) status = RET_ERROR; } else @@ -125,39 +125,58 @@ __rec_sync(dbp, flags) if (flags == R_RECNOSYNC) return (__bt_sync(dbp, 0)); - if (ISSET(t, R_RDONLY | R_INMEM) || !ISSET(t, R_MODIFIED)) + if (F_ISSET(t, R_RDONLY | R_INMEM) || !F_ISSET(t, R_MODIFIED)) return (RET_SUCCESS); /* Read any remaining records into the tree. */ - if (!ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + if (!F_ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) return (RET_ERROR); /* Rewind the file descriptor. */ if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0) return (RET_ERROR); - iov[1].iov_base = "\n"; - iov[1].iov_len = 1; - scursor = t->bt_rcursor; + /* Save the cursor. */ + scursor = t->bt_cursor.rcursor; key.size = sizeof(recno_t); key.data = &trec; - status = (dbp->seq)(dbp, &key, &data, R_FIRST); - while (status == RET_SUCCESS) { - iov[0].iov_base = data.data; - iov[0].iov_len = data.size; - if (writev(t->bt_rfd, iov, 2) != data.size + 1) - return (RET_ERROR); - status = (dbp->seq)(dbp, &key, &data, R_NEXT); - } - t->bt_rcursor = scursor; + if (F_ISSET(t, R_FIXLEN)) { + /* + * We assume that fixed length records are all fixed length. + * Any that aren't are either EINVAL'd or corrected by the + * record put code. + */ + status = (dbp->seq)(dbp, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + if (write(t->bt_rfd, data.data, data.size) != data.size) + return (RET_ERROR); + status = (dbp->seq)(dbp, &key, &data, R_NEXT); + } + } else { + iov[1].iov_base = &t->bt_bval; + iov[1].iov_len = 1; + + status = (dbp->seq)(dbp, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + iov[0].iov_base = data.data; + iov[0].iov_len = data.size; + if (writev(t->bt_rfd, iov, 2) != data.size + 1) + return (RET_ERROR); + status = (dbp->seq)(dbp, &key, &data, R_NEXT); + } + } + + /* Restore the cursor. */ + t->bt_cursor.rcursor = scursor; + if (status == RET_ERROR) return (RET_ERROR); if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1) return (RET_ERROR); if (ftruncate(t->bt_rfd, off)) return (RET_ERROR); - CLR(t, R_MODIFIED); + F_CLR(t, R_MODIFIED); return (RET_SUCCESS); } diff --git a/lib/libc/db/recno/rec_delete.c b/lib/libc/db/recno/rec_delete.c index 35f56b9..a16593d 100644 --- a/lib/libc/db/recno/rec_delete.c +++ b/lib/libc/db/recno/rec_delete.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_delete.c 8.4 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)rec_delete.c 8.7 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -88,13 +88,13 @@ __rec_delete(dbp, key, flags) status = rec_rdelete(t, nrec); break; case R_CURSOR: - if (!ISSET(t, B_SEQINIT)) + if (!F_ISSET(&t->bt_cursor, CURS_INIT)) goto einval; if (t->bt_nrecs == 0) return (RET_SPECIAL); - status = rec_rdelete(t, t->bt_rcursor - 1); + status = rec_rdelete(t, t->bt_cursor.rcursor - 1); if (status == RET_SUCCESS) - --t->bt_rcursor; + --t->bt_cursor.rcursor; break; default: einval: errno = EINVAL; @@ -102,7 +102,7 @@ einval: errno = EINVAL; } if (status == RET_SUCCESS) - SET(t, B_MODIFIED | R_MODIFIED); + F_SET(t, B_MODIFIED | R_MODIFIED); return (status); } @@ -154,11 +154,11 @@ int __rec_dleaf(t, h, index) BTREE *t; PAGE *h; - indx_t index; + u_int32_t index; { - register RLEAF *rl; - register indx_t *ip, cnt, offset; - register size_t nbytes; + RLEAF *rl; + indx_t *ip, cnt, offset; + u_int32_t nbytes; char *from; void *to; diff --git a/lib/libc/db/recno/rec_get.c b/lib/libc/db/recno/rec_get.c index 3555575..47dd773 100644 --- a/lib/libc/db/recno/rec_get.c +++ b/lib/libc/db/recno/rec_get.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_get.c 8.4 (Berkeley) 3/1/94"; +static char sccsid[] = "@(#)rec_get.c 8.9 (Berkeley) 8/18/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -90,7 +90,7 @@ __rec_get(dbp, key, data, flags) * original file. */ if (nrec > t->bt_nrecs) { - if (ISSET(t, R_EOF | R_INMEM)) + if (F_ISSET(t, R_EOF | R_INMEM)) return (RET_SPECIAL); if ((status = t->bt_irec(t, nrec)) != RET_SUCCESS) return (status); @@ -101,7 +101,7 @@ __rec_get(dbp, key, data, flags) return (RET_ERROR); status = __rec_ret(t, e, 0, NULL, data); - if (ISSET(t, B_DB_LOCK)) + if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; @@ -127,31 +127,38 @@ __rec_fpipe(t, top) recno_t nrec; size_t len; int ch; - char *p; + u_char *p; - if (t->bt_dbufsz < t->bt_reclen) { - if ((t->bt_dbuf = - (char *)realloc(t->bt_dbuf, t->bt_reclen)) == NULL) + if (t->bt_rdata.size < t->bt_reclen) { + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_reclen) : + realloc(t->bt_rdata.data, t->bt_reclen); + if (t->bt_rdata.data == NULL) return (RET_ERROR); - t->bt_dbufsz = t->bt_reclen; + t->bt_rdata.size = t->bt_reclen; } - data.data = t->bt_dbuf; + data.data = t->bt_rdata.data; data.size = t->bt_reclen; - for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + for (nrec = t->bt_nrecs; nrec < top;) { len = t->bt_reclen; - for (p = t->bt_dbuf;; *p++ = ch) - if ((ch = getc(t->bt_rfp)) == EOF || !len--) { - if (__rec_iput(t, nrec, &data, 0) - != RET_SUCCESS) + for (p = t->bt_rdata.data;; *p++ = ch) + if ((ch = getc(t->bt_rfp)) == EOF || !--len) { + if (ch != EOF) + *p = ch; + if (len != 0) + memset(p, t->bt_bval, len); + if (__rec_iput(t, + nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); + ++nrec; break; } if (ch == EOF) break; } if (nrec < top) { - SET(t, R_EOF); + F_SET(t, R_EOF); return (RET_SPECIAL); } return (RET_SUCCESS); @@ -177,14 +184,15 @@ __rec_vpipe(t, top) indx_t len; size_t sz; int bval, ch; - char *p; + u_char *p; bval = t->bt_bval; for (nrec = t->bt_nrecs; nrec < top; ++nrec) { - for (p = t->bt_dbuf, sz = t->bt_dbufsz;; *p++ = ch, --sz) { + for (p = t->bt_rdata.data, + sz = t->bt_rdata.size;; *p++ = ch, --sz) { if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) { - data.data = t->bt_dbuf; - data.size = p - t->bt_dbuf; + data.data = t->bt_rdata.data; + data.size = p - (u_char *)t->bt_rdata.data; if (ch == EOF && data.size == 0) break; if (__rec_iput(t, nrec, &data, 0) @@ -193,19 +201,21 @@ __rec_vpipe(t, top) break; } if (sz == 0) { - len = p - t->bt_dbuf; - t->bt_dbufsz += (sz = 256); - if ((t->bt_dbuf = (char *)realloc(t->bt_dbuf, - t->bt_dbufsz)) == NULL) + len = p - (u_char *)t->bt_rdata.data; + t->bt_rdata.size += (sz = 256); + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_rdata.size) : + realloc(t->bt_rdata.data, t->bt_rdata.size); + if (t->bt_rdata.data == NULL) return (RET_ERROR); - p = t->bt_dbuf + len; + p = (u_char *)t->bt_rdata.data + len; } } if (ch == EOF) break; } if (nrec < top) { - SET(t, R_EOF); + F_SET(t, R_EOF); return (RET_SPECIAL); } return (RET_SUCCESS); @@ -228,33 +238,36 @@ __rec_fmap(t, top) { DBT data; recno_t nrec; - caddr_t sp, ep; + u_char *sp, *ep, *p; size_t len; - char *p; - if (t->bt_dbufsz < t->bt_reclen) { - if ((t->bt_dbuf = - (char *)realloc(t->bt_dbuf, t->bt_reclen)) == NULL) + if (t->bt_rdata.size < t->bt_reclen) { + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_reclen) : + realloc(t->bt_rdata.data, t->bt_reclen); + if (t->bt_rdata.data == NULL) return (RET_ERROR); - t->bt_dbufsz = t->bt_reclen; + t->bt_rdata.size = t->bt_reclen; } - data.data = t->bt_dbuf; + data.data = t->bt_rdata.data; data.size = t->bt_reclen; - sp = t->bt_cmap; - ep = t->bt_emap; + sp = (u_char *)t->bt_cmap; + ep = (u_char *)t->bt_emap; for (nrec = t->bt_nrecs; nrec < top; ++nrec) { if (sp >= ep) { - SET(t, R_EOF); + F_SET(t, R_EOF); return (RET_SPECIAL); } len = t->bt_reclen; - for (p = t->bt_dbuf; sp < ep && len--; *p++ = *sp++); - memset(p, t->bt_bval, len); + for (p = t->bt_rdata.data; + sp < ep && len > 0; *p++ = *sp++, --len); + if (len != 0) + memset(p, t->bt_bval, len); if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); } - t->bt_cmap = sp; + t->bt_cmap = (caddr_t)sp; return (RET_SUCCESS); } @@ -274,25 +287,25 @@ __rec_vmap(t, top) recno_t top; { DBT data; - caddr_t sp, ep; + u_char *sp, *ep; recno_t nrec; int bval; - sp = t->bt_cmap; - ep = t->bt_emap; + sp = (u_char *)t->bt_cmap; + ep = (u_char *)t->bt_emap; bval = t->bt_bval; for (nrec = t->bt_nrecs; nrec < top; ++nrec) { if (sp >= ep) { - SET(t, R_EOF); + F_SET(t, R_EOF); return (RET_SPECIAL); } for (data.data = sp; sp < ep && *sp != bval; ++sp); - data.size = sp - (caddr_t)data.data; + data.size = sp - (u_char *)data.data; if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) return (RET_ERROR); ++sp; } - t->bt_cmap = sp; + t->bt_cmap = (caddr_t)sp; return (RET_SUCCESS); } diff --git a/lib/libc/db/recno/rec_open.c b/lib/libc/db/recno/rec_open.c index 2a0f354..51d8a3c 100644 --- a/lib/libc/db/recno/rec_open.c +++ b/lib/libc/db/recno/rec_open.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_open.c 8.6 (Berkeley) 2/22/94"; +static char sccsid[] = "@(#)rec_open.c 8.10 (Berkeley) 9/1/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -98,7 +98,7 @@ __rec_open(fname, flags, mode, openinfo, dflags) t = dbp->internal; if (openinfo) { if (openinfo->flags & R_FIXEDLEN) { - SET(t, R_FIXLEN); + F_SET(t, R_FIXLEN); t->bt_reclen = openinfo->reclen; if (t->bt_reclen == 0) goto einval; @@ -107,12 +107,11 @@ __rec_open(fname, flags, mode, openinfo, dflags) } else t->bt_bval = '\n'; - SET(t, R_RECNO); + F_SET(t, R_RECNO); if (fname == NULL) - SET(t, R_EOF | R_INMEM); + F_SET(t, R_EOF | R_INMEM); else t->bt_rfd = rfd; - t->bt_rcursor = 0; if (fname != NULL) { /* @@ -124,20 +123,20 @@ __rec_open(fname, flags, mode, openinfo, dflags) if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) { switch (flags & O_ACCMODE) { case O_RDONLY: - SET(t, R_RDONLY); + F_SET(t, R_RDONLY); break; default: goto einval; } slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) goto err; - SET(t, R_CLOSEFP); + F_SET(t, R_CLOSEFP); t->bt_irec = - ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe; + F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe; } else { switch (flags & O_ACCMODE) { case O_RDONLY: - SET(t, R_RDONLY); + F_SET(t, R_RDONLY); break; case O_RDWR: break; @@ -157,8 +156,16 @@ slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) * fails if the file is too large. */ if (sb.st_size == 0) - SET(t, R_EOF); + F_SET(t, R_EOF); else { +#ifdef MMAP_NOT_AVAILABLE + /* + * XXX + * Mmap doesn't work correctly on many current + * systems. In particular, it can fail subtly, + * with cache coherency problems. Don't use it + * for now. + */ t->bt_msize = sb.st_size; if ((t->bt_smap = mmap(NULL, t->bt_msize, PROT_READ, MAP_PRIVATE, rfd, @@ -166,9 +173,12 @@ slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) goto slow; t->bt_cmap = t->bt_smap; t->bt_emap = t->bt_smap + sb.st_size; - t->bt_irec = ISSET(t, R_FIXLEN) ? + t->bt_irec = F_ISSET(t, R_FIXLEN) ? __rec_fmap : __rec_vmap; - SET(t, R_MEMMAPPED); + F_SET(t, R_MEMMAPPED); +#else + goto slow; +#endif } } } @@ -186,13 +196,14 @@ slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL) goto err; if ((h->flags & P_TYPE) == P_BLEAF) { - h->flags = h->flags & ~P_TYPE | P_RLEAF; + F_CLR(h, P_TYPE); + F_SET(h, P_RLEAF); mpool_put(t->bt_mp, h, MPOOL_DIRTY); } else mpool_put(t->bt_mp, h, 0); if (openinfo && openinfo->flags & R_SNAPSHOT && - !ISSET(t, R_EOF | R_INMEM) && + !F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) goto err; return (dbp); @@ -222,7 +233,7 @@ __rec_fd(dbp) } /* In-memory database can't have a file descriptor. */ - if (ISSET(t, R_INMEM)) { + if (F_ISSET(t, R_INMEM)) { errno = ENOENT; return (-1); } diff --git a/lib/libc/db/recno/rec_put.c b/lib/libc/db/recno/rec_put.c index 8c3bae5..1afae0d 100644 --- a/lib/libc/db/recno/rec_put.c +++ b/lib/libc/db/recno/rec_put.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_put.c 8.3 (Berkeley) 3/1/94"; +static char sccsid[] = "@(#)rec_put.c 8.7 (Berkeley) 8/18/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -66,7 +66,7 @@ __rec_put(dbp, key, data, flags) u_int flags; { BTREE *t; - DBT tdata; + DBT fdata, tdata; recno_t nrec; int status; @@ -78,11 +78,38 @@ __rec_put(dbp, key, data, flags) t->bt_pinned = NULL; } + /* + * If using fixed-length records, and the record is long, return + * EINVAL. If it's short, pad it out. Use the record data return + * memory, it's only short-term. + */ + if (F_ISSET(t, R_FIXLEN) && data->size != t->bt_reclen) { + if (data->size > t->bt_reclen) + goto einval; + + if (t->bt_rdata.size < t->bt_reclen) { + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_reclen) : + realloc(t->bt_rdata.data, t->bt_reclen); + if (t->bt_rdata.data == NULL) + return (RET_ERROR); + t->bt_rdata.size = t->bt_reclen; + } + memmove(t->bt_rdata.data, data->data, data->size); + memset((char *)t->bt_rdata.data + data->size, + t->bt_bval, t->bt_reclen - data->size); + fdata.data = t->bt_rdata.data; + fdata.size = t->bt_reclen; + } else { + fdata.data = data->data; + fdata.size = data->size; + } + switch (flags) { case R_CURSOR: - if (!ISSET(t, B_SEQINIT)) + if (!F_ISSET(&t->bt_cursor, CURS_INIT)) goto einval; - nrec = t->bt_rcursor; + nrec = t->bt_cursor.rcursor; break; case R_SETCURSOR: if ((nrec = *(recno_t *)key->data) == 0) @@ -115,11 +142,11 @@ einval: errno = EINVAL; * already in the database. If skipping records, create empty ones. */ if (nrec > t->bt_nrecs) { - if (!ISSET(t, R_EOF | R_INMEM) && + if (!F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, nrec) == RET_ERROR) return (RET_ERROR); if (nrec > t->bt_nrecs + 1) { - if (ISSET(t, R_FIXLEN)) { + if (F_ISSET(t, R_FIXLEN)) { if ((tdata.data = (void *)malloc(t->bt_reclen)) == NULL) return (RET_ERROR); @@ -133,18 +160,18 @@ einval: errno = EINVAL; if (__rec_iput(t, t->bt_nrecs, &tdata, 0) != RET_SUCCESS) return (RET_ERROR); - if (ISSET(t, R_FIXLEN)) + if (F_ISSET(t, R_FIXLEN)) free(tdata.data); } } - if ((status = __rec_iput(t, nrec - 1, data, flags)) != RET_SUCCESS) + if ((status = __rec_iput(t, nrec - 1, &fdata, flags)) != RET_SUCCESS) return (status); if (flags == R_SETCURSOR) - t->bt_rcursor = nrec; + t->bt_cursor.rcursor = nrec; - SET(t, R_MODIFIED); + F_SET(t, R_MODIFIED); return (__rec_ret(t, NULL, nrec, key, NULL)); } @@ -171,7 +198,7 @@ __rec_iput(t, nrec, data, flags) PAGE *h; indx_t index, nxtindex; pgno_t pg; - size_t nbytes; + u_int32_t nbytes; int dflags, status; char *dest, db[NOVFLSIZE]; @@ -187,7 +214,7 @@ __rec_iput(t, nrec, data, flags) tdata.data = db; tdata.size = NOVFLSIZE; *(pgno_t *)db = pg; - *(size_t *)(db + sizeof(pgno_t)) = data->size; + *(u_int32_t *)(db + sizeof(pgno_t)) = data->size; dflags = P_BIGDATA; data = &tdata; } else @@ -246,7 +273,7 @@ __rec_iput(t, nrec, data, flags) WR_RLEAF(dest, data, dflags); ++t->bt_nrecs; - SET(t, B_MODIFIED); + F_SET(t, B_MODIFIED); mpool_put(t->bt_mp, h, MPOOL_DIRTY); return (RET_SUCCESS); diff --git a/lib/libc/db/recno/rec_search.c b/lib/libc/db/recno/rec_search.c index 5d5df3c..acc109e 100644 --- a/lib/libc/db/recno/rec_search.c +++ b/lib/libc/db/recno/rec_search.c @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_search.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)rec_search.c 8.4 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -91,8 +91,7 @@ __rec_search(t, recno, op) total += r->nrecs; } - if (__bt_push(t, pg, index - 1) == RET_ERROR) - return (NULL); + BT_PUSH(t, pg, index - 1); pg = r->pgno; switch (op) { diff --git a/lib/libc/db/recno/rec_seq.c b/lib/libc/db/recno/rec_seq.c index bc66d1c..f80992c 100644 --- a/lib/libc/db/recno/rec_seq.c +++ b/lib/libc/db/recno/rec_seq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1991, 1993 + * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #ifndef lint -static char sccsid[] = "@(#)rec_seq.c 8.2 (Berkeley) 9/7/93"; +static char sccsid[] = "@(#)rec_seq.c 8.3 (Berkeley) 7/14/94"; #endif /* not lint */ #include @@ -82,8 +82,8 @@ __rec_seq(dbp, key, data, flags) goto einval; break; case R_NEXT: - if (ISSET(t, B_SEQINIT)) { - nrec = t->bt_rcursor + 1; + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + nrec = t->bt_cursor.rcursor + 1; break; } /* FALLTHROUGH */ @@ -91,14 +91,14 @@ __rec_seq(dbp, key, data, flags) nrec = 1; break; case R_PREV: - if (ISSET(t, B_SEQINIT)) { - if ((nrec = t->bt_rcursor - 1) == 0) + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + if ((nrec = t->bt_cursor.rcursor - 1) == 0) return (RET_SPECIAL); break; } /* FALLTHROUGH */ case R_LAST: - if (!ISSET(t, R_EOF | R_INMEM) && + if (!F_ISSET(t, R_EOF | R_INMEM) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) return (RET_ERROR); nrec = t->bt_nrecs; @@ -109,7 +109,7 @@ einval: errno = EINVAL; } if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) { - if (!ISSET(t, R_EOF | R_INMEM) && + if (!F_ISSET(t, R_EOF | R_INMEM) && (status = t->bt_irec(t, nrec)) != RET_SUCCESS) return (status); if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) @@ -119,11 +119,11 @@ einval: errno = EINVAL; if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL) return (RET_ERROR); - SET(t, B_SEQINIT); - t->bt_rcursor = nrec; + F_SET(&t->bt_cursor, CURS_INIT); + t->bt_cursor.rcursor = nrec; status = __rec_ret(t, e, nrec, key, data); - if (ISSET(t, B_DB_LOCK)) + if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; diff --git a/lib/libc/db/recno/rec_utils.c b/lib/libc/db/recno/rec_utils.c index f7fb145..baea3fa 100644 --- a/lib/libc/db/recno/rec_utils.c +++ b/lib/libc/db/recno/rec_utils.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)rec_utils.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)rec_utils.c 8.6 (Berkeley) 7/16/94"; #endif /* LIBC_SCCS and not lint */ #include @@ -45,11 +45,14 @@ static char sccsid[] = "@(#)rec_utils.c 8.3 (Berkeley) 2/21/94"; #include "recno.h" /* - * __REC_RET -- Build return data as a result of search or scan. + * __rec_ret -- + * Build return data. * * Parameters: * t: tree - * d: LEAF to be returned to the user. + * e: key/data pair to be returned + * nrec: record number + * key: user's key structure * data: user's data structure * * Returns: @@ -62,53 +65,58 @@ __rec_ret(t, e, nrec, key, data) recno_t nrec; DBT *key, *data; { - register RLEAF *rl; - register void *p; + RLEAF *rl; + void *p; - if (data == NULL) - goto retkey; + if (key == NULL) + goto dataonly; - rl = GETRLEAF(e->page, e->index); + /* We have to copy the key, it's not on the page. */ + if (sizeof(recno_t) > t->bt_rkey.size) { + p = (void *)(t->bt_rkey.data == NULL ? + malloc(sizeof(recno_t)) : + realloc(t->bt_rkey.data, sizeof(recno_t))); + if (p == NULL) + return (RET_ERROR); + t->bt_rkey.data = p; + t->bt_rkey.size = sizeof(recno_t); + } + memmove(t->bt_rkey.data, &nrec, sizeof(recno_t)); + key->size = sizeof(recno_t); + key->data = t->bt_rkey.data; + +dataonly: + if (data == NULL) + return (RET_SUCCESS); /* - * We always copy big data to make it contigous. Otherwise, we + * We must copy big keys/data to make them contigous. Otherwise, * leave the page pinned and don't copy unless the user specified * concurrent access. */ + rl = GETRLEAF(e->page, e->index); if (rl->flags & P_BIGDATA) { if (__ovfl_get(t, rl->bytes, - &data->size, &t->bt_dbuf, &t->bt_dbufsz)) + &data->size, &t->bt_rdata.data, &t->bt_rdata.size)) return (RET_ERROR); - data->data = t->bt_dbuf; - } else if (ISSET(t, B_DB_LOCK)) { + data->data = t->bt_rdata.data; + } else if (F_ISSET(t, B_DB_LOCK)) { /* Use +1 in case the first record retrieved is 0 length. */ - if (rl->dsize + 1 > t->bt_dbufsz) { - if ((p = - (void *)realloc(t->bt_dbuf, rl->dsize + 1)) == NULL) + if (rl->dsize + 1 > t->bt_rdata.size) { + p = (void *)(t->bt_rdata.data == NULL ? + malloc(rl->dsize + 1) : + realloc(t->bt_rdata.data, rl->dsize + 1)); + if (p == NULL) return (RET_ERROR); - t->bt_dbuf = p; - t->bt_dbufsz = rl->dsize + 1; + t->bt_rdata.data = p; + t->bt_rdata.size = rl->dsize + 1; } - memmove(t->bt_dbuf, rl->bytes, rl->dsize); + memmove(t->bt_rdata.data, rl->bytes, rl->dsize); data->size = rl->dsize; - data->data = t->bt_dbuf; + data->data = t->bt_rdata.data; } else { data->size = rl->dsize; data->data = rl->bytes; } - -retkey: if (key == NULL) - return (RET_SUCCESS); - - /* We have to copy the key, it's not on the page. */ - if (sizeof(recno_t) > t->bt_kbufsz) { - if ((p = (void *)realloc(t->bt_kbuf, sizeof(recno_t))) == NULL) - return (RET_ERROR); - t->bt_kbuf = p; - t->bt_kbufsz = sizeof(recno_t); - } - memmove(t->bt_kbuf, &nrec, sizeof(recno_t)); - key->size = sizeof(recno_t); - key->data = t->bt_kbuf; return (RET_SUCCESS); } diff --git a/lib/libc/db/test/Makefile b/lib/libc/db/test/Makefile index c816432..a5dd08a 100644 --- a/lib/libc/db/test/Makefile +++ b/lib/libc/db/test/Makefile @@ -1,17 +1,23 @@ -# @(#)Makefile 8.9 (Berkeley) 2/21/94 +# @(#)Makefile 8.15 (Berkeley) 7/28/94 PROG= dbtest OBJS= dbtest.o strerror.o -# Add -DSTATISTICS to CFLAGS to get btree statistical use info. -# Note, the db library has to be compiled for statistics as well. -CFLAGS= -D__DBINTERFACE_PRIVATE -DDEBUG -O ${INC} +# Uncomment the STAT line get hash and btree statistical use info. This +# also forces ld to load the btree debug functions for use by gdb, which +# is useful. The db library has to be compiled with -DSTATISTICS as well. +INC= -I${PORTDIR}/include -I${PORTDIR} +OORG= -g +#STAT= -DSTATISTICS +CFLAGS= -D__DBINTERFACE_PRIVATE -DDEBUG ${STAT} ${OORG} ${INC} -dbtest: ${OBJS} ${LIB} - ${CC} -o $@ ${OBJS} ${LIB} +dbtest: ${OBJS} ${PORTDIR}/libdb.a + ${CC} -o $@ ${OBJS} ${PORTDIR}/libdb.a -strerror.o: ../PORT/clib/strerror.c - ${CC} -c ../PORT/clib/strerror.c +strerror.o: ${PORTDIR}/clib/strerror.c + ${CC} -c ${PORTDIR}/clib/strerror.c clean: - rm -f gmon.out ${OBJS} ${PROG} t1 t2 t3 + rm -f dbtest.core gmon.out ${OBJS} ${PROG} t1 t2 t3 + +${OBJS}: Makefile diff --git a/lib/libc/db/test/README b/lib/libc/db/test/README index 8631c77..0c0cd13 100644 --- a/lib/libc/db/test/README +++ b/lib/libc/db/test/README @@ -1,17 +1,25 @@ -# @(#)README 8.2 (Berkeley) 2/21/94 +# @(#)README 8.8 (Berkeley) 7/31/94 To build this portably, try something like: - make INC="-I../PORT/MACH/ -I../PORT/MACH/include" LIB=../PORT/MACH/libdb.a + make PORTDIR="../PORT/MACH" where MACH is the machine, i.e. "sunos.4.1.1". To run the tests, enter "sh run.test". If your system dictionary isn't in /usr/share/dict/words, edit run.test to reflect the correct place. +Fairly large files (the command files) are built in this directory during +the test runs, and even larger files (the database files) are created in +"/var/tmp". If the latter directory doesn't exist, set the environmental +variable TMPDIR to a directory where the files can be built. + =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -The script file consists of lines with a initial character which is -the "command" for that line. Legal characters are as follows: +The script file consists of lines with an initial character which is +the command for that line, or an initial character indicating a key +or data entry for a previous command. + +Legal command characters are as follows: c: compare a record + must be followed by [kK][dD]; the data value in the database @@ -20,17 +28,24 @@ c: compare a record e: echo a string + writes out the rest of the line into the output file; if the last character is not a carriage-return, a newline is appended. +f: set the flags for the next command + + no value zero's the flags g: do a get command + must be followed by [kK] + writes out the retrieved data DBT. +o [r]: dump [reverse] + + dump the database out, if 'r' is set, in reverse order. p: do a put command + must be followed by [kK][dD] r: do a del command - + must be followed by [kK] + + must be followed by [kK] unless R_CURSOR flag set. +S: sync the database s: do a seq command + + must be followed by [kK] if R_CURSOR flag set. + writes out the retrieved data DBT. -f: set the flags for the next command - + no value zero's the flags + +Legal key/data characters are as follows: + D [file]: data file + set the current data value to the contents of the file d [data]: @@ -39,17 +54,21 @@ K [file]: key file + set the current key value to the contents of the file k [data]: + set the current key value to the contents of the line. -o [r]: dump [reverse] - + dump the database out, if 'r' is set, in reverse order. + +Blank lines, lines with leading white space, and lines with leading +hash marks (#) are ignored. Options to dbtest are as follows: + -d: Set the DB_LOCK flag. -f: Use the file argument as the database file. -i: Use the rest of the argument to set elements in the info structure. If the type is btree, then "-i cachesize=10240" will set BTREEINFO.cachesize to 10240. -o: The rest of the argument is the output file instead of using stdout. + -s: Don't delete the database file before opening it, i.e. + use the database file from a previous run. -Dbtest requires two arguments, the type of access "hash", "recno" or -"btree", and the script name. +Dbtest requires two arguments, the type of access "hash", "recno" +or "btree", and the script name or "-" to indicate stdin. diff --git a/lib/libc/db/test/dbtest.c b/lib/libc/db/test/dbtest.c index b8f97dc..4341b4c 100644 --- a/lib/libc/db/test/dbtest.c +++ b/lib/libc/db/test/dbtest.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1992, 1993 + * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,12 +33,12 @@ #ifndef lint static char copyright[] = -"@(#) Copyright (c) 1992, 1993\n\ +"@(#) Copyright (c) 1992, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint -static char sccsid[] = "@(#)dbtest.c 8.8 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)dbtest.c 8.17 (Berkeley) 9/1/94"; #endif /* not lint */ #include @@ -65,6 +65,8 @@ void get __P((DB *, DBT *)); void getdata __P((DB *, DBT *, DBT *)); void put __P((DB *, DBT *, DBT *)); void rem __P((DB *, DBT *)); +char *sflags __P((int)); +void synk __P((DB *)); void *rfile __P((char *, size_t *)); void seq __P((DB *, DBT *)); u_int setflags __P((char *)); @@ -72,13 +74,14 @@ void *setinfo __P((DBTYPE, char *)); void usage __P((void)); void *xmalloc __P((char *, size_t)); -DBTYPE type; -void *infop; -u_long lineno; -u_int flags; -int ofd = STDOUT_FILENO; +DBTYPE type; /* Database type. */ +void *infop; /* Iflags. */ +u_long lineno; /* Current line in test script. */ +u_int flags; /* Current DB flags. */ +int ofd = STDOUT_FILENO; /* Standard output fd. */ DB *XXdbp; /* Global for gdb. */ +int XXlineno; /* Fast breakpoint for gdb. */ int main(argc, argv) @@ -91,14 +94,15 @@ main(argc, argv) DB *dbp; DBT data, key, keydata; size_t len; - int ch, oflags; - char *fname, *infoarg, *p, buf[8 * 1024]; + int ch, oflags, sflag; + char *fname, *infoarg, *p, *t, buf[8 * 1024]; infoarg = NULL; fname = NULL; oflags = O_CREAT | O_RDWR; - while ((ch = getopt(argc, argv, "f:i:lo:")) != EOF) - switch(ch) { + sflag = 0; + while ((ch = getopt(argc, argv, "f:i:lo:s")) != EOF) + switch (ch) { case 'f': fname = optarg; break; @@ -113,6 +117,9 @@ main(argc, argv) O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) err("%s: %s", optarg, strerror(errno)); break; + case 's': + sflag = 1; + break; case '?': default: usage(); @@ -127,8 +134,8 @@ main(argc, argv) type = dbtype(*argv++); /* Open the descriptor file. */ - if (freopen(*argv, "r", stdin) == NULL) - err("%s: %s", *argv, strerror(errno)); + if (strcmp(*argv, "-") && freopen(*argv, "r", stdin) == NULL) + err("%s: %s", *argv, strerror(errno)); /* Set up the db structure as necessary. */ if (infoarg == NULL) @@ -139,7 +146,10 @@ main(argc, argv) if (*p != '\0') infop = setinfo(type, p); - /* Open the DB. */ + /* + * Open the DB. Delete any preexisting copy, you almost never + * want it around, and it often screws up tests. + */ if (fname == NULL) { p = getenv("TMPDIR"); if (p == NULL) @@ -147,7 +157,9 @@ main(argc, argv) (void)sprintf(buf, "%s/__dbtest", p); fname = buf; (void)unlink(buf); - } + } else if (!sflag) + (void)unlink(fname); + if ((dbp = dbopen(fname, oflags, S_IRUSR | S_IWUSR, type, infop)) == NULL) err("dbopen: %s", strerror(errno)); @@ -156,8 +168,16 @@ main(argc, argv) state = COMMAND; for (lineno = 1; (p = fgets(buf, sizeof(buf), stdin)) != NULL; ++lineno) { - len = strlen(buf); - switch(*p) { + /* Delete the newline, displaying the key/data is easier. */ + if (ofd == STDOUT_FILENO && (t = strchr(p, '\n')) != NULL) + *t = '\0'; + if ((len = strlen(buf)) == 0 || isspace(*p) || *p == '#') + continue; + + /* Convenient gdb break point. */ + if (XXlineno == lineno) + XXlineno = 1; + switch (*p) { case 'c': /* compare */ if (state != COMMAND) err("line %lu: not expecting command", lineno); @@ -170,7 +190,8 @@ main(argc, argv) /* Don't display the newline, if CR at EOL. */ if (p[len - 2] == '\r') --len; - if (write(ofd, p + 1, len - 1) != len - 1) + if (write(ofd, p + 1, len - 1) != len - 1 || + write(ofd, "\n", 1) != 1) err("write: %s", strerror(errno)); break; case 'g': /* get */ @@ -188,8 +209,19 @@ main(argc, argv) case 'r': /* remove */ if (state != COMMAND) err("line %lu: not expecting command", lineno); - state = KEY; - command = REMOVE; + if (flags == R_CURSOR) { + rem(dbp, &key); + state = COMMAND; + } else { + state = KEY; + command = REMOVE; + } + break; + case 'S': /* sync */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + synk(dbp); + state = COMMAND; break; case 's': /* seq */ if (state != COMMAND) @@ -213,7 +245,7 @@ main(argc, argv) err("line %lu: not expecting data", lineno); data.data = xmalloc(p + 1, len - 1); data.size = len - 1; -ldata: switch(command) { +ldata: switch (command) { case COMPARE: compare(&keydata, &data); break; @@ -249,7 +281,7 @@ ldata: switch(command) { key.data = xmalloc(p + 1, len - 1); key.size = len - 1; } -lkey: switch(command) { +lkey: switch (command) { case COMPARE: getdata(dbp, &key, &keydata); state = DATA; @@ -265,13 +297,13 @@ lkey: switch(command) { break; case REMOVE: rem(dbp, &key); - if (type != DB_RECNO) + if ((type != DB_RECNO) && (flags != R_CURSOR)) free(key.data); state = COMMAND; break; case SEQ: seq(dbp, &key); - if (type != DB_RECNO) + if ((type != DB_RECNO) && (flags != R_CURSOR)) free(key.data); state = COMMAND; break; @@ -285,11 +317,15 @@ lkey: switch(command) { break; default: err("line %lu: %s: unknown command character", - p, lineno); + lineno, p); } } #ifdef STATISTICS - if (type == DB_BTREE) + /* + * -l must be used (DB_LOCK must be set) for this to be + * used, otherwise a page will be locked and it will fail. + */ + if (type == DB_BTREE && oflags & DB_LOCK) __bt_stat(dbp); #endif if (dbp->close(dbp)) @@ -299,7 +335,6 @@ lkey: switch(command) { } #define NOOVERWRITE "put failed, would overwrite key\n" -#define NOSUCHKEY "get failed, no such key\n" void compare(db1, db2) @@ -328,17 +363,23 @@ get(dbp, kp) { DBT data; - switch(dbp->get(dbp, kp, &data, flags)) { + switch (dbp->get(dbp, kp, &data, flags)) { case 0: (void)write(ofd, data.data, data.size); + if (ofd == STDOUT_FILENO) + (void)write(ofd, "\n", 1); break; case -1: err("line %lu: get: %s", lineno, strerror(errno)); /* NOTREACHED */ case 1: - (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); - (void)fprintf(stderr, "%d: %.*s: %s\n", - lineno, kp->size, kp->data, NOSUCHKEY); +#define NOSUCHKEY "get failed, no such key\n" + if (ofd != STDOUT_FILENO) + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + else + (void)fprintf(stderr, "%d: %.*s: %s", + lineno, MIN(kp->size, 20), kp->data, NOSUCHKEY); +#undef NOSUCHKEY break; } } @@ -348,14 +389,14 @@ getdata(dbp, kp, dp) DB *dbp; DBT *kp, *dp; { - switch(dbp->get(dbp, kp, dp, flags)) { + switch (dbp->get(dbp, kp, dp, flags)) { case 0: return; case -1: err("line %lu: getdata: %s", lineno, strerror(errno)); /* NOTREACHED */ case 1: - err("line %lu: get failed, no such key", lineno); + err("line %lu: getdata failed, no such key", lineno); /* NOTREACHED */ } } @@ -365,7 +406,7 @@ put(dbp, kp, dp) DB *dbp; DBT *kp, *dp; { - switch(dbp->put(dbp, kp, dp, flags)) { + switch (dbp->put(dbp, kp, dp, flags)) { case 0: break; case -1: @@ -382,34 +423,67 @@ rem(dbp, kp) DB *dbp; DBT *kp; { - switch(dbp->del(dbp, kp, flags)) { + switch (dbp->del(dbp, kp, flags)) { case 0: break; case -1: - err("line %lu: get: %s", lineno, strerror(errno)); + err("line %lu: rem: %s", lineno, strerror(errno)); /* NOTREACHED */ case 1: - (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); +#define NOSUCHKEY "rem failed, no such key\n" + if (ofd != STDOUT_FILENO) + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + else if (flags != R_CURSOR) + (void)fprintf(stderr, "%d: %.*s: %s", + lineno, MIN(kp->size, 20), kp->data, NOSUCHKEY); + else + (void)fprintf(stderr, + "%d: rem of cursor failed\n", lineno); +#undef NOSUCHKEY break; } } void +synk(dbp) + DB *dbp; +{ + switch (dbp->sync(dbp, flags)) { + case 0: + break; + case -1: + err("line %lu: synk: %s", lineno, strerror(errno)); + /* NOTREACHED */ + } +} + +void seq(dbp, kp) DB *dbp; DBT *kp; { DBT data; - switch(dbp->seq(dbp, kp, &data, flags)) { + switch (dbp->seq(dbp, kp, &data, flags)) { case 0: (void)write(ofd, data.data, data.size); + if (ofd == STDOUT_FILENO) + (void)write(ofd, "\n", 1); break; case -1: err("line %lu: seq: %s", lineno, strerror(errno)); /* NOTREACHED */ case 1: - (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); +#define NOSUCHKEY "seq failed, no such key\n" + if (ofd != STDOUT_FILENO) + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + else if (flags == R_CURSOR) + (void)fprintf(stderr, "%d: %.*s: %s", + lineno, MIN(kp->size, 20), kp->data, NOSUCHKEY); + else + (void)fprintf(stderr, + "%d: seq (%s) failed\n", lineno, sflags(flags)); +#undef NOSUCHKEY break; } } @@ -430,9 +504,11 @@ dump(dbp, rev) nflags = R_NEXT; } for (;; flags = nflags) - switch(dbp->seq(dbp, &key, &data, flags)) { + switch (dbp->seq(dbp, &key, &data, flags)) { case 0: (void)write(ofd, data.data, data.size); + if (ofd == STDOUT_FILENO) + (void)write(ofd, "\n", 1); break; case 1: goto done; @@ -451,31 +527,42 @@ setflags(s) char *p, *index(); for (; isspace(*s); ++s); - if (*s == '\n') + if (*s == '\n' || *s == '\0') return (0); if ((p = index(s, '\n')) != NULL) *p = '\0'; - if (!strcmp(s, "R_CURSOR")) - return (R_CURSOR); - if (!strcmp(s, "R_FIRST")) - return (R_FIRST); - if (!strcmp(s, "R_IAFTER")) - return (R_IAFTER); - if (!strcmp(s, "R_IBEFORE")) - return (R_IBEFORE); - if (!strcmp(s, "R_LAST")) - return (R_LAST); - if (!strcmp(s, "R_NEXT")) - return (R_NEXT); - if (!strcmp(s, "R_NOOVERWRITE")) - return (R_NOOVERWRITE); - if (!strcmp(s, "R_PREV")) - return (R_PREV); - if (!strcmp(s, "R_SETCURSOR")) - return (R_SETCURSOR); + if (!strcmp(s, "R_CURSOR")) return (R_CURSOR); + if (!strcmp(s, "R_FIRST")) return (R_FIRST); + if (!strcmp(s, "R_IAFTER")) return (R_IAFTER); + if (!strcmp(s, "R_IBEFORE")) return (R_IBEFORE); + if (!strcmp(s, "R_LAST")) return (R_LAST); + if (!strcmp(s, "R_NEXT")) return (R_NEXT); + if (!strcmp(s, "R_NOOVERWRITE")) return (R_NOOVERWRITE); + if (!strcmp(s, "R_PREV")) return (R_PREV); + if (!strcmp(s, "R_SETCURSOR")) return (R_SETCURSOR); + err("line %lu: %s: unknown flag", lineno, s); /* NOTREACHED */ } + +char * +sflags(flags) + int flags; +{ + switch (flags) { + case R_CURSOR: return ("R_CURSOR"); + case R_FIRST: return ("R_FIRST"); + case R_IAFTER: return ("R_IAFTER"); + case R_IBEFORE: return ("R_IBEFORE"); + case R_LAST: return ("R_LAST"); + case R_NEXT: return ("R_NEXT"); + case R_NOOVERWRITE: return ("R_NOOVERWRITE"); + case R_PREV: return ("R_PREV"); + case R_SETCURSOR: return ("R_SETCURSOR"); + } + + return ("UNKNOWN!"); +} DBTYPE dbtype(s) @@ -507,7 +594,7 @@ setinfo(type, s) if (!isdigit(*eq)) err("%s: structure set statement must be a number", s); - switch(type) { + switch (type) { case DB_BTREE: if (!strcmp("flags", s)) { ib.flags = atoi(eq); diff --git a/lib/libc/db/test/run.test b/lib/libc/db/test/run.test index 5eeaf74..52b74c3 100644 --- a/lib/libc/db/test/run.test +++ b/lib/libc/db/test/run.test @@ -1,19 +1,26 @@ #!/bin/sh - # -# @(#)run.test 8.7 (Berkeley) 9/16/93 +# @(#)run.test 8.10 (Berkeley) 7/26/94 # # db regression tests main() { -DICT=/usr/share/dict/words -#DICT=/usr/dict/words -PROG=./dbtest -TMP1=t1 -TMP2=t2 -TMP3=t3 + PROG=./dbtest + TMP1=t1 + TMP2=t2 + TMP3=t3 + if [ -f /usr/share/dict/words ]; then + DICT=/usr/share/dict/words + elif [ -f /usr/dict/words ]; then + DICT=/usr/dict/words + else + echo 'run.test: no dictionary' + exit 1 + fi + if [ $# -eq 0 ]; then for t in 1 2 3 4 5 6 7 8 9 10 11 12 13 20; do test$t @@ -345,7 +352,7 @@ test7() for (i = 1; i <= 120; ++i) printf("%05d: input key %d: %s\n", i, i, $0); printf("%05d: input key %d: %s\n", 120, 120, $0); - printf("get failed, no such key\n"); + printf("seq failed, no such key\n"); printf("%05d: input key %d: %s\n", 1, 1, $0); printf("%05d: input key %d: %s\n", 2, 2, $0); exit; @@ -364,10 +371,10 @@ test7() for (i = 1; i <= 120; ++i) printf("s\n"); printf("fR_CURSOR\ns\nk120\n"); - printf("r\nk120\n"); + printf("r\n"); printf("fR_NEXT\ns\n"); printf("fR_CURSOR\ns\nk1\n"); - printf("r\nk1\n"); + printf("r\n"); printf("fR_FIRST\ns\n"); }' > $TMP2 $PROG -o $TMP3 recno $TMP2 @@ -392,13 +399,11 @@ test8() if (i % 8 == 0) { printf("c\nkkey2\nD/bin/csh\n"); printf("c\nkkey1\nD/bin/sh\n"); - printf("e\t%d of 10 (comparison)\r\n", i); + printf("e\t%d of 10 (comparison)\n", i); } else - printf("e\t%d of 10 \r\n", i); + printf("e\t%d of 10 \n", i); printf("r\nkkey1\nr\nkkey2\n"); } - printf("e\n"); - printf("eend of test8 run\n"); }' > $TMP1 $PROG btree $TMP1 # $PROG hash $TMP1 @@ -459,7 +464,7 @@ test10() printf("p\nk%d\nd%s\n", ++i, $0); } END { - printf("fR_CURSOR\nr\nk1\n"); + printf("fR_CURSOR\nr\n"); printf("eR_CURSOR SHOULD HAVE FAILED\n"); }' > $TMP2 $PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1 @@ -573,7 +578,8 @@ test13() echo g echo k$i done > $TMP2 - $PROG -ilorder=$order -f byte.file -o $TMP3 $type $TMP2 + $PROG -s \ + -ilorder=$order -f byte.file -o $TMP3 $type $TMP2 if (cmp -s $TMP1 $TMP3) ; then : else echo "test13: $type/$order get failed" -- cgit v1.1