diff options
Diffstat (limited to 'lib/libc/db/btree')
-rw-r--r-- | lib/libc/db/btree/Makefile.inc | 4 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_conv.c | 36 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_debug.c | 44 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_delete.c | 665 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_get.c | 141 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_overflow.c | 26 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_page.c | 21 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_put.c | 106 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_search.c | 130 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_seq.c | 380 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_split.c | 52 | ||||
-rw-r--r-- | lib/libc/db/btree/bt_utils.c | 103 | ||||
-rw-r--r-- | lib/libc/db/btree/btree.h | 262 | ||||
-rw-r--r-- | lib/libc/db/btree/extern.h | 14 |
14 files changed, 1148 insertions, 836 deletions
diff --git a/lib/libc/db/btree/Makefile.inc b/lib/libc/db/btree/Makefile.inc index 71f8dfc..8ed7649 100644 --- a/lib/libc/db/btree/Makefile.inc +++ b/lib/libc/db/btree/Makefile.inc @@ -1,7 +1,7 @@ -# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 +# @(#)Makefile.inc 8.2 (Berkeley) 7/14/94 .PATH: ${.CURDIR}/db/btree SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \ bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \ - bt_stack.c bt_utils.c + bt_utils.c diff --git a/lib/libc/db/btree/bt_conv.c b/lib/libc/db/btree/bt_conv.c index eb49340..1cb208b1 100644 --- a/lib/libc/db/btree/bt_conv.c +++ b/lib/libc/db/btree/bt_conv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_conv.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/param.h> @@ -68,7 +68,7 @@ __bt_pgin(t, pg, pp) u_char flags; char *p; - if (!ISSET(((BTREE *)t), B_NEEDSWAP)) + if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) return; if (pg == P_META) { mswap(pp); @@ -89,7 +89,7 @@ __bt_pgin(t, pg, pp) M_16_SWAP(h->linp[i]); p = (char *)GETBINTERNAL(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); if (*(u_char *)p & P_BIGKEY) { @@ -104,9 +104,9 @@ __bt_pgin(t, pg, pp) M_16_SWAP(h->linp[i]); p = (char *)GETBLEAF(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); flags = *(u_char *)p; if (flags & (P_BIGKEY | P_BIGDATA)) { p += sizeof(u_char); @@ -116,7 +116,7 @@ __bt_pgin(t, pg, pp) P_32_SWAP(p); } if (flags & P_BIGDATA) { - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); @@ -136,7 +136,7 @@ __bt_pgout(t, pg, pp) u_char flags; char *p; - if (!ISSET(((BTREE *)t), B_NEEDSWAP)) + if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) return; if (pg == P_META) { mswap(pp); @@ -149,7 +149,7 @@ __bt_pgout(t, pg, pp) for (i = 0; i < top; i++) { p = (char *)GETBINTERNAL(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); if (*(u_char *)p & P_BIGKEY) { @@ -164,9 +164,9 @@ __bt_pgout(t, pg, pp) for (i = 0; i < top; i++) { p = (char *)GETBLEAF(h, i); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); - p += sizeof(size_t); + p += sizeof(u_int32_t); flags = *(u_char *)p; if (flags & (P_BIGKEY | P_BIGDATA)) { p += sizeof(u_char); @@ -176,7 +176,7 @@ __bt_pgout(t, pg, pp) P_32_SWAP(p); } if (flags & P_BIGDATA) { - p += sizeof(size_t); + p += sizeof(u_int32_t); P_32_SWAP(p); p += sizeof(pgno_t); P_32_SWAP(p); @@ -206,16 +206,16 @@ mswap(pg) char *p; p = (char *)pg; - P_32_SWAP(p); /* m_magic */ + P_32_SWAP(p); /* magic */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_version */ + P_32_SWAP(p); /* version */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_psize */ + P_32_SWAP(p); /* psize */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_free */ + P_32_SWAP(p); /* free */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_nrecs */ + P_32_SWAP(p); /* nrecs */ p += sizeof(u_int32_t); - P_32_SWAP(p); /* m_flags */ + P_32_SWAP(p); /* flags */ p += sizeof(u_int32_t); } diff --git a/lib/libc/db/btree/bt_debug.c b/lib/libc/db/btree/bt_debug.c index 5f9ac1d..3aefbe7 100644 --- a/lib/libc/db/btree/bt_debug.c +++ b/lib/libc/db/btree/bt_debug.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_debug.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_debug.c 8.5 (Berkeley) 8/17/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/param.h> @@ -65,24 +65,22 @@ __bt_dump(dbp) t = dbp->internal; (void)fprintf(stderr, "%s: pgsz %d", - ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); - if (ISSET(t, R_RECNO)) + F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); + if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " keys %lu", t->bt_nrecs); #undef X #define X(flag, name) \ - if (ISSET(t, flag)) { \ + if (F_ISSET(t, flag)) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } - if (t->bt_flags) { + if (t->flags != 0) { sep = " flags ("; - X(B_DELCRSR, "DELCRSR"); X(R_FIXLEN, "FIXLEN"); X(B_INMEM, "INMEM"); X(B_NODUPS, "NODUPS"); X(B_RDONLY, "RDONLY"); X(R_RECNO, "RECNO"); - X(B_SEQINIT, "SEQINIT"); X(B_METADIRTY,"METADIRTY"); (void)fprintf(stderr, ")\n"); } @@ -108,19 +106,19 @@ __bt_dmpage(h) char *sep; m = (BTMETA *)h; - (void)fprintf(stderr, "magic %lx\n", m->m_magic); - (void)fprintf(stderr, "version %lu\n", m->m_version); - (void)fprintf(stderr, "psize %lu\n", m->m_psize); - (void)fprintf(stderr, "free %lu\n", m->m_free); - (void)fprintf(stderr, "nrecs %lu\n", m->m_nrecs); - (void)fprintf(stderr, "flags %lu", m->m_flags); + (void)fprintf(stderr, "magic %lx\n", m->magic); + (void)fprintf(stderr, "version %lu\n", m->version); + (void)fprintf(stderr, "psize %lu\n", m->psize); + (void)fprintf(stderr, "free %lu\n", m->free); + (void)fprintf(stderr, "nrecs %lu\n", m->nrecs); + (void)fprintf(stderr, "flags %lu", m->flags); #undef X #define X(flag, name) \ - if (m->m_flags & flag) { \ + if (m->flags & flag) { \ (void)fprintf(stderr, "%s%s", sep, name); \ sep = ", "; \ } - if (m->m_flags) { + if (m->flags) { sep = " ("; X(B_NODUPS, "NODUPS"); X(R_RECNO, "RECNO"); @@ -192,7 +190,7 @@ __bt_dpage(h) h->lower, h->upper, top); for (cur = 0; cur < top; cur++) { (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]); - switch(h->flags & P_TYPE) { + switch (h->flags & P_TYPE) { case P_BINTERNAL: bi = GETBINTERNAL(h, cur); (void)fprintf(stderr, @@ -214,14 +212,14 @@ __bt_dpage(h) (void)fprintf(stderr, "big key page %lu size %u/", *(pgno_t *)bl->bytes, - *(size_t *)(bl->bytes + sizeof(pgno_t))); + *(u_int32_t *)(bl->bytes + sizeof(pgno_t))); else if (bl->ksize) (void)fprintf(stderr, "%s/", bl->bytes); if (bl->flags & P_BIGDATA) (void)fprintf(stderr, "big data page %lu size %u", *(pgno_t *)(bl->bytes + bl->ksize), - *(size_t *)(bl->bytes + bl->ksize + + *(u_int32_t *)(bl->bytes + bl->ksize + sizeof(pgno_t))); else if (bl->dsize) (void)fprintf(stderr, "%.*s", @@ -233,7 +231,7 @@ __bt_dpage(h) (void)fprintf(stderr, "big data page %lu size %u", *(pgno_t *)rl->bytes, - *(size_t *)(rl->bytes + sizeof(pgno_t))); + *(u_int32_t *)(rl->bytes + sizeof(pgno_t))); else if (rl->dsize) (void)fprintf(stderr, "%.*s", (int)rl->dsize, rl->bytes); @@ -267,7 +265,7 @@ __bt_stat(dbp) pcont = pinternal = pleaf = 0; nkeys = ifree = lfree = 0; for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { - switch(h->flags & P_TYPE) { + switch (h->flags & P_TYPE) { case P_BINTERNAL: case P_RINTERNAL: ++pinternal; @@ -295,7 +293,7 @@ __bt_stat(dbp) (void)mpool_put(t->bt_mp, h, 0); break; } - i = ISSET(t, R_RECNO) ? + i = F_ISSET(t, R_RECNO) ? GETRINTERNAL(h, 0)->pgno : GETBINTERNAL(h, 0)->pgno; (void)mpool_put(t->bt_mp, h, 0); @@ -303,7 +301,7 @@ __bt_stat(dbp) (void)fprintf(stderr, "%d level%s with %ld keys", levels, levels == 1 ? "" : "s", nkeys); - if (ISSET(t, R_RECNO)) + if (F_ISSET(t, R_RECNO)) (void)fprintf(stderr, " (%ld header count)", t->bt_nrecs); (void)fprintf(stderr, "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n", diff --git a/lib/libc/db/btree/bt_delete.c b/lib/libc/db/btree/bt_delete.c index 5dba534..ece1ab6 100644 --- a/lib/libc/db/btree/bt_delete.c +++ b/lib/libc/db/btree/bt_delete.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -47,18 +47,17 @@ static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94"; #include <db.h> #include "btree.h" -static int bt_bdelete __P((BTREE *, const DBT *)); +static int __bt_bdelete __P((BTREE *, const DBT *)); +static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); +static int __bt_pdelete __P((BTREE *, PAGE *)); +static int __bt_relink __P((BTREE *, PAGE *)); +static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); /* - * __BT_DELETE -- Delete the item(s) referenced by a key. + * __bt_delete + * Delete the item(s) referenced by a key. * - * Parameters: - * dbp: pointer to access method - * key: key to delete - * flags: R_CURSOR if deleting what the cursor references - * - * Returns: - * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + * Return RET_SPECIAL if the key is not found. */ int __bt_delete(dbp, key, flags) @@ -67,6 +66,8 @@ __bt_delete(dbp, key, flags) u_int flags; { BTREE *t; + CURSOR *c; + PAGE *h; int status; t = dbp->internal; @@ -77,242 +78,433 @@ __bt_delete(dbp, key, flags) t->bt_pinned = NULL; } - if (ISSET(t, B_RDONLY)) { + /* Check for change to a read-only tree. */ + if (F_ISSET(t, B_RDONLY)) { errno = EPERM; return (RET_ERROR); } - switch(flags) { + switch (flags) { case 0: - status = bt_bdelete(t, key); + status = __bt_bdelete(t, key); break; case R_CURSOR: /* - * If flags is R_CURSOR, delete the cursor; must already have - * started a scan and not have already deleted the record. For - * the delete cursor bit to have been set requires that the - * scan be initialized, so no reason to check. + * If flags is R_CURSOR, delete the cursor. Must already + * have started a scan and not have already deleted it. */ - if (!ISSET(t, B_SEQINIT)) - goto einval; - status = ISSET(t, B_DELCRSR) ? - RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); - break; + c = &t->bt_cursor; + if (F_ISSET(c, CURS_INIT)) { + if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) + return (RET_ERROR); + + /* + * If the page is about to be emptied, we'll need to + * delete it, which means we have to acquire a stack. + */ + if (NEXTINDEX(h) == 1) + if (__bt_stkacq(t, &h, &t->bt_cursor)) + return (RET_ERROR); + + status = __bt_dleaf(t, NULL, h, c->pg.index); + + if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + } else + mpool_put(t->bt_mp, + h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); + break; + } + /* FALLTHROUGH */ default: -einval: errno = EINVAL; + errno = EINVAL; return (RET_ERROR); } if (status == RET_SUCCESS) - SET(t, B_MODIFIED); + F_SET(t, B_MODIFIED); return (status); } /* - * BT_BDELETE -- Delete all key/data pairs matching the specified key. + * __bt_stkacq -- + * Acquire a stack so we can delete a cursor entry. * * Parameters: - * tree: tree + * t: tree + * hp: pointer to current, pinned PAGE pointer + * c: pointer to the cursor + * + * Returns: + * 0 on success, 1 on failure + */ +static int +__bt_stkacq(t, hp, c) + BTREE *t; + PAGE **hp; + CURSOR *c; +{ + BINTERNAL *bi; + EPG *e; + EPGNO *parent; + PAGE *h; + indx_t index; + pgno_t pgno; + recno_t nextpg, prevpg; + int exact, level; + + /* + * Find the first occurrence of the key in the tree. Toss the + * currently locked page so we don't hit an already-locked page. + */ + h = *hp; + mpool_put(t->bt_mp, h, 0); + if ((e = __bt_search(t, &c->key, &exact)) == NULL) + return (1); + h = e->page; + + /* See if we got it in one shot. */ + if (h->pgno == c->pg.pgno) + goto ret; + + /* + * Move right, looking for the page. At each move we have to move + * up the stack until we don't have to move to the next page. If + * we have to change pages at an internal level, we have to fix the + * stack back up. + */ + while (h->pgno != c->pg.pgno) { + if ((nextpg = h->nextpg) == P_INVALID) + break; + mpool_put(t->bt_mp, h, 0); + + /* Move up the stack. */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != NEXTINDEX(h) - 1) { + index = parent->index + 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + BT_PUSH(t, pgno, 0); + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + index = 0; + } + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) + return (1); + } + + if (h->pgno == c->pg.pgno) + goto ret; + + /* Reacquire the original stack. */ + mpool_put(t->bt_mp, h, 0); + if ((e = __bt_search(t, &c->key, &exact)) == NULL) + return (1); + h = e->page; + + /* + * Move left, looking for the page. At each move we have to move + * up the stack until we don't have to change pages to move to the + * next page. If we have to change pages at an internal level, we + * have to fix the stack back up. + */ + while (h->pgno != c->pg.pgno) { + if ((prevpg = h->prevpg) == P_INVALID) + break; + mpool_put(t->bt_mp, h, 0); + + /* Move up the stack. */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != 0) { + index = parent->index - 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + + index = NEXTINDEX(h) - 1; + BT_PUSH(t, pgno, index); + } + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) + return (1); + } + + +ret: mpool_put(t->bt_mp, h, 0); + return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); +} + +/* + * __bt_bdelete -- + * Delete all key/data pairs matching the specified key. + * + * Parameters: + * t: tree * key: key to delete * * Returns: * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. */ static int -bt_bdelete(t, key) +__bt_bdelete(t, key) BTREE *t; const DBT *key; { - EPG *e, save; + EPG *e; PAGE *h; - pgno_t cpgno, pg; - indx_t cindex; - int deleted, dirty1, dirty2, exact; + int deleted, exact, redo; + + deleted = 0; /* Find any matching record; __bt_search pins the page. */ - if ((e = __bt_search(t, key, &exact)) == NULL) - return (RET_ERROR); +loop: if ((e = __bt_search(t, key, &exact)) == NULL) + return (deleted ? RET_SUCCESS : RET_ERROR); if (!exact) { mpool_put(t->bt_mp, e->page, 0); - return (RET_SPECIAL); + return (deleted ? RET_SUCCESS : RET_SPECIAL); } /* - * Delete forward, then delete backward, from the found key. The - * ordering is so that the deletions don't mess up the page refs. - * The first loop deletes the key from the original page, the second - * unpins the original page. In the first loop, dirty1 is set if - * the original page is modified, and dirty2 is set if any subsequent - * pages are modified. In the second loop, dirty1 starts off set if - * the original page has been modified, and is set if any subsequent - * pages are modified. - * - * If find the key referenced by the cursor, don't delete it, just - * flag it for future deletion. The cursor page number is P_INVALID - * unless the sequential scan is initialized, so no reason to check. - * A special case is when the already deleted cursor record was the - * only record found. If so, then the delete opertion fails as no - * records were deleted. - * - * Cycle in place in the current page until the current record doesn't - * match the key or the page is empty. If the latter, walk forward, - * skipping empty pages and repeating until a record doesn't match - * the key or the end of the tree is reached. + * Delete forward, then delete backward, from the found key. If + * there are duplicates and we reach either side of the page, do + * the key search again, so that we get them all. */ - cpgno = t->bt_bcursor.pgno; - cindex = t->bt_bcursor.index; - save = *e; - dirty1 = 0; - for (h = e->page, deleted = 0;;) { - dirty2 = 0; - do { - if (h->pgno == cpgno && e->index == cindex) { - if (!ISSET(t, B_DELCRSR)) { - SET(t, B_DELCRSR); - deleted = 1; - } - ++e->index; - } else { - if (__bt_dleaf(t, h, e->index)) { - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, dirty2); - mpool_put(t->bt_mp, save.page, dirty1); + redo = 0; + h = e->page; + do { + if (__bt_dleaf(t, key, h, e->index)) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + if (F_ISSET(t, B_NODUPS)) { + if (NEXTINDEX(h) == 0) { + if (__bt_pdelete(t, h)) return (RET_ERROR); - } - if (h->pgno == save.page->pgno) - dirty1 = MPOOL_DIRTY; - else - dirty2 = MPOOL_DIRTY; - deleted = 1; - } - } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); - - /* - * Quit if didn't find a match, no next page, or first key on - * the next page doesn't match. Don't unpin the original page - * unless an error occurs. - */ - if (e->index < NEXTINDEX(h)) - break; - for (;;) { - if ((pg = h->nextpg) == P_INVALID) - goto done1; - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, dirty2); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { - mpool_put(t->bt_mp, save.page, dirty1); - return (RET_ERROR); - } - if (NEXTINDEX(h) != 0) { - e->page = h; - e->index = 0; - break; - } + } else + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); } + deleted = 1; + } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); + /* Check for right-hand edge of the page. */ + if (e->index == NEXTINDEX(h)) + redo = 1; + + /* Delete from the key to the beginning of the page. */ + while (e->index-- > 0) { if (__bt_cmp(t, key, e) != 0) break; + if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + if (e->index == 0) + redo = 1; } - /* - * Reach here with the original page and the last page referenced - * pinned (they may be the same). Release it if not the original. - */ -done1: if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, dirty2); + /* Check for an empty page. */ + if (NEXTINDEX(h) == 0) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + goto loop; + } + + /* Put the page. */ + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + if (redo) + goto loop; + return (RET_SUCCESS); +} + +/* + * __bt_pdelete -- + * Delete a single page from the tree. + * + * Parameters: + * t: tree + * h: leaf page + * + * Returns: + * RET_SUCCESS, RET_ERROR. + * + * Side-effects: + * mpool_put's the page + */ +static int +__bt_pdelete(t, h) + BTREE *t; + PAGE *h; +{ + BINTERNAL *bi; + PAGE *pg; + EPGNO *parent; + indx_t cnt, index, *ip, offset; + u_int32_t nksize; + char *from; /* - * Walk backwards from the record previous to the record returned by - * __bt_search, skipping empty pages, until a record doesn't match - * the key or reach the beginning of the tree. + * Walk the parent page stack -- a LIFO stack of the pages that were + * traversed when we searched for the page where the delete occurred. + * Each stack entry is a page number and a page index offset. The + * offset is for the page traversed on the search. We've just deleted + * a page, so we have to delete the key from the parent page. + * + * If the delete from the parent page makes it empty, this process may + * continue all the way up the tree. We stop if we reach the root page + * (which is never deleted, it's just not worth the effort) or if the + * delete does not empty the page. */ - *e = save; - for (;;) { - if (e->index) - --e->index; - for (h = e->page; e->index; --e->index) { - if (__bt_cmp(t, key, e) != 0) - goto done2; - if (h->pgno == cpgno && e->index == cindex) { - if (!ISSET(t, B_DELCRSR)) { - SET(t, B_DELCRSR); - deleted = 1; - } + while ((parent = BT_POP(t)) != NULL) { + /* Get the parent page. */ + if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (RET_ERROR); + + index = parent->index; + bi = GETBINTERNAL(pg, index); + + /* Free any overflow pages. */ + if (bi->flags & P_BIGKEY && + __ovfl_delete(t, bi->bytes) == RET_ERROR) { + mpool_put(t->bt_mp, pg, 0); + return (RET_ERROR); + } + + /* + * Free the parent if it has only the one key and it's not the + * root page. If it's the rootpage, turn it back into an empty + * leaf page. + */ + if (NEXTINDEX(pg) == 1) + if (pg->pgno == P_ROOT) { + pg->lower = BTDATAOFF; + pg->upper = t->bt_psize; + pg->flags = P_BLEAF; } else { - if (__bt_dleaf(t, h, e->index) == RET_ERROR) { - mpool_put(t->bt_mp, h, dirty1); + if (__bt_relink(t, pg) || __bt_free(t, pg)) return (RET_ERROR); - } - if (h->pgno == save.page->pgno) - dirty1 = MPOOL_DIRTY; - deleted = 1; + continue; } + else { + /* Pack remaining key items at the end of the page. */ + nksize = NBINTERNAL(bi->ksize); + from = (char *)pg + pg->upper; + memmove(from + nksize, from, (char *)bi - from); + pg->upper += nksize; + + /* Adjust indices' offsets, shift the indices down. */ + offset = pg->linp[index]; + for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nksize; + for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; + pg->lower -= sizeof(indx_t); } - if ((pg = h->prevpg) == P_INVALID) - goto done2; - mpool_put(t->bt_mp, h, dirty1); - dirty1 = 0; - if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - e->index = NEXTINDEX(e->page); + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + break; } - /* - * Reach here with the last page that was looked at pinned. Release - * it. - */ -done2: mpool_put(t->bt_mp, h, dirty1); - return (deleted ? RET_SUCCESS : RET_SPECIAL); + /* Free the leaf page, as long as it wasn't the root. */ + if (h->pgno == P_ROOT) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); + } + return (__bt_relink(t, h) || __bt_free(t, h)); } /* - * __BT_DLEAF -- Delete a single record from a leaf page. + * __bt_dleaf -- + * Delete a single record from a leaf page. * * Parameters: * t: tree - * index: index on current page to delete + * key: referenced key + * h: page + * index: index on page to delete * * Returns: * RET_SUCCESS, RET_ERROR. */ int -__bt_dleaf(t, h, index) +__bt_dleaf(t, key, h, index) BTREE *t; + const DBT *key; PAGE *h; - indx_t index; + u_int index; { - register BLEAF *bl; - register indx_t cnt, *ip, offset; - register size_t nbytes; - char *from; + BLEAF *bl; + indx_t cnt, *ip, offset; + u_int32_t nbytes; void *to; + char *from; - /* - * Delete a record from a btree leaf page. Internal records are never - * deleted from internal pages, regardless of the records that caused - * them to be added being deleted. Pages made empty by deletion are - * not reclaimed. They are, however, made available for reuse. - * - * Pack the remaining entries at the end of the page, shift the indices - * down, overwriting the deleted record and its index. If the record - * uses overflow pages, make them available for reuse. - */ + /* If this record is referenced by the cursor, delete the cursor. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && + __bt_curdel(t, key, h, index)) + return (RET_ERROR); + + /* If the entry uses overflow pages, make them available for reuse. */ to = bl = GETBLEAF(h, index); if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) return (RET_ERROR); if (bl->flags & P_BIGDATA && __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) return (RET_ERROR); - nbytes = NBLEAF(bl); - /* - * Compress the key/data pairs. Compress and adjust the [BR]LEAF - * offsets. Reset the headers. - */ + /* Pack the remaining key/data items at the end of the page. */ + nbytes = NBLEAF(bl); from = (char *)h + h->upper; memmove(from + nbytes, from, (char *)to - from); h->upper += nbytes; + /* Adjust the indices' offsets, shift the indices down. */ offset = h->linp[index]; for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) if (ip[0] < offset) @@ -320,5 +512,146 @@ __bt_dleaf(t, h, index) for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; h->lower -= sizeof(indx_t); + + /* If the cursor is on this page, adjust it as necessary. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) + --t->bt_cursor.pg.index; + return (RET_SUCCESS); } + +/* + * __bt_curdel -- + * Delete the cursor. + * + * Parameters: + * t: tree + * key: referenced key (or NULL) + * h: page + * index: index on page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +static int +__bt_curdel(t, key, h, index) + BTREE *t; + const DBT *key; + PAGE *h; + u_int index; +{ + CURSOR *c; + EPG e; + PAGE *pg; + int curcopy, status; + + /* + * If there are duplicates, move forward or backward to one. + * Otherwise, copy the key into the cursor area. + */ + c = &t->bt_cursor; + F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); + + curcopy = 0; + if (!F_ISSET(t, B_NODUPS)) { + /* + * We're going to have to do comparisons. If we weren't + * provided a copy of the key, i.e. the user is deleting + * the current cursor position, get one. + */ + if (key == NULL) { + e.page = h; + e.index = index; + if ((status = __bt_ret(t, &e, + &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) + return (status); + curcopy = 1; + key = &c->key; + } + /* Check previous key, if not at the beginning of the page. */ + if (index > 0) { + e.page = h; + e.index = index - 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_BEFORE); + goto dup2; + } + } + /* Check next key, if not at the end of the page. */ + if (index < NEXTINDEX(h) - 1) { + e.page = h; + e.index = index + 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_AFTER); + goto dup2; + } + } + /* Check previous key if at the beginning of the page. */ + if (index == 0 && h->prevpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (RET_ERROR); + e.page = pg; + e.index = NEXTINDEX(pg) - 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_BEFORE); + goto dup1; + } + mpool_put(t->bt_mp, pg, 0); + } + /* Check next key if at the end of the page. */ + if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (RET_ERROR); + e.page = pg; + e.index = 0; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_AFTER); +dup1: mpool_put(t->bt_mp, pg, 0); +dup2: c->pg.pgno = e.page->pgno; + c->pg.index = e.index; + return (RET_SUCCESS); + } + mpool_put(t->bt_mp, pg, 0); + } + } + e.page = h; + e.index = index; + if (curcopy || (status = + __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { + F_SET(c, CURS_ACQUIRE); + return (RET_SUCCESS); + } + return (status); +} + +/* + * __bt_relink -- + * Link around a deleted page. + * + * Parameters: + * t: tree + * h: page to be deleted + */ +static int +__bt_relink(t, h) + BTREE *t; + PAGE *h; +{ + PAGE *pg; + + if (h->nextpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (RET_ERROR); + pg->prevpg = h->prevpg; + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + } + if (h->prevpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (RET_ERROR); + pg->nextpg = h->nextpg; + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + } + return (0); +} diff --git a/lib/libc/db/btree/bt_get.c b/lib/libc/db/btree/bt_get.c index 28b2d60..74824c7 100644 --- a/lib/libc/db/btree/bt_get.c +++ b/lib/libc/db/btree/bt_get.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_get.c 8.2 (Berkeley) 9/7/93"; +static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -91,148 +91,15 @@ __bt_get(dbp, key, data, flags) return (RET_SPECIAL); } - /* - * A special case is if we found the record but it's flagged for - * deletion. In this case, we want to find another record with the - * same key, if it exists. Rather than look around the tree we call - * __bt_first and have it redo the search, as __bt_first will not - * return keys marked for deletion. Slow, but should never happen. - */ - if (ISSET(t, B_DELCRSR) && e->page->pgno == t->bt_bcursor.pgno && - e->index == t->bt_bcursor.index) { - mpool_put(t->bt_mp, e->page, 0); - if ((e = __bt_first(t, key, &exact)) == NULL) - return (RET_ERROR); - if (!exact) - return (RET_SPECIAL); - } + status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0); - status = __bt_ret(t, e, NULL, data); /* * If the user is doing concurrent access, we copied the * key/data, toss the page. */ - if (ISSET(t, B_DB_LOCK)) + if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e->page, 0); else t->bt_pinned = e->page; return (status); } - -/* - * __BT_FIRST -- Find the first entry. - * - * Parameters: - * t: the tree - * key: the key - * - * Returns: - * The first entry in the tree greater than or equal to key. - */ -EPG * -__bt_first(t, key, exactp) - BTREE *t; - const DBT *key; - int *exactp; -{ - register PAGE *h; - register EPG *e; - EPG save; - pgno_t cpgno, pg; - indx_t cindex; - int found; - - /* - * Find any matching record; __bt_search pins the page. Only exact - * matches are tricky, otherwise just return the location of the key - * if it were to be inserted into the tree. - */ - if ((e = __bt_search(t, key, exactp)) == NULL) - return (NULL); - if (!*exactp) - return (e); - - if (ISSET(t, B_DELCRSR)) { - cpgno = t->bt_bcursor.pgno; - cindex = t->bt_bcursor.index; - } else { - cpgno = P_INVALID; - cindex = 0; /* GCC thinks it's uninitialized. */ - } - - /* - * Walk backwards, skipping empty pages, as long as the entry matches - * and there are keys left in the tree. Save a copy of each match in - * case we go too far. A special case is that we don't return a match - * on records that the cursor references that have already been flagged - * for deletion. - */ - save = *e; - h = e->page; - found = 0; - do { - if (cpgno != h->pgno || cindex != e->index) { - if (save.page->pgno != e->page->pgno) { - mpool_put(t->bt_mp, save.page, 0); - save = *e; - } else - save.index = e->index; - found = 1; - } - /* - * Make a special effort not to unpin the page the last (or - * original) match was on, but also make sure it's unpinned - * if an error occurs. - */ - while (e->index == 0) { - if (h->prevpg == P_INVALID) - goto done1; - if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) { - if (h->pgno == save.page->pgno) - mpool_put(t->bt_mp, save.page, 0); - return (NULL); - } - e->page = h; - e->index = NEXTINDEX(h); - } - --e->index; - } while (__bt_cmp(t, key, e) == 0); - - /* - * Reach here with the last page that was looked at pinned, which may - * or may not be the same as the last (or original) match page. If - * it's not useful, release it. - */ -done1: if (h->pgno != save.page->pgno) - mpool_put(t->bt_mp, h, 0); - - /* - * If still haven't found a record, the only possibility left is the - * next one. Move forward one slot, skipping empty pages and check. - */ - if (!found) { - h = save.page; - if (++save.index == NEXTINDEX(h)) { - do { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) { - *exactp = 0; - return (e); - } - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (NULL); - } while ((save.index = NEXTINDEX(h)) == 0); - save.page = h; - } - if (__bt_cmp(t, key, &save) != 0) { - *exactp = 0; - return (e); - } - } - *e = save; - *exactp = 1; - return (e); -} diff --git a/lib/libc/db/btree/bt_overflow.c b/lib/libc/db/btree/bt_overflow.c index 0057a03..b28b8e0 100644 --- a/lib/libc/db/btree/bt_overflow.c +++ b/lib/libc/db/btree/bt_overflow.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/param.h> @@ -69,7 +69,7 @@ static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94"; * * Parameters: * t: tree - * p: pointer to { pgno_t, size_t } + * p: pointer to { pgno_t, u_int32_t } * buf: storage address * bufsz: storage size * @@ -81,15 +81,16 @@ __ovfl_get(t, p, ssz, buf, bufsz) BTREE *t; void *p; size_t *ssz; - char **buf; + void **buf; size_t *bufsz; { PAGE *h; pgno_t pg; - size_t nb, plen, sz; + size_t nb, plen; + u_int32_t sz; memmove(&pg, p, sizeof(pgno_t)); - memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t)); + memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); *ssz = sz; #ifdef DEBUG @@ -98,7 +99,8 @@ __ovfl_get(t, p, ssz, buf, bufsz) #endif /* Make the buffer bigger as necessary. */ if (*bufsz < sz) { - if ((*buf = (char *)realloc(*buf, sz)) == NULL) + *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz)); + if (*buf == NULL) return (RET_ERROR); *bufsz = sz; } @@ -142,7 +144,8 @@ __ovfl_put(t, dbt, pg) PAGE *h, *last; void *p; pgno_t npg; - size_t nb, plen, sz; + size_t nb, plen; + u_int32_t sz; /* * Allocate pages and copy the key/data record into them. Store the @@ -181,7 +184,7 @@ __ovfl_put(t, dbt, pg) * * Parameters: * t: tree - * p: pointer to { pgno_t, size_t } + * p: pointer to { pgno_t, u_int32_t } * * Returns: * RET_ERROR, RET_SUCCESS @@ -193,10 +196,11 @@ __ovfl_delete(t, p) { PAGE *h; pgno_t pg; - size_t plen, sz; + size_t plen; + u_int32_t sz; memmove(&pg, p, sizeof(pgno_t)); - memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t)); + memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); #ifdef DEBUG if (pg == P_INVALID || sz == 0) diff --git a/lib/libc/db/btree/bt_page.c b/lib/libc/db/btree/bt_page.c index f71a40d..0d9d138 100644 --- a/lib/libc/db/btree/bt_page.c +++ b/lib/libc/db/btree/bt_page.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_page.c 8.3 (Berkeley) 7/14/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -43,7 +43,8 @@ static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; #include "btree.h" /* - * __BT_FREE -- Put a page on the freelist. + * __bt_free -- + * Put a page on the freelist. * * Parameters: * t: tree @@ -51,13 +52,16 @@ static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; * * Returns: * RET_ERROR, RET_SUCCESS + * + * Side-effect: + * mpool_put's the page. */ int __bt_free(t, h) BTREE *t; PAGE *h; { - /* Insert the page at the start of the free list. */ + /* Insert the page at the head of the free list. */ h->prevpg = P_INVALID; h->nextpg = t->bt_free; t->bt_free = h->pgno; @@ -67,7 +71,8 @@ __bt_free(t, h) } /* - * __BT_NEW -- Get a new page, preferably from the freelist. + * __bt_new -- + * Get a new page, preferably from the freelist. * * Parameters: * t: tree @@ -85,9 +90,9 @@ __bt_new(t, npg) if (t->bt_free != P_INVALID && (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { - *npg = t->bt_free; - t->bt_free = h->nextpg; - return (h); + *npg = t->bt_free; + t->bt_free = h->nextpg; + return (h); } return (mpool_new(t->bt_mp, npg)); } diff --git a/lib/libc/db/btree/bt_put.c b/lib/libc/db/btree/bt_put.c index 11a211b..952be09 100644 --- a/lib/libc/db/btree/bt_put.c +++ b/lib/libc/db/btree/bt_put.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_put.c 8.3 (Berkeley) 9/16/93"; +static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -76,7 +76,7 @@ __bt_put(dbp, key, data, flags) PAGE *h; indx_t index, nxtindex; pgno_t pg; - size_t nbytes; + u_int32_t nbytes; int dflags, exact, status; char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; @@ -88,33 +88,38 @@ __bt_put(dbp, key, data, flags) t->bt_pinned = NULL; } + /* Check for change to a read-only tree. */ + if (F_ISSET(t, B_RDONLY)) { + errno = EPERM; + return (RET_ERROR); + } + switch (flags) { - case R_CURSOR: - if (!ISSET(t, B_SEQINIT)) - goto einval; - if (ISSET(t, B_DELCRSR)) - goto einval; - break; case 0: case R_NOOVERWRITE: break; + case R_CURSOR: + /* + * If flags is R_CURSOR, put the cursor. Must already + * have started a scan and not have already deleted it. + */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, + CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) + break; + /* FALLTHROUGH */ default: -einval: errno = EINVAL; - return (RET_ERROR); - } - - if (ISSET(t, B_RDONLY)) { - errno = EPERM; + errno = EINVAL; return (RET_ERROR); } /* - * If the key/data won't fit on a page, store it on indirect pages. - * Only store the key on the overflow page if it's too big after the - * data is on an overflow page. + * If the key/data pair won't fit on a page, store it on overflow + * pages. Only put the key on the overflow page if the pair are + * still too big after moving the data to an overflow page. * * XXX - * If the insert fails later on, these pages aren't recovered. + * If the insert fails later on, the overflow pages aren't recovered. */ dflags = 0; if (key->size + data->size > t->bt_ovflsize) { @@ -125,7 +130,7 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) tkey.size = NOVFLSIZE; memmove(kb, &pg, sizeof(pgno_t)); memmove(kb + sizeof(pgno_t), - &key->size, sizeof(size_t)); + &key->size, sizeof(u_int32_t)); dflags |= P_BIGKEY; key = &tkey; } @@ -136,7 +141,7 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) tdata.size = NOVFLSIZE; memmove(db, &pg, sizeof(pgno_t)); memmove(db + sizeof(pgno_t), - &data->size, sizeof(size_t)); + &data->size, sizeof(u_int32_t)); dflags |= P_BIGDATA; data = &tdata; } @@ -146,15 +151,15 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) /* Replace the cursor. */ if (flags == R_CURSOR) { - if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) + if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL) return (RET_ERROR); - index = t->bt_bcursor.index; + index = t->bt_cursor.pg.index; goto delete; } /* - * Find the key to delete, or, the location at which to insert. Bt_fast - * and __bt_search pin the returned page. + * Find the key to delete, or, the location at which to insert. + * Bt_fast and __bt_search both pin the returned page. */ if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) if ((e = __bt_search(t, key, &exact)) == NULL) @@ -163,34 +168,26 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) index = e->index; /* - * Add the specified key/data pair to the tree. If an identical key - * is already in the tree, and R_NOOVERWRITE is set, an error is - * returned. If R_NOOVERWRITE is not set, the key is either added (if - * duplicates are permitted) or an error is returned. - * - * Pages are split as required. + * Add the key/data pair to the tree. If an identical key is already + * in the tree, and R_NOOVERWRITE is set, an error is returned. If + * R_NOOVERWRITE is not set, the key is either added (if duplicates are + * permitted) or an error is returned. */ switch (flags) { case R_NOOVERWRITE: if (!exact) break; - /* - * One special case is if the cursor references the record and - * it's been flagged for deletion. Then, we delete the record, - * leaving the cursor there -- this means that the inserted - * record will not be seen in a cursor scan. - */ - if (ISSET(t, B_DELCRSR) && t->bt_bcursor.pgno == h->pgno && - t->bt_bcursor.index == index) { - CLR(t, B_DELCRSR); - goto delete; - } mpool_put(t->bt_mp, h, 0); return (RET_SPECIAL); default: - if (!exact || !ISSET(t, B_NODUPS)) + if (!exact || !F_ISSET(t, B_NODUPS)) break; -delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { + /* + * !!! + * Note, the delete may empty the page, so we need to put a + * new entry into the page immediately. + */ +delete: if (__bt_dleaf(t, key, h, index) == RET_ERROR) { mpool_put(t->bt_mp, h, 0); return (RET_ERROR); } @@ -220,6 +217,12 @@ delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { dest = (char *)h + h->upper; WR_BLEAF(dest, key, data, dflags); + /* If the cursor is on this page, adjust it as necessary. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= index) + ++t->bt_cursor.pg.index; + if (t->bt_order == NOT) if (h->nextpg == P_INVALID) { if (index == NEXTINDEX(h) - 1) { @@ -238,11 +241,10 @@ delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { mpool_put(t->bt_mp, h, MPOOL_DIRTY); success: - if (flags == R_SETCURSOR) { - t->bt_bcursor.pgno = e->page->pgno; - t->bt_bcursor.index = e->index; - } - SET(t, B_MODIFIED); + if (flags == R_SETCURSOR) + __bt_setcur(t, e->page->pgno, e->index); + + F_SET(t, B_MODIFIED); return (RET_SUCCESS); } @@ -267,7 +269,7 @@ bt_fast(t, key, data, exactp) int *exactp; { PAGE *h; - size_t nbytes; + u_int32_t nbytes; int cmp; if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { @@ -278,8 +280,8 @@ bt_fast(t, key, data, exactp) t->bt_cur.index = t->bt_last.index; /* - * If won't fit in this page or have too many keys in this page, have - * to search to get split stack. + * If won't fit in this page or have too many keys in this page, + * have to search to get split stack. */ nbytes = NBLEAFDBT(key->size, data->size); if (h->upper - h->lower < nbytes + sizeof(indx_t)) diff --git a/lib/libc/db/btree/bt_search.c b/lib/libc/db/btree/bt_search.c index ff33421..485afcb 100644 --- a/lib/libc/db/btree/bt_search.c +++ b/lib/libc/db/btree/bt_search.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94"; +static char sccsid[] = "@(#)bt_search.c 8.8 (Berkeley) 7/31/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -45,11 +45,12 @@ static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94"; #include <db.h> #include "btree.h" -static int bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); -static int bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); +static int __bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); +static int __bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); /* - * __BT_SEARCH -- Search a btree for a key. + * __bt_search -- + * Search a btree for a key. * * Parameters: * t: tree to search @@ -95,24 +96,26 @@ __bt_search(t, key, exactp) } /* - * If it's a leaf page, and duplicates aren't allowed, we're - * done. If duplicates are allowed, it's possible that there - * were duplicate keys on duplicate pages, and they were later - * deleted, so we could be on a page with no matches while - * there are matches on other pages. If we're at the start or - * end of a page, check on both sides. + * If it's a leaf page, we're almost done. If no duplicates + * are allowed, or we have an exact match, we're done. Else, + * it's possible that there were matching keys on this page, + * which later deleted, and we're on a page with no matches + * while there are matches on other pages. If at the start or + * end of a page, check the adjacent page. */ if (h->flags & P_BLEAF) { - t->bt_cur.index = base; - *exactp = 0; - if (!ISSET(t, B_NODUPS)) { + if (!F_ISSET(t, B_NODUPS)) { if (base == 0 && - bt_sprev(t, h, key, exactp)) + h->prevpg != P_INVALID && + __bt_sprev(t, h, key, exactp)) return (&t->bt_cur); if (base == NEXTINDEX(h) && - bt_snext(t, h, key, exactp)) + h->nextpg != P_INVALID && + __bt_snext(t, h, key, exactp)) return (&t->bt_cur); } + *exactp = 0; + t->bt_cur.index = base; return (&t->bt_cur); } @@ -125,111 +128,86 @@ __bt_search(t, key, exactp) */ index = base ? base - 1 : base; -next: if (__bt_push(t, h->pgno, index) == RET_ERROR) - return (NULL); +next: BT_PUSH(t, h->pgno, index); pg = GETBINTERNAL(h, index)->pgno; mpool_put(t->bt_mp, h, 0); } } /* - * BT_SNEXT -- Check for an exact match after the key. + * __bt_snext -- + * Check for an exact match after the key. * * Parameters: - * t: tree to search - * h: current page. - * key: key to find + * t: tree + * h: current page + * key: key * exactp: pointer to exact match flag * * Returns: * If an exact match found. */ static int -bt_snext(t, h, key, exactp) +__bt_snext(t, h, key, exactp) BTREE *t; PAGE *h; const DBT *key; int *exactp; { EPG e; - PAGE *tp; - pgno_t pg; - /* Skip until reach the end of the tree or a key. */ - for (pg = h->nextpg; pg != P_INVALID;) { - if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) { - mpool_put(t->bt_mp, h, 0); - return (NULL); - } - if (NEXTINDEX(tp) != 0) - break; - pg = tp->prevpg; - mpool_put(t->bt_mp, tp, 0); - } /* - * The key is either an exact match, or not as good as - * the one we already have. + * Get the next page. The key is either an exact + * match, or not as good as the one we already have. */ - if (pg != P_INVALID) { - e.page = tp; - e.index = NEXTINDEX(tp) - 1; - if (__bt_cmp(t, key, &e) == 0) { - mpool_put(t->bt_mp, h, 0); - t->bt_cur = e; - *exactp = 1; - return (1); - } + if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (0); + e.index = 0; + if (__bt_cmp(t, key, &e) == 0) { + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + return (1); } + mpool_put(t->bt_mp, e.page, 0); return (0); } /* - * BT_SPREV -- Check for an exact match before the key. + * __bt_sprev -- + * Check for an exact match before the key. * * Parameters: - * t: tree to search - * h: current page. - * key: key to find + * t: tree + * h: current page + * key: key * exactp: pointer to exact match flag * * Returns: * If an exact match found. */ static int -bt_sprev(t, h, key, exactp) +__bt_sprev(t, h, key, exactp) BTREE *t; PAGE *h; const DBT *key; int *exactp; { EPG e; - PAGE *tp; - pgno_t pg; - /* Skip until reach the beginning of the tree or a key. */ - for (pg = h->prevpg; pg != P_INVALID;) { - if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) { - mpool_put(t->bt_mp, h, 0); - return (NULL); - } - if (NEXTINDEX(tp) != 0) - break; - pg = tp->prevpg; - mpool_put(t->bt_mp, tp, 0); - } /* - * The key is either an exact match, or not as good as - * the one we already have. + * Get the previous page. The key is either an exact + * match, or not as good as the one we already have. */ - if (pg != P_INVALID) { - e.page = tp; - e.index = NEXTINDEX(tp) - 1; - if (__bt_cmp(t, key, &e) == 0) { - mpool_put(t->bt_mp, h, 0); - t->bt_cur = e; - *exactp = 1; - return (1); - } + if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (0); + e.index = NEXTINDEX(e.page) - 1; + if (__bt_cmp(t, key, &e) == 0) { + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + return (1); } + mpool_put(t->bt_mp, e.page, 0); return (0); } diff --git a/lib/libc/db/btree/bt_seq.c b/lib/libc/db/btree/bt_seq.c index 182ef70..303b481 100644 --- a/lib/libc/db/btree/bt_seq.c +++ b/lib/libc/db/btree/bt_seq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93"; +static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -48,24 +48,21 @@ static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93"; #include <db.h> #include "btree.h" -static int bt_seqadv __P((BTREE *, EPG *, int)); -static int bt_seqset __P((BTREE *, EPG *, DBT *, int)); +static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); +static int __bt_seqadv __P((BTREE *, EPG *, int)); +static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); /* * Sequential scan support. * - * The tree can be scanned sequentially, starting from either end of the tree - * or from any specific key. A scan request before any scanning is done is - * initialized as starting from the least node. - * - * Each tree has an EPGNO which has the current position of the cursor. The - * cursor has to survive deletions/insertions in the tree without losing its - * position. This is done by noting deletions without doing them, and then - * doing them when the cursor moves (or the tree is closed). + * The tree can be scanned sequentially, starting from either end of the + * tree or from any specific key. A scan request before any scanning is + * done is initialized as starting from the least node. */ /* - * __BT_SEQ -- Btree sequential scan interface. + * __bt_seq -- + * Btree sequential scan interface. * * Parameters: * dbp: pointer to access method @@ -96,21 +93,21 @@ __bt_seq(dbp, key, data, flags) /* * If scan unitialized as yet, or starting at a specific record, set - * the scan to a specific key. Both bt_seqset and bt_seqadv pin the - * page the cursor references if they're successful. + * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin + * the page the cursor references if they're successful. */ - switch(flags) { + switch (flags) { case R_NEXT: case R_PREV: - if (ISSET(t, B_SEQINIT)) { - status = bt_seqadv(t, &e, flags); + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + status = __bt_seqadv(t, &e, flags); break; } /* FALLTHROUGH */ - case R_CURSOR: case R_FIRST: case R_LAST: - status = bt_seqset(t, &e, key, flags); + case R_CURSOR: + status = __bt_seqset(t, &e, key, flags); break; default: errno = EINVAL; @@ -118,27 +115,26 @@ __bt_seq(dbp, key, data, flags) } if (status == RET_SUCCESS) { - status = __bt_ret(t, &e, key, data); + __bt_setcur(t, e.page->pgno, e.index); - /* Update the actual cursor. */ - t->bt_bcursor.pgno = e.page->pgno; - t->bt_bcursor.index = e.index; + status = + __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); /* * If the user is doing concurrent access, we copied the * key/data, toss the page. */ - if (ISSET(t, B_DB_LOCK)) + if (F_ISSET(t, B_DB_LOCK)) mpool_put(t->bt_mp, e.page, 0); else t->bt_pinned = e.page; - SET(t, B_SEQINIT); } return (status); } /* - * BT_SEQSET -- Set the sequential scan to a specific key. + * __bt_seqset -- + * Set the sequential scan to a specific key. * * Parameters: * t: tree @@ -153,87 +149,50 @@ __bt_seq(dbp, key, data, flags) * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ static int -bt_seqset(t, ep, key, flags) +__bt_seqset(t, ep, key, flags) BTREE *t; EPG *ep; DBT *key; int flags; { - EPG *e; PAGE *h; pgno_t pg; int exact; /* - * Delete any already deleted record that we've been saving because - * the cursor pointed to it. Since going to a specific key, should - * delete any logically deleted records so they aren't found. - */ - if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) - return (RET_ERROR); - - /* - * Find the first, last or specific key in the tree and point the cursor - * at it. The cursor may not be moved until a new key has been found. + * Find the first, last or specific key in the tree and point the + * cursor at it. The cursor may not be moved until a new key has + * been found. */ - switch(flags) { + switch (flags) { case R_CURSOR: /* Keyed scan. */ /* - * Find the first instance of the key or the smallest key which - * is greater than or equal to the specified key. If run out - * of keys, return RET_SPECIAL. + * Find the first instance of the key or the smallest key + * which is greater than or equal to the specified key. */ if (key->data == NULL || key->size == 0) { errno = EINVAL; return (RET_ERROR); } - e = __bt_first(t, key, &exact); /* Returns pinned page. */ - if (e == NULL) - return (RET_ERROR); - /* - * If at the end of a page, skip any empty pages and find the - * next entry. - */ - if (e->index == NEXTINDEX(e->page)) { - h = e->page; - do { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } while (NEXTINDEX(h) == 0); - e->index = 0; - e->page = h; - } - *ep = *e; - break; + return (__bt_first(t, key, ep, &exact)); case R_FIRST: /* First record. */ case R_NEXT: /* Walk down the left-hand side of the tree. */ for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + if (h->flags & (P_BLEAF | P_RLEAF)) break; pg = GETBINTERNAL(h, 0)->pgno; mpool_put(t->bt_mp, h, 0); } - - /* Skip any empty pages. */ - while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } - - if (NEXTINDEX(h) == 0) { - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - } - ep->page = h; ep->index = 0; break; @@ -243,25 +202,19 @@ bt_seqset(t, ep, key, flags) for (pg = P_ROOT;;) { if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + if (h->flags & (P_BLEAF | P_RLEAF)) break; pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; mpool_put(t->bt_mp, h, 0); } - /* Skip any empty pages. */ - while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) { - pg = h->prevpg; - mpool_put(t->bt_mp, h, 0); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } - - if (NEXTINDEX(h) == 0) { - mpool_put(t->bt_mp, h, 0); - return (RET_SPECIAL); - } - ep->page = h; ep->index = NEXTINDEX(h) - 1; break; @@ -270,7 +223,8 @@ bt_seqset(t, ep, key, flags) } /* - * BT_SEQADVANCE -- Advance the sequential scan. + * __bt_seqadvance -- + * Advance the sequential scan. * * Parameters: * t: tree @@ -283,98 +237,224 @@ bt_seqset(t, ep, key, flags) * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. */ static int -bt_seqadv(t, e, flags) +__bt_seqadv(t, ep, flags) BTREE *t; - EPG *e; + EPG *ep; int flags; { - EPGNO *c, delc; + CURSOR *c; PAGE *h; indx_t index; pgno_t pg; + int exact; - /* Save the current cursor if going to delete it. */ - c = &t->bt_bcursor; - if (ISSET(t, B_DELCRSR)) - delc = *c; + /* + * There are a couple of states that we can be in. The cursor has + * been initialized by the time we get here, but that's all we know. + */ + c = &t->bt_cursor; - if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) + /* + * The cursor was deleted where there weren't any duplicate records, + * so the key was saved. Find out where that key would go in the + * current tree. It doesn't matter if the returned key is an exact + * match or not -- if it's an exact match, the record was added after + * the delete so we can just return it. If not, as long as there's + * a record there, return it. + */ + if (F_ISSET(c, CURS_ACQUIRE)) + return (__bt_first(t, &c->key, ep, &exact)); + + /* Get the page referenced by the cursor. */ + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) return (RET_ERROR); /* - * Find the next/previous record in the tree and point the cursor at it. - * The cursor may not be moved until a new key has been found. + * Find the next/previous record in the tree and point the cursor at + * it. The cursor may not be moved until a new key has been found. */ - index = c->index; - switch(flags) { + switch (flags) { case R_NEXT: /* Next record. */ + /* + * The cursor was deleted in duplicate records, and moved + * forward to a record that has yet to be returned. Clear + * that flag, and return the record. + */ + if (F_ISSET(c, CURS_AFTER)) + goto usecurrent; + index = c->pg.index; if (++index == NEXTINDEX(h)) { - do { - pg = h->nextpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } while (NEXTINDEX(h) == 0); + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); index = 0; } break; case R_PREV: /* Previous record. */ - if (index-- == 0) { - do { - pg = h->prevpg; - mpool_put(t->bt_mp, h, 0); - if (pg == P_INVALID) - return (RET_SPECIAL); - if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) - return (RET_ERROR); - } while (NEXTINDEX(h) == 0); - index = NEXTINDEX(h) - 1; + /* + * The cursor was deleted in duplicate records, and moved + * backward to a record that has yet to be returned. Clear + * that flag, and return the record. + */ + if (F_ISSET(c, CURS_BEFORE)) { +usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); + ep->page = h; + ep->index = c->pg.index; + return (RET_SUCCESS); } + index = c->pg.index; + if (index == 0) { + pg = h->prevpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + index = NEXTINDEX(h) - 1; + } else + --index; break; } - e->page = h; - e->index = index; + ep->page = h; + ep->index = index; + return (RET_SUCCESS); +} + +/* + * __bt_first -- + * Find the first entry. + * + * Parameters: + * t: the tree + * key: the key + * erval: return EPG + * exactp: pointer to exact match flag + * + * Returns: + * The first entry in the tree greater than or equal to key, + * or RET_SPECIAL if no such key exists. + */ +static int +__bt_first(t, key, erval, exactp) + BTREE *t; + const DBT *key; + EPG *erval; + int *exactp; +{ + PAGE *h; + EPG *ep, save; + pgno_t pg; /* - * Delete any already deleted record that we've been saving because the - * cursor pointed to it. This could cause the new index to be shifted - * down by one if the record we're deleting is on the same page and has - * a larger index. + * Find any matching record; __bt_search pins the page. + * + * If it's an exact match and duplicates are possible, walk backwards + * in the tree until we find the first one. Otherwise, make sure it's + * a valid key (__bt_search may return an index just past the end of a + * page) and return it. */ - if (ISSET(t, B_DELCRSR)) { - CLR(t, B_DELCRSR); /* Don't try twice. */ - if (c->pgno == delc.pgno && c->index > delc.index) - --c->index; - if (__bt_crsrdel(t, &delc)) + if ((ep = __bt_search(t, key, exactp)) == NULL) + return (NULL); + if (*exactp) { + if (F_ISSET(t, B_NODUPS)) { + *erval = *ep; + return (RET_SUCCESS); + } + + /* + * Walk backwards, as long as the entry matches and there are + * keys left in the tree. Save a copy of each match in case + * we go too far. + */ + save = *ep; + h = ep->page; + do { + if (save.page->pgno != ep->page->pgno) { + mpool_put(t->bt_mp, save.page, 0); + save = *ep; + } else + save.index = ep->index; + + /* + * Don't unpin the page the last (or original) match + * was on, but make sure it's unpinned if an error + * occurs. + */ + if (ep->index == 0) { + if (h->prevpg == P_INVALID) + break; + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, + h->prevpg, 0)) == NULL) { + if (h->pgno == save.page->pgno) + mpool_put(t->bt_mp, + save.page, 0); + return (RET_ERROR); + } + ep->page = h; + ep->index = NEXTINDEX(h); + } + --ep->index; + } while (__bt_cmp(t, key, ep) == 0); + + /* + * Reach here with the last page that was looked at pinned, + * which may or may not be the same as the last (or original) + * match page. If it's not useful, release it. + */ + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + + *erval = save; + return (RET_SUCCESS); + } + + /* If at the end of a page, find the next entry. */ + if (ep->index == NEXTINDEX(ep->page)) { + h = ep->page; + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) return (RET_ERROR); + ep->index = 0; + ep->page = h; } + *erval = *ep; return (RET_SUCCESS); } /* - * __BT_CRSRDEL -- Delete the record referenced by the cursor. + * __bt_setcur -- + * Set the cursor to an entry in the tree. * * Parameters: - * t: tree - * - * Returns: - * RET_ERROR, RET_SUCCESS + * t: the tree + * pgno: page number + * index: page index */ -int -__bt_crsrdel(t, c) +void +__bt_setcur(t, pgno, index) BTREE *t; - EPGNO *c; + pgno_t pgno; + u_int index; { - PAGE *h; - int status; + /* Lose any already deleted key. */ + if (t->bt_cursor.key.data != NULL) { + free(t->bt_cursor.key.data); + t->bt_cursor.key.size = 0; + t->bt_cursor.key.data = NULL; + } + F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); - CLR(t, B_DELCRSR); /* Don't try twice. */ - if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) - return (RET_ERROR); - status = __bt_dleaf(t, h, c->index); - mpool_put(t->bt_mp, h, MPOOL_DIRTY); - return (status); + /* Update the cursor. */ + t->bt_cursor.pg.pgno = pgno; + t->bt_cursor.pg.index = index; + F_SET(&t->bt_cursor, CURS_INIT); } diff --git a/lib/libc/db/btree/bt_split.c b/lib/libc/db/btree/bt_split.c index 4a572c0..1646d82 100644 --- a/lib/libc/db/btree/bt_split.c +++ b/lib/libc/db/btree/bt_split.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_split.c 8.3 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_split.c 8.9 (Berkeley) 7/26/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/types.h> @@ -79,13 +79,13 @@ u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; * RET_ERROR, RET_SUCCESS */ int -__bt_split(t, sp, key, data, flags, ilen, skip) +__bt_split(t, sp, key, data, flags, ilen, argskip) BTREE *t; PAGE *sp; const DBT *key, *data; int flags; size_t ilen; - indx_t skip; + u_int32_t argskip; { BINTERNAL *bi; BLEAF *bl, *tbl; @@ -93,7 +93,8 @@ __bt_split(t, sp, key, data, flags, ilen, skip) EPGNO *parent; PAGE *h, *l, *r, *lchild, *rchild; indx_t nxtindex; - size_t n, nbytes, nksize; + u_int16_t skip; + u_int32_t n, nbytes, nksize; int parentsplit; char *dest; @@ -103,6 +104,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip) * skip set to the offset which should be used. Additionally, l and r * are pinned. */ + skip = argskip; h = sp->pgno == P_ROOT ? bt_root(t, sp, &l, &r, &skip, ilen) : bt_page(t, sp, &l, &r, &skip, ilen); @@ -115,14 +117,14 @@ __bt_split(t, sp, key, data, flags, ilen, skip) */ h->linp[skip] = h->upper -= ilen; dest = (char *)h + h->upper; - if (ISSET(t, R_RECNO)) + if (F_ISSET(t, R_RECNO)) WR_RLEAF(dest, data, flags) else WR_BLEAF(dest, key, data, flags) /* If the root page was split, make it look right. */ if (sp->pgno == P_ROOT && - (ISSET(t, R_RECNO) ? + (F_ISSET(t, R_RECNO) ? bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) goto err2; @@ -230,7 +232,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip) } /* Insert the key into the parent page. */ - switch(rchild->flags & P_TYPE) { + switch (rchild->flags & P_TYPE) { case P_BINTERNAL: h->linp[skip] = h->upper -= nbytes; dest = (char *)h + h->linp[skip]; @@ -295,7 +297,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip) /* If the root page was split, make it look right. */ if (sp->pgno == P_ROOT && - (ISSET(t, R_RECNO) ? + (F_ISSET(t, R_RECNO) ? bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) goto err1; @@ -388,6 +390,9 @@ bt_page(t, h, lp, rp, skip, ilen) mpool_put(t->bt_mp, r, 0); return (NULL); } +#ifdef PURIFY + memset(l, 0xff, t->bt_psize); +#endif l->pgno = h->pgno; l->nextpg = r->pgno; l->prevpg = h->prevpg; @@ -403,7 +408,7 @@ bt_page(t, h, lp, rp, skip, ilen) return (NULL); } tp->prevpg = r->pgno; - mpool_put(t->bt_mp, tp, 0); + mpool_put(t->bt_mp, tp, MPOOL_DIRTY); } /* @@ -534,7 +539,7 @@ bt_broot(t, h, l, r) { BINTERNAL *bi; BLEAF *bl; - size_t nbytes; + u_int32_t nbytes; char *dest; /* @@ -550,7 +555,7 @@ bt_broot(t, h, l, r) dest = (char *)h + h->upper; WR_BINTERNAL(dest, 0, l->pgno, 0); - switch(h->flags & P_TYPE) { + switch (h->flags & P_TYPE) { case P_BLEAF: bl = GETBLEAF(r, 0); nbytes = NBINTERNAL(bl->ksize); @@ -613,12 +618,12 @@ bt_psplit(t, h, l, r, pskip, ilen) { BINTERNAL *bi; BLEAF *bl; + CURSOR *c; RLEAF *rl; - EPGNO *c; PAGE *rval; void *src; indx_t full, half, nxt, off, skip, top, used; - size_t nbytes; + u_int32_t nbytes; int bigkeycnt, isbigkey; /* @@ -702,19 +707,16 @@ bt_psplit(t, h, l, r, pskip, ilen) * cursor is at or past the skipped slot, the cursor is incremented by * one. If the cursor is on the right page, it is decremented by the * number of records split to the left page. - * - * Don't bother checking for the B_SEQINIT flag, the page number will - * be P_INVALID. */ - c = &t->bt_bcursor; - if (c->pgno == h->pgno) { - if (c->index >= skip) - ++c->index; - if (c->index < nxt) /* Left page. */ - c->pgno = l->pgno; + c = &t->bt_cursor; + if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) { + if (c->pg.index >= skip) + ++c->pg.index; + if (c->pg.index < nxt) /* Left page. */ + c->pg.pgno = l->pgno; else { /* Right page. */ - c->pgno = r->pgno; - c->index -= nxt; + c->pg.pgno = r->pgno; + c->pg.index -= nxt; } } diff --git a/lib/libc/db/btree/bt_utils.c b/lib/libc/db/btree/bt_utils.c index d2d1f73..9c1438e 100644 --- a/lib/libc/db/btree/bt_utils.c +++ b/lib/libc/db/btree/bt_utils.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1990, 1993 + * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +35,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94"; +static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94"; #endif /* LIBC_SCCS and not lint */ #include <sys/param.h> @@ -48,78 +48,91 @@ static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94"; #include "btree.h" /* - * __BT_RET -- Build return key/data pair as a result of search or scan. + * __bt_ret -- + * Build return key/data pair. * * Parameters: * t: tree - * d: LEAF to be returned to the user. + * e: key/data pair to be returned * key: user's key structure (NULL if not to be filled in) - * data: user's data structure + * rkey: memory area to hold key + * data: user's data structure (NULL if not to be filled in) + * rdata: memory area to hold data + * copy: always copy the key/data item * * Returns: * RET_SUCCESS, RET_ERROR. */ int -__bt_ret(t, e, key, data) +__bt_ret(t, e, key, rkey, data, rdata, copy) BTREE *t; EPG *e; - DBT *key, *data; + DBT *key, *rkey, *data, *rdata; + int copy; { - register BLEAF *bl; - register void *p; + BLEAF *bl; + void *p; bl = GETBLEAF(e->page, e->index); /* - * We always copy big keys/data to make them contigous. Otherwise, - * we leave the page pinned and don't copy unless the user specified + * We must copy big keys/data to make them contigous. Otherwise, + * leave the page pinned and don't copy unless the user specified * concurrent access. */ - if (bl->flags & P_BIGDATA) { - if (__ovfl_get(t, bl->bytes + bl->ksize, - &data->size, &t->bt_dbuf, &t->bt_dbufsz)) + if (key == NULL) + goto dataonly; + + if (bl->flags & P_BIGKEY) { + if (__ovfl_get(t, bl->bytes, + &key->size, &rkey->data, &rkey->size)) return (RET_ERROR); - data->data = t->bt_dbuf; - } else if (ISSET(t, B_DB_LOCK)) { - /* Use +1 in case the first record retrieved is 0 length. */ - if (bl->dsize + 1 > t->bt_dbufsz) { - if ((p = - (void *)realloc(t->bt_dbuf, bl->dsize + 1)) == NULL) + key->data = rkey->data; + } else if (copy || F_ISSET(t, B_DB_LOCK)) { + if (bl->ksize > rkey->size) { + p = (void *)(rkey->data == NULL ? + malloc(bl->ksize) : realloc(rkey->data, bl->ksize)); + if (p == NULL) return (RET_ERROR); - t->bt_dbuf = p; - t->bt_dbufsz = bl->dsize + 1; + rkey->data = p; + rkey->size = bl->ksize; } - memmove(t->bt_dbuf, bl->bytes + bl->ksize, bl->dsize); - data->size = bl->dsize; - data->data = t->bt_dbuf; + memmove(rkey->data, bl->bytes, bl->ksize); + key->size = bl->ksize; + key->data = rkey->data; } else { - data->size = bl->dsize; - data->data = bl->bytes + bl->ksize; + key->size = bl->ksize; + key->data = bl->bytes; } - if (key == NULL) +dataonly: + if (data == NULL) return (RET_SUCCESS); - if (bl->flags & P_BIGKEY) { - if (__ovfl_get(t, bl->bytes, - &key->size, &t->bt_kbuf, &t->bt_kbufsz)) + if (bl->flags & P_BIGDATA) { + if (__ovfl_get(t, bl->bytes + bl->ksize, + &data->size, &rdata->data, &rdata->size)) return (RET_ERROR); - key->data = t->bt_kbuf; - } else if (ISSET(t, B_DB_LOCK)) { - if (bl->ksize > t->bt_kbufsz) { - if ((p = - (void *)realloc(t->bt_kbuf, bl->ksize)) == NULL) + data->data = rdata->data; + } else if (copy || F_ISSET(t, B_DB_LOCK)) { + /* Use +1 in case the first record retrieved is 0 length. */ + if (bl->dsize + 1 > rdata->size) { + p = (void *)(rdata->data == NULL ? + malloc(bl->dsize + 1) : + realloc(rdata->data, bl->dsize + 1)); + if (p == NULL) return (RET_ERROR); - t->bt_kbuf = p; - t->bt_kbufsz = bl->ksize; + rdata->data = p; + rdata->size = bl->dsize + 1; } - memmove(t->bt_kbuf, bl->bytes, bl->ksize); - key->size = bl->ksize; - key->data = t->bt_kbuf; + memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize); + data->size = bl->dsize; + data->data = rdata->data; } else { - key->size = bl->ksize; - key->data = bl->bytes; + data->size = bl->dsize; + data->data = bl->bytes + bl->ksize; } + return (RET_SUCCESS); } @@ -180,9 +193,9 @@ __bt_cmp(t, k1, e) if (bigkey) { if (__ovfl_get(t, bigkey, - &k2.size, &t->bt_dbuf, &t->bt_dbufsz)) + &k2.size, &t->bt_rdata.data, &t->bt_rdata.size)) return (RET_ERROR); - k2.data = t->bt_dbuf; + k2.data = t->bt_rdata.data; } return ((*t->bt_cmp)(k1, &k2)); } diff --git a/lib/libc/db/btree/btree.h b/lib/libc/db/btree/btree.h index dd798ec..36d35c9 100644 --- a/lib/libc/db/btree/btree.h +++ b/lib/libc/db/btree/btree.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1991, 1993 + * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -33,9 +33,14 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)btree.h 8.5 (Berkeley) 2/21/94 + * @(#)btree.h 8.11 (Berkeley) 8/17/94 */ +/* Macros to set/clear/test flags. */ +#define F_SET(p, f) (p)->flags |= (f) +#define F_CLR(p, f) (p)->flags &= ~(f) +#define F_ISSET(p, f) ((p)->flags & (f)) + #include <mpool.h> #define DEFMINKEYPAGE (2) /* Minimum keys per page */ @@ -79,8 +84,9 @@ typedef struct _page { } PAGE; /* First and next index. */ -#define BTDATAOFF (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ - sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) +#define BTDATAOFF \ + (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ + sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) #define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t)) /* @@ -99,9 +105,8 @@ typedef struct _page { * be manipulated without copying. (This presumes that 32 bit items can be * manipulated on this system.) */ -#define LALIGN(n) \ - (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) -#define NOVFLSIZE (sizeof(pgno_t) + sizeof(size_t)) +#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) +#define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t)) /* * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno} @@ -113,7 +118,7 @@ typedef struct _page { * some minor modifications of the above rule. */ typedef struct _binternal { - size_t ksize; /* key size */ + u_int32_t ksize; /* key size */ pgno_t pgno; /* page number stored on */ #define P_BIGDATA 0x01 /* overflow data */ #define P_BIGKEY 0x02 /* overflow key */ @@ -122,21 +127,21 @@ typedef struct _binternal { } BINTERNAL; /* Get the page's BINTERNAL structure at index indx. */ -#define GETBINTERNAL(pg, indx) \ +#define GETBINTERNAL(pg, indx) \ ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ -#define NBINTERNAL(len) \ - LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) +#define NBINTERNAL(len) \ + LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) /* Copy a BINTERNAL entry to the page. */ -#define WR_BINTERNAL(p, size, pgno, flags) { \ - *(size_t *)p = size; \ - p += sizeof(size_t); \ - *(pgno_t *)p = pgno; \ - p += sizeof(pgno_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ +#define WR_BINTERNAL(p, size, pgno, flags) { \ + *(u_int32_t *)p = size; \ + p += sizeof(u_int32_t); \ + *(pgno_t *)p = pgno; \ + p += sizeof(pgno_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ } /* @@ -149,78 +154,78 @@ typedef struct _rinternal { } RINTERNAL; /* Get the page's RINTERNAL structure at index indx. */ -#define GETRINTERNAL(pg, indx) \ +#define GETRINTERNAL(pg, indx) \ ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ -#define NRINTERNAL \ +#define NRINTERNAL \ LALIGN(sizeof(recno_t) + sizeof(pgno_t)) /* Copy a RINTERAL entry to the page. */ -#define WR_RINTERNAL(p, nrecs, pgno) { \ - *(recno_t *)p = nrecs; \ - p += sizeof(recno_t); \ - *(pgno_t *)p = pgno; \ +#define WR_RINTERNAL(p, nrecs, pgno) { \ + *(recno_t *)p = nrecs; \ + p += sizeof(recno_t); \ + *(pgno_t *)p = pgno; \ } /* For the btree leaf pages, the item is a key and data pair. */ typedef struct _bleaf { - size_t ksize; /* size of key */ - size_t dsize; /* size of data */ + u_int32_t ksize; /* size of key */ + u_int32_t dsize; /* size of data */ u_char flags; /* P_BIGDATA, P_BIGKEY */ char bytes[1]; /* data */ } BLEAF; /* Get the page's BLEAF structure at index indx. */ -#define GETBLEAF(pg, indx) \ +#define GETBLEAF(pg, indx) \ ((BLEAF *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize) /* Get the number of bytes in the user's key/data pair. */ -#define NBLEAFDBT(ksize, dsize) \ - LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \ +#define NBLEAFDBT(ksize, dsize) \ + LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \ (ksize) + (dsize)) /* Copy a BLEAF entry to the page. */ -#define WR_BLEAF(p, key, data, flags) { \ - *(size_t *)p = key->size; \ - p += sizeof(size_t); \ - *(size_t *)p = data->size; \ - p += sizeof(size_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ - memmove(p, key->data, key->size); \ - p += key->size; \ - memmove(p, data->data, data->size); \ +#define WR_BLEAF(p, key, data, flags) { \ + *(u_int32_t *)p = key->size; \ + p += sizeof(u_int32_t); \ + *(u_int32_t *)p = data->size; \ + p += sizeof(u_int32_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, key->data, key->size); \ + p += key->size; \ + memmove(p, data->data, data->size); \ } /* For the recno leaf pages, the item is a data entry. */ typedef struct _rleaf { - size_t dsize; /* size of data */ + u_int32_t dsize; /* size of data */ u_char flags; /* P_BIGDATA */ char bytes[1]; } RLEAF; /* Get the page's RLEAF structure at index indx. */ -#define GETRLEAF(pg, indx) \ +#define GETRLEAF(pg, indx) \ ((RLEAF *)((char *)(pg) + (pg)->linp[indx])) /* Get the number of bytes in the entry. */ #define NRLEAF(p) NRLEAFDBT((p)->dsize) /* Get the number of bytes from the user's data. */ -#define NRLEAFDBT(dsize) \ - LALIGN(sizeof(size_t) + sizeof(u_char) + (dsize)) +#define NRLEAFDBT(dsize) \ + LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize)) /* Copy a RLEAF entry to the page. */ -#define WR_RLEAF(p, data, flags) { \ - *(size_t *)p = data->size; \ - p += sizeof(size_t); \ - *(u_char *)p = flags; \ - p += sizeof(u_char); \ - memmove(p, data->data, data->size); \ +#define WR_RLEAF(p, data, flags) { \ + *(u_int32_t *)p = data->size; \ + p += sizeof(u_int32_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, data->data, data->size); \ } /* @@ -232,12 +237,6 @@ typedef struct _rleaf { * record less than key in the tree so that descents work. Leaf page searches * must find the smallest record greater than key so that the returned index * is the record's correct position for insertion. - * - * One comment about cursors. The cursor key is never removed from the tree, - * even if deleted. This is because it is quite difficult to decide where the - * cursor should be when other keys have been inserted/deleted in the tree; - * duplicate keys make it impossible. This scheme does require extra work - * though, to make sure that we don't perform an operation on a deleted key. */ typedef struct _epgno { pgno_t pgno; /* the page number */ @@ -250,53 +249,90 @@ typedef struct _epg { } EPG; /* - * The metadata of the tree. The m_nrecs field is used only by the RECNO code. + * About cursors. The cursor (and the page that contained the key/data pair + * that it referenced) can be deleted, which makes things a bit tricky. If + * there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set + * or there simply aren't any duplicates of the key) we copy the key that it + * referenced when it's deleted, and reacquire a new cursor key if the cursor + * is used again. If there are duplicates keys, we move to the next/previous + * key, and set a flag so that we know what happened. NOTE: if duplicate (to + * the cursor) keys are added to the tree during this process, it is undefined + * if they will be returned or not in a cursor scan. + * + * The flags determine the possible states of the cursor: + * + * CURS_INIT The cursor references *something*. + * CURS_ACQUIRE The cursor was deleted, and a key has been saved so that + * we can reacquire the right position in the tree. + * CURS_AFTER, CURS_BEFORE + * The cursor was deleted, and now references a key/data pair + * that has not yet been returned, either before or after the + * deleted key/data pair. + * XXX + * This structure is broken out so that we can eventually offer multiple + * cursors as part of the DB interface. + */ +typedef struct _cursor { + EPGNO pg; /* B: Saved tree reference. */ + DBT key; /* B: Saved key, or key.data == NULL. */ + recno_t rcursor; /* R: recno cursor (1-based) */ + +#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */ +#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */ +#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */ +#define CURS_INIT 0x08 /* RB: Cursor initialized. */ + u_int8_t flags; +} CURSOR; + +/* + * The metadata of the tree. The nrecs field is used only by the RECNO code. * This is because the btree doesn't really need it and it requires that every * put or delete call modify the metadata. */ typedef struct _btmeta { - u_int32_t m_magic; /* magic number */ - u_int32_t m_version; /* version */ - u_int32_t m_psize; /* page size */ - u_int32_t m_free; /* page number of first free page */ - u_int32_t m_nrecs; /* R: number of records */ + u_int32_t magic; /* magic number */ + u_int32_t version; /* version */ + u_int32_t psize; /* page size */ + u_int32_t free; /* page number of first free page */ + u_int32_t nrecs; /* R: number of records */ + #define SAVEMETA (B_NODUPS | R_RECNO) - u_int32_t m_flags; /* bt_flags & SAVEMETA */ - u_int32_t m_unused; /* unused */ + u_int32_t flags; /* bt_flags & SAVEMETA */ } BTMETA; /* The in-memory btree/recno data structure. */ typedef struct _btree { - MPOOL *bt_mp; /* memory pool cookie */ + MPOOL *bt_mp; /* memory pool cookie */ - DB *bt_dbp; /* pointer to enclosing DB */ + DB *bt_dbp; /* pointer to enclosing DB */ - EPG bt_cur; /* current (pinned) page */ - PAGE *bt_pinned; /* page pinned across calls */ + EPG bt_cur; /* current (pinned) page */ + PAGE *bt_pinned; /* page pinned across calls */ - EPGNO bt_bcursor; /* B: btree cursor */ - recno_t bt_rcursor; /* R: recno cursor (1-based) */ + CURSOR bt_cursor; /* cursor */ -#define BT_POP(t) (t->bt_sp ? t->bt_stack + --t->bt_sp : NULL) -#define BT_CLR(t) (t->bt_sp = 0) - EPGNO *bt_stack; /* stack of parent pages */ - u_int bt_sp; /* current stack pointer */ - u_int bt_maxstack; /* largest stack */ +#define BT_PUSH(t, p, i) { \ + t->bt_sp->pgno = p; \ + t->bt_sp->index = i; \ + ++t->bt_sp; \ +} +#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp) +#define BT_CLR(t) (t->bt_sp = t->bt_stack) + EPGNO bt_stack[50]; /* stack of parent pages */ + EPGNO *bt_sp; /* current stack pointer */ - char *bt_kbuf; /* key buffer */ - size_t bt_kbufsz; /* key buffer size */ - char *bt_dbuf; /* data buffer */ - size_t bt_dbufsz; /* data buffer size */ + DBT bt_rkey; /* returned key */ + DBT bt_rdata; /* returned data */ - int bt_fd; /* tree file descriptor */ + int bt_fd; /* tree file descriptor */ - pgno_t bt_free; /* next free page */ + pgno_t bt_free; /* next free page */ u_int32_t bt_psize; /* page size */ - indx_t bt_ovflsize; /* cut-off for key/data overflow */ - int bt_lorder; /* byte order */ + indx_t bt_ovflsize; /* cut-off for key/data overflow */ + int bt_lorder; /* byte order */ /* sorted order */ enum { NOT, BACK, FORWARD } bt_order; - EPGNO bt_last; /* last insert */ + EPGNO bt_last; /* last insert */ /* B: key comparison function */ int (*bt_cmp) __P((const DBT *, const DBT *)); @@ -305,49 +341,43 @@ typedef struct _btree { /* R: recno input function */ int (*bt_irec) __P((struct _btree *, recno_t)); - FILE *bt_rfp; /* R: record FILE pointer */ - int bt_rfd; /* R: record file descriptor */ + FILE *bt_rfp; /* R: record FILE pointer */ + int bt_rfd; /* R: record file descriptor */ - caddr_t bt_cmap; /* R: current point in mapped space */ - caddr_t bt_smap; /* R: start of mapped space */ - caddr_t bt_emap; /* R: end of mapped space */ - size_t bt_msize; /* R: size of mapped region. */ + caddr_t bt_cmap; /* R: current point in mapped space */ + caddr_t bt_smap; /* R: start of mapped space */ + caddr_t bt_emap; /* R: end of mapped space */ + size_t bt_msize; /* R: size of mapped region. */ - recno_t bt_nrecs; /* R: number of records */ - size_t bt_reclen; /* R: fixed record length */ - u_char bt_bval; /* R: delimiting byte/pad character */ + recno_t bt_nrecs; /* R: number of records */ + size_t bt_reclen; /* R: fixed record length */ + u_char bt_bval; /* R: delimiting byte/pad character */ /* * NB: * B_NODUPS and R_RECNO are stored on disk, and may not be changed. */ -#define B_DELCRSR 0x00001 /* cursor has been deleted */ -#define B_INMEM 0x00002 /* in-memory tree */ -#define B_METADIRTY 0x00004 /* need to write metadata */ -#define B_MODIFIED 0x00008 /* tree modified */ -#define B_NEEDSWAP 0x00010 /* if byte order requires swapping */ +#define B_INMEM 0x00001 /* in-memory tree */ +#define B_METADIRTY 0x00002 /* need to write metadata */ +#define B_MODIFIED 0x00004 /* tree modified */ +#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */ +#define B_RDONLY 0x00010 /* read-only tree */ + #define B_NODUPS 0x00020 /* no duplicate keys permitted */ -#define B_RDONLY 0x00040 /* read-only tree */ #define R_RECNO 0x00080 /* record oriented tree */ -#define B_SEQINIT 0x00100 /* sequential scan initialized */ - -#define R_CLOSEFP 0x00200 /* opened a file pointer */ -#define R_EOF 0x00400 /* end of input file reached. */ -#define R_FIXLEN 0x00800 /* fixed length records */ -#define R_MEMMAPPED 0x01000 /* memory mapped file. */ -#define R_INMEM 0x02000 /* in-memory file */ -#define R_MODIFIED 0x04000 /* modified file */ -#define R_RDONLY 0x08000 /* read-only file */ -#define B_DB_LOCK 0x10000 /* DB_LOCK specified. */ -#define B_DB_SHMEM 0x20000 /* DB_SHMEM specified. */ -#define B_DB_TXN 0x40000 /* DB_TXN specified. */ - - u_int32_t bt_flags; /* btree state */ +#define R_CLOSEFP 0x00040 /* opened a file pointer */ +#define R_EOF 0x00100 /* end of input file reached. */ +#define R_FIXLEN 0x00200 /* fixed length records */ +#define R_MEMMAPPED 0x00400 /* memory mapped file. */ +#define R_INMEM 0x00800 /* in-memory file */ +#define R_MODIFIED 0x01000 /* modified file */ +#define R_RDONLY 0x02000 /* read-only file */ + +#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */ +#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */ +#define B_DB_TXN 0x10000 /* DB_TXN specified. */ + u_int32_t flags; } BTREE; -#define SET(t, f) ((t)->bt_flags |= (f)) -#define CLR(t, f) ((t)->bt_flags &= ~(f)) -#define ISSET(t, f) ((t)->bt_flags & (f)) - #include "extern.h" diff --git a/lib/libc/db/btree/extern.h b/lib/libc/db/btree/extern.h index 4007bc7..ebd9c54 100644 --- a/lib/libc/db/btree/extern.h +++ b/lib/libc/db/btree/extern.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1991, 1993 + * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)extern.h 8.3 (Berkeley) 2/21/94 + * @(#)extern.h 8.10 (Berkeley) 7/20/94 */ int __bt_close __P((DB *)); @@ -39,9 +39,8 @@ int __bt_crsrdel __P((BTREE *, EPGNO *)); int __bt_defcmp __P((const DBT *, const DBT *)); size_t __bt_defpfx __P((const DBT *, const DBT *)); int __bt_delete __P((const DB *, const DBT *, u_int)); -int __bt_dleaf __P((BTREE *, PAGE *, int)); +int __bt_dleaf __P((BTREE *, const DBT *, PAGE *, u_int)); int __bt_fd __P((const DB *)); -EPG *__bt_first __P((BTREE *, const DBT *, int *)); int __bt_free __P((BTREE *, PAGE *)); int __bt_get __P((const DB *, const DBT *, DBT *, u_int)); PAGE *__bt_new __P((BTREE *, pgno_t *)); @@ -49,15 +48,16 @@ void __bt_pgin __P((void *, pgno_t, void *)); void __bt_pgout __P((void *, pgno_t, void *)); int __bt_push __P((BTREE *, pgno_t, int)); int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int)); -int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *)); +int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int)); EPG *__bt_search __P((BTREE *, const DBT *, int *)); int __bt_seq __P((const DB *, DBT *, DBT *, u_int)); +void __bt_setcur __P((BTREE *, pgno_t, u_int)); int __bt_split __P((BTREE *, PAGE *, - const DBT *, const DBT *, int, size_t, indx_t)); + const DBT *, const DBT *, int, size_t, u_int32_t)); int __bt_sync __P((const DB *, u_int)); int __ovfl_delete __P((BTREE *, void *)); -int __ovfl_get __P((BTREE *, void *, size_t *, char **, size_t *)); +int __ovfl_get __P((BTREE *, void *, size_t *, void **, size_t *)); int __ovfl_put __P((BTREE *, const DBT *, pgno_t *)); #ifdef DEBUG |