summaryrefslogtreecommitdiffstats
path: root/lib/libc/db/btree
diff options
context:
space:
mode:
authorpst <pst@FreeBSD.org>1996-02-27 01:59:15 +0000
committerpst <pst@FreeBSD.org>1996-02-27 01:59:15 +0000
commit2fd01690e924737239e87aee0c117e3369de6c3e (patch)
tree223a02ae7b36628c291a44ca56e521009362cc01 /lib/libc/db/btree
parent584fea2f92d8bdbedf4d4b45cff7acd9602ae1ad (diff)
parentc2306789fe98946429af7462a2fd453454034a79 (diff)
downloadFreeBSD-src-2fd01690e924737239e87aee0c117e3369de6c3e.zip
FreeBSD-src-2fd01690e924737239e87aee0c117e3369de6c3e.tar.gz
This commit was generated by cvs2svn to compensate for changes in r14272,
which included commits to RCS files with non-trunk default branches.
Diffstat (limited to 'lib/libc/db/btree')
-rw-r--r--lib/libc/db/btree/Makefile.inc4
-rw-r--r--lib/libc/db/btree/bt_conv.c36
-rw-r--r--lib/libc/db/btree/bt_debug.c44
-rw-r--r--lib/libc/db/btree/bt_delete.c665
-rw-r--r--lib/libc/db/btree/bt_get.c141
-rw-r--r--lib/libc/db/btree/bt_overflow.c26
-rw-r--r--lib/libc/db/btree/bt_page.c21
-rw-r--r--lib/libc/db/btree/bt_put.c106
-rw-r--r--lib/libc/db/btree/bt_search.c130
-rw-r--r--lib/libc/db/btree/bt_seq.c380
-rw-r--r--lib/libc/db/btree/bt_split.c52
-rw-r--r--lib/libc/db/btree/bt_utils.c103
-rw-r--r--lib/libc/db/btree/btree.h262
-rw-r--r--lib/libc/db/btree/extern.h14
14 files changed, 1148 insertions, 836 deletions
diff --git a/lib/libc/db/btree/Makefile.inc b/lib/libc/db/btree/Makefile.inc
index 71f8dfc..8ed7649 100644
--- a/lib/libc/db/btree/Makefile.inc
+++ b/lib/libc/db/btree/Makefile.inc
@@ -1,7 +1,7 @@
-# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+# @(#)Makefile.inc 8.2 (Berkeley) 7/14/94
.PATH: ${.CURDIR}/db/btree
SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \
bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \
- bt_stack.c bt_utils.c
+ bt_utils.c
diff --git a/lib/libc/db/btree/bt_conv.c b/lib/libc/db/btree/bt_conv.c
index eb49340..1cb208b1 100644
--- a/lib/libc/db/btree/bt_conv.c
+++ b/lib/libc/db/btree/bt_conv.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_conv.c 8.2 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
@@ -68,7 +68,7 @@ __bt_pgin(t, pg, pp)
u_char flags;
char *p;
- if (!ISSET(((BTREE *)t), B_NEEDSWAP))
+ if (!F_ISSET(((BTREE *)t), B_NEEDSWAP))
return;
if (pg == P_META) {
mswap(pp);
@@ -89,7 +89,7 @@ __bt_pgin(t, pg, pp)
M_16_SWAP(h->linp[i]);
p = (char *)GETBINTERNAL(h, i);
P_32_SWAP(p);
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
if (*(u_char *)p & P_BIGKEY) {
@@ -104,9 +104,9 @@ __bt_pgin(t, pg, pp)
M_16_SWAP(h->linp[i]);
p = (char *)GETBLEAF(h, i);
P_32_SWAP(p);
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
P_32_SWAP(p);
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
flags = *(u_char *)p;
if (flags & (P_BIGKEY | P_BIGDATA)) {
p += sizeof(u_char);
@@ -116,7 +116,7 @@ __bt_pgin(t, pg, pp)
P_32_SWAP(p);
}
if (flags & P_BIGDATA) {
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
@@ -136,7 +136,7 @@ __bt_pgout(t, pg, pp)
u_char flags;
char *p;
- if (!ISSET(((BTREE *)t), B_NEEDSWAP))
+ if (!F_ISSET(((BTREE *)t), B_NEEDSWAP))
return;
if (pg == P_META) {
mswap(pp);
@@ -149,7 +149,7 @@ __bt_pgout(t, pg, pp)
for (i = 0; i < top; i++) {
p = (char *)GETBINTERNAL(h, i);
P_32_SWAP(p);
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
if (*(u_char *)p & P_BIGKEY) {
@@ -164,9 +164,9 @@ __bt_pgout(t, pg, pp)
for (i = 0; i < top; i++) {
p = (char *)GETBLEAF(h, i);
P_32_SWAP(p);
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
P_32_SWAP(p);
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
flags = *(u_char *)p;
if (flags & (P_BIGKEY | P_BIGDATA)) {
p += sizeof(u_char);
@@ -176,7 +176,7 @@ __bt_pgout(t, pg, pp)
P_32_SWAP(p);
}
if (flags & P_BIGDATA) {
- p += sizeof(size_t);
+ p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
@@ -206,16 +206,16 @@ mswap(pg)
char *p;
p = (char *)pg;
- P_32_SWAP(p); /* m_magic */
+ P_32_SWAP(p); /* magic */
p += sizeof(u_int32_t);
- P_32_SWAP(p); /* m_version */
+ P_32_SWAP(p); /* version */
p += sizeof(u_int32_t);
- P_32_SWAP(p); /* m_psize */
+ P_32_SWAP(p); /* psize */
p += sizeof(u_int32_t);
- P_32_SWAP(p); /* m_free */
+ P_32_SWAP(p); /* free */
p += sizeof(u_int32_t);
- P_32_SWAP(p); /* m_nrecs */
+ P_32_SWAP(p); /* nrecs */
p += sizeof(u_int32_t);
- P_32_SWAP(p); /* m_flags */
+ P_32_SWAP(p); /* flags */
p += sizeof(u_int32_t);
}
diff --git a/lib/libc/db/btree/bt_debug.c b/lib/libc/db/btree/bt_debug.c
index 5f9ac1d..3aefbe7 100644
--- a/lib/libc/db/btree/bt_debug.c
+++ b/lib/libc/db/btree/bt_debug.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_debug.c 8.2 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_debug.c 8.5 (Berkeley) 8/17/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
@@ -65,24 +65,22 @@ __bt_dump(dbp)
t = dbp->internal;
(void)fprintf(stderr, "%s: pgsz %d",
- ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize);
- if (ISSET(t, R_RECNO))
+ F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize);
+ if (F_ISSET(t, R_RECNO))
(void)fprintf(stderr, " keys %lu", t->bt_nrecs);
#undef X
#define X(flag, name) \
- if (ISSET(t, flag)) { \
+ if (F_ISSET(t, flag)) { \
(void)fprintf(stderr, "%s%s", sep, name); \
sep = ", "; \
}
- if (t->bt_flags) {
+ if (t->flags != 0) {
sep = " flags (";
- X(B_DELCRSR, "DELCRSR");
X(R_FIXLEN, "FIXLEN");
X(B_INMEM, "INMEM");
X(B_NODUPS, "NODUPS");
X(B_RDONLY, "RDONLY");
X(R_RECNO, "RECNO");
- X(B_SEQINIT, "SEQINIT");
X(B_METADIRTY,"METADIRTY");
(void)fprintf(stderr, ")\n");
}
@@ -108,19 +106,19 @@ __bt_dmpage(h)
char *sep;
m = (BTMETA *)h;
- (void)fprintf(stderr, "magic %lx\n", m->m_magic);
- (void)fprintf(stderr, "version %lu\n", m->m_version);
- (void)fprintf(stderr, "psize %lu\n", m->m_psize);
- (void)fprintf(stderr, "free %lu\n", m->m_free);
- (void)fprintf(stderr, "nrecs %lu\n", m->m_nrecs);
- (void)fprintf(stderr, "flags %lu", m->m_flags);
+ (void)fprintf(stderr, "magic %lx\n", m->magic);
+ (void)fprintf(stderr, "version %lu\n", m->version);
+ (void)fprintf(stderr, "psize %lu\n", m->psize);
+ (void)fprintf(stderr, "free %lu\n", m->free);
+ (void)fprintf(stderr, "nrecs %lu\n", m->nrecs);
+ (void)fprintf(stderr, "flags %lu", m->flags);
#undef X
#define X(flag, name) \
- if (m->m_flags & flag) { \
+ if (m->flags & flag) { \
(void)fprintf(stderr, "%s%s", sep, name); \
sep = ", "; \
}
- if (m->m_flags) {
+ if (m->flags) {
sep = " (";
X(B_NODUPS, "NODUPS");
X(R_RECNO, "RECNO");
@@ -192,7 +190,7 @@ __bt_dpage(h)
h->lower, h->upper, top);
for (cur = 0; cur < top; cur++) {
(void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]);
- switch(h->flags & P_TYPE) {
+ switch (h->flags & P_TYPE) {
case P_BINTERNAL:
bi = GETBINTERNAL(h, cur);
(void)fprintf(stderr,
@@ -214,14 +212,14 @@ __bt_dpage(h)
(void)fprintf(stderr,
"big key page %lu size %u/",
*(pgno_t *)bl->bytes,
- *(size_t *)(bl->bytes + sizeof(pgno_t)));
+ *(u_int32_t *)(bl->bytes + sizeof(pgno_t)));
else if (bl->ksize)
(void)fprintf(stderr, "%s/", bl->bytes);
if (bl->flags & P_BIGDATA)
(void)fprintf(stderr,
"big data page %lu size %u",
*(pgno_t *)(bl->bytes + bl->ksize),
- *(size_t *)(bl->bytes + bl->ksize +
+ *(u_int32_t *)(bl->bytes + bl->ksize +
sizeof(pgno_t)));
else if (bl->dsize)
(void)fprintf(stderr, "%.*s",
@@ -233,7 +231,7 @@ __bt_dpage(h)
(void)fprintf(stderr,
"big data page %lu size %u",
*(pgno_t *)rl->bytes,
- *(size_t *)(rl->bytes + sizeof(pgno_t)));
+ *(u_int32_t *)(rl->bytes + sizeof(pgno_t)));
else if (rl->dsize)
(void)fprintf(stderr,
"%.*s", (int)rl->dsize, rl->bytes);
@@ -267,7 +265,7 @@ __bt_stat(dbp)
pcont = pinternal = pleaf = 0;
nkeys = ifree = lfree = 0;
for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) {
- switch(h->flags & P_TYPE) {
+ switch (h->flags & P_TYPE) {
case P_BINTERNAL:
case P_RINTERNAL:
++pinternal;
@@ -295,7 +293,7 @@ __bt_stat(dbp)
(void)mpool_put(t->bt_mp, h, 0);
break;
}
- i = ISSET(t, R_RECNO) ?
+ i = F_ISSET(t, R_RECNO) ?
GETRINTERNAL(h, 0)->pgno :
GETBINTERNAL(h, 0)->pgno;
(void)mpool_put(t->bt_mp, h, 0);
@@ -303,7 +301,7 @@ __bt_stat(dbp)
(void)fprintf(stderr, "%d level%s with %ld keys",
levels, levels == 1 ? "" : "s", nkeys);
- if (ISSET(t, R_RECNO))
+ if (F_ISSET(t, R_RECNO))
(void)fprintf(stderr, " (%ld header count)", t->bt_nrecs);
(void)fprintf(stderr,
"\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n",
diff --git a/lib/libc/db/btree/bt_delete.c b/lib/libc/db/btree/bt_delete.c
index 5dba534..ece1ab6 100644
--- a/lib/libc/db/btree/bt_delete.c
+++ b/lib/libc/db/btree/bt_delete.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -47,18 +47,17 @@ static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94";
#include <db.h>
#include "btree.h"
-static int bt_bdelete __P((BTREE *, const DBT *));
+static int __bt_bdelete __P((BTREE *, const DBT *));
+static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
+static int __bt_pdelete __P((BTREE *, PAGE *));
+static int __bt_relink __P((BTREE *, PAGE *));
+static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
/*
- * __BT_DELETE -- Delete the item(s) referenced by a key.
+ * __bt_delete
+ * Delete the item(s) referenced by a key.
*
- * Parameters:
- * dbp: pointer to access method
- * key: key to delete
- * flags: R_CURSOR if deleting what the cursor references
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
+ * Return RET_SPECIAL if the key is not found.
*/
int
__bt_delete(dbp, key, flags)
@@ -67,6 +66,8 @@ __bt_delete(dbp, key, flags)
u_int flags;
{
BTREE *t;
+ CURSOR *c;
+ PAGE *h;
int status;
t = dbp->internal;
@@ -77,242 +78,433 @@ __bt_delete(dbp, key, flags)
t->bt_pinned = NULL;
}
- if (ISSET(t, B_RDONLY)) {
+ /* Check for change to a read-only tree. */
+ if (F_ISSET(t, B_RDONLY)) {
errno = EPERM;
return (RET_ERROR);
}
- switch(flags) {
+ switch (flags) {
case 0:
- status = bt_bdelete(t, key);
+ status = __bt_bdelete(t, key);
break;
case R_CURSOR:
/*
- * If flags is R_CURSOR, delete the cursor; must already have
- * started a scan and not have already deleted the record. For
- * the delete cursor bit to have been set requires that the
- * scan be initialized, so no reason to check.
+ * If flags is R_CURSOR, delete the cursor. Must already
+ * have started a scan and not have already deleted it.
*/
- if (!ISSET(t, B_SEQINIT))
- goto einval;
- status = ISSET(t, B_DELCRSR) ?
- RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
- break;
+ c = &t->bt_cursor;
+ if (F_ISSET(c, CURS_INIT)) {
+ if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
+ return (RET_ERROR);
+
+ /*
+ * If the page is about to be emptied, we'll need to
+ * delete it, which means we have to acquire a stack.
+ */
+ if (NEXTINDEX(h) == 1)
+ if (__bt_stkacq(t, &h, &t->bt_cursor))
+ return (RET_ERROR);
+
+ status = __bt_dleaf(t, NULL, h, c->pg.index);
+
+ if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
+ if (__bt_pdelete(t, h))
+ return (RET_ERROR);
+ } else
+ mpool_put(t->bt_mp,
+ h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
+ break;
+ }
+ /* FALLTHROUGH */
default:
-einval: errno = EINVAL;
+ errno = EINVAL;
return (RET_ERROR);
}
if (status == RET_SUCCESS)
- SET(t, B_MODIFIED);
+ F_SET(t, B_MODIFIED);
return (status);
}
/*
- * BT_BDELETE -- Delete all key/data pairs matching the specified key.
+ * __bt_stkacq --
+ * Acquire a stack so we can delete a cursor entry.
*
* Parameters:
- * tree: tree
+ * t: tree
+ * hp: pointer to current, pinned PAGE pointer
+ * c: pointer to the cursor
+ *
+ * Returns:
+ * 0 on success, 1 on failure
+ */
+static int
+__bt_stkacq(t, hp, c)
+ BTREE *t;
+ PAGE **hp;
+ CURSOR *c;
+{
+ BINTERNAL *bi;
+ EPG *e;
+ EPGNO *parent;
+ PAGE *h;
+ indx_t index;
+ pgno_t pgno;
+ recno_t nextpg, prevpg;
+ int exact, level;
+
+ /*
+ * Find the first occurrence of the key in the tree. Toss the
+ * currently locked page so we don't hit an already-locked page.
+ */
+ h = *hp;
+ mpool_put(t->bt_mp, h, 0);
+ if ((e = __bt_search(t, &c->key, &exact)) == NULL)
+ return (1);
+ h = e->page;
+
+ /* See if we got it in one shot. */
+ if (h->pgno == c->pg.pgno)
+ goto ret;
+
+ /*
+ * Move right, looking for the page. At each move we have to move
+ * up the stack until we don't have to move to the next page. If
+ * we have to change pages at an internal level, we have to fix the
+ * stack back up.
+ */
+ while (h->pgno != c->pg.pgno) {
+ if ((nextpg = h->nextpg) == P_INVALID)
+ break;
+ mpool_put(t->bt_mp, h, 0);
+
+ /* Move up the stack. */
+ for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
+ /* Get the parent page. */
+ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
+ return (1);
+
+ /* Move to the next index. */
+ if (parent->index != NEXTINDEX(h) - 1) {
+ index = parent->index + 1;
+ BT_PUSH(t, h->pgno, index);
+ break;
+ }
+ mpool_put(t->bt_mp, h, 0);
+ }
+
+ /* Restore the stack. */
+ while (level--) {
+ /* Push the next level down onto the stack. */
+ bi = GETBINTERNAL(h, index);
+ pgno = bi->pgno;
+ BT_PUSH(t, pgno, 0);
+
+ /* Lose the currently pinned page. */
+ mpool_put(t->bt_mp, h, 0);
+
+ /* Get the next level down. */
+ if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
+ return (1);
+ index = 0;
+ }
+ mpool_put(t->bt_mp, h, 0);
+ if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
+ return (1);
+ }
+
+ if (h->pgno == c->pg.pgno)
+ goto ret;
+
+ /* Reacquire the original stack. */
+ mpool_put(t->bt_mp, h, 0);
+ if ((e = __bt_search(t, &c->key, &exact)) == NULL)
+ return (1);
+ h = e->page;
+
+ /*
+ * Move left, looking for the page. At each move we have to move
+ * up the stack until we don't have to change pages to move to the
+ * next page. If we have to change pages at an internal level, we
+ * have to fix the stack back up.
+ */
+ while (h->pgno != c->pg.pgno) {
+ if ((prevpg = h->prevpg) == P_INVALID)
+ break;
+ mpool_put(t->bt_mp, h, 0);
+
+ /* Move up the stack. */
+ for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
+ /* Get the parent page. */
+ if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
+ return (1);
+
+ /* Move to the next index. */
+ if (parent->index != 0) {
+ index = parent->index - 1;
+ BT_PUSH(t, h->pgno, index);
+ break;
+ }
+ mpool_put(t->bt_mp, h, 0);
+ }
+
+ /* Restore the stack. */
+ while (level--) {
+ /* Push the next level down onto the stack. */
+ bi = GETBINTERNAL(h, index);
+ pgno = bi->pgno;
+
+ /* Lose the currently pinned page. */
+ mpool_put(t->bt_mp, h, 0);
+
+ /* Get the next level down. */
+ if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
+ return (1);
+
+ index = NEXTINDEX(h) - 1;
+ BT_PUSH(t, pgno, index);
+ }
+ mpool_put(t->bt_mp, h, 0);
+ if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
+ return (1);
+ }
+
+
+ret: mpool_put(t->bt_mp, h, 0);
+ return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
+}
+
+/*
+ * __bt_bdelete --
+ * Delete all key/data pairs matching the specified key.
+ *
+ * Parameters:
+ * t: tree
* key: key to delete
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
*/
static int
-bt_bdelete(t, key)
+__bt_bdelete(t, key)
BTREE *t;
const DBT *key;
{
- EPG *e, save;
+ EPG *e;
PAGE *h;
- pgno_t cpgno, pg;
- indx_t cindex;
- int deleted, dirty1, dirty2, exact;
+ int deleted, exact, redo;
+
+ deleted = 0;
/* Find any matching record; __bt_search pins the page. */
- if ((e = __bt_search(t, key, &exact)) == NULL)
- return (RET_ERROR);
+loop: if ((e = __bt_search(t, key, &exact)) == NULL)
+ return (deleted ? RET_SUCCESS : RET_ERROR);
if (!exact) {
mpool_put(t->bt_mp, e->page, 0);
- return (RET_SPECIAL);
+ return (deleted ? RET_SUCCESS : RET_SPECIAL);
}
/*
- * Delete forward, then delete backward, from the found key. The
- * ordering is so that the deletions don't mess up the page refs.
- * The first loop deletes the key from the original page, the second
- * unpins the original page. In the first loop, dirty1 is set if
- * the original page is modified, and dirty2 is set if any subsequent
- * pages are modified. In the second loop, dirty1 starts off set if
- * the original page has been modified, and is set if any subsequent
- * pages are modified.
- *
- * If find the key referenced by the cursor, don't delete it, just
- * flag it for future deletion. The cursor page number is P_INVALID
- * unless the sequential scan is initialized, so no reason to check.
- * A special case is when the already deleted cursor record was the
- * only record found. If so, then the delete opertion fails as no
- * records were deleted.
- *
- * Cycle in place in the current page until the current record doesn't
- * match the key or the page is empty. If the latter, walk forward,
- * skipping empty pages and repeating until a record doesn't match
- * the key or the end of the tree is reached.
+ * Delete forward, then delete backward, from the found key. If
+ * there are duplicates and we reach either side of the page, do
+ * the key search again, so that we get them all.
*/
- cpgno = t->bt_bcursor.pgno;
- cindex = t->bt_bcursor.index;
- save = *e;
- dirty1 = 0;
- for (h = e->page, deleted = 0;;) {
- dirty2 = 0;
- do {
- if (h->pgno == cpgno && e->index == cindex) {
- if (!ISSET(t, B_DELCRSR)) {
- SET(t, B_DELCRSR);
- deleted = 1;
- }
- ++e->index;
- } else {
- if (__bt_dleaf(t, h, e->index)) {
- if (h->pgno != save.page->pgno)
- mpool_put(t->bt_mp, h, dirty2);
- mpool_put(t->bt_mp, save.page, dirty1);
+ redo = 0;
+ h = e->page;
+ do {
+ if (__bt_dleaf(t, key, h, e->index)) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_ERROR);
+ }
+ if (F_ISSET(t, B_NODUPS)) {
+ if (NEXTINDEX(h) == 0) {
+ if (__bt_pdelete(t, h))
return (RET_ERROR);
- }
- if (h->pgno == save.page->pgno)
- dirty1 = MPOOL_DIRTY;
- else
- dirty2 = MPOOL_DIRTY;
- deleted = 1;
- }
- } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
-
- /*
- * Quit if didn't find a match, no next page, or first key on
- * the next page doesn't match. Don't unpin the original page
- * unless an error occurs.
- */
- if (e->index < NEXTINDEX(h))
- break;
- for (;;) {
- if ((pg = h->nextpg) == P_INVALID)
- goto done1;
- if (h->pgno != save.page->pgno)
- mpool_put(t->bt_mp, h, dirty2);
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
- mpool_put(t->bt_mp, save.page, dirty1);
- return (RET_ERROR);
- }
- if (NEXTINDEX(h) != 0) {
- e->page = h;
- e->index = 0;
- break;
- }
+ } else
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ return (RET_SUCCESS);
}
+ deleted = 1;
+ } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
+ /* Check for right-hand edge of the page. */
+ if (e->index == NEXTINDEX(h))
+ redo = 1;
+
+ /* Delete from the key to the beginning of the page. */
+ while (e->index-- > 0) {
if (__bt_cmp(t, key, e) != 0)
break;
+ if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_ERROR);
+ }
+ if (e->index == 0)
+ redo = 1;
}
- /*
- * Reach here with the original page and the last page referenced
- * pinned (they may be the same). Release it if not the original.
- */
-done1: if (h->pgno != save.page->pgno)
- mpool_put(t->bt_mp, h, dirty2);
+ /* Check for an empty page. */
+ if (NEXTINDEX(h) == 0) {
+ if (__bt_pdelete(t, h))
+ return (RET_ERROR);
+ goto loop;
+ }
+
+ /* Put the page. */
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+
+ if (redo)
+ goto loop;
+ return (RET_SUCCESS);
+}
+
+/*
+ * __bt_pdelete --
+ * Delete a single page from the tree.
+ *
+ * Parameters:
+ * t: tree
+ * h: leaf page
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ *
+ * Side-effects:
+ * mpool_put's the page
+ */
+static int
+__bt_pdelete(t, h)
+ BTREE *t;
+ PAGE *h;
+{
+ BINTERNAL *bi;
+ PAGE *pg;
+ EPGNO *parent;
+ indx_t cnt, index, *ip, offset;
+ u_int32_t nksize;
+ char *from;
/*
- * Walk backwards from the record previous to the record returned by
- * __bt_search, skipping empty pages, until a record doesn't match
- * the key or reach the beginning of the tree.
+ * Walk the parent page stack -- a LIFO stack of the pages that were
+ * traversed when we searched for the page where the delete occurred.
+ * Each stack entry is a page number and a page index offset. The
+ * offset is for the page traversed on the search. We've just deleted
+ * a page, so we have to delete the key from the parent page.
+ *
+ * If the delete from the parent page makes it empty, this process may
+ * continue all the way up the tree. We stop if we reach the root page
+ * (which is never deleted, it's just not worth the effort) or if the
+ * delete does not empty the page.
*/
- *e = save;
- for (;;) {
- if (e->index)
- --e->index;
- for (h = e->page; e->index; --e->index) {
- if (__bt_cmp(t, key, e) != 0)
- goto done2;
- if (h->pgno == cpgno && e->index == cindex) {
- if (!ISSET(t, B_DELCRSR)) {
- SET(t, B_DELCRSR);
- deleted = 1;
- }
+ while ((parent = BT_POP(t)) != NULL) {
+ /* Get the parent page. */
+ if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
+ return (RET_ERROR);
+
+ index = parent->index;
+ bi = GETBINTERNAL(pg, index);
+
+ /* Free any overflow pages. */
+ if (bi->flags & P_BIGKEY &&
+ __ovfl_delete(t, bi->bytes) == RET_ERROR) {
+ mpool_put(t->bt_mp, pg, 0);
+ return (RET_ERROR);
+ }
+
+ /*
+ * Free the parent if it has only the one key and it's not the
+ * root page. If it's the rootpage, turn it back into an empty
+ * leaf page.
+ */
+ if (NEXTINDEX(pg) == 1)
+ if (pg->pgno == P_ROOT) {
+ pg->lower = BTDATAOFF;
+ pg->upper = t->bt_psize;
+ pg->flags = P_BLEAF;
} else {
- if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
- mpool_put(t->bt_mp, h, dirty1);
+ if (__bt_relink(t, pg) || __bt_free(t, pg))
return (RET_ERROR);
- }
- if (h->pgno == save.page->pgno)
- dirty1 = MPOOL_DIRTY;
- deleted = 1;
+ continue;
}
+ else {
+ /* Pack remaining key items at the end of the page. */
+ nksize = NBINTERNAL(bi->ksize);
+ from = (char *)pg + pg->upper;
+ memmove(from + nksize, from, (char *)bi - from);
+ pg->upper += nksize;
+
+ /* Adjust indices' offsets, shift the indices down. */
+ offset = pg->linp[index];
+ for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip)
+ if (ip[0] < offset)
+ ip[0] += nksize;
+ for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip)
+ ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
+ pg->lower -= sizeof(indx_t);
}
- if ((pg = h->prevpg) == P_INVALID)
- goto done2;
- mpool_put(t->bt_mp, h, dirty1);
- dirty1 = 0;
- if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- e->index = NEXTINDEX(e->page);
+ mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
+ break;
}
- /*
- * Reach here with the last page that was looked at pinned. Release
- * it.
- */
-done2: mpool_put(t->bt_mp, h, dirty1);
- return (deleted ? RET_SUCCESS : RET_SPECIAL);
+ /* Free the leaf page, as long as it wasn't the root. */
+ if (h->pgno == P_ROOT) {
+ mpool_put(t->bt_mp, h, MPOOL_DIRTY);
+ return (RET_SUCCESS);
+ }
+ return (__bt_relink(t, h) || __bt_free(t, h));
}
/*
- * __BT_DLEAF -- Delete a single record from a leaf page.
+ * __bt_dleaf --
+ * Delete a single record from a leaf page.
*
* Parameters:
* t: tree
- * index: index on current page to delete
+ * key: referenced key
+ * h: page
+ * index: index on page to delete
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
-__bt_dleaf(t, h, index)
+__bt_dleaf(t, key, h, index)
BTREE *t;
+ const DBT *key;
PAGE *h;
- indx_t index;
+ u_int index;
{
- register BLEAF *bl;
- register indx_t cnt, *ip, offset;
- register size_t nbytes;
- char *from;
+ BLEAF *bl;
+ indx_t cnt, *ip, offset;
+ u_int32_t nbytes;
void *to;
+ char *from;
- /*
- * Delete a record from a btree leaf page. Internal records are never
- * deleted from internal pages, regardless of the records that caused
- * them to be added being deleted. Pages made empty by deletion are
- * not reclaimed. They are, however, made available for reuse.
- *
- * Pack the remaining entries at the end of the page, shift the indices
- * down, overwriting the deleted record and its index. If the record
- * uses overflow pages, make them available for reuse.
- */
+ /* If this record is referenced by the cursor, delete the cursor. */
+ if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
+ !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
+ t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index &&
+ __bt_curdel(t, key, h, index))
+ return (RET_ERROR);
+
+ /* If the entry uses overflow pages, make them available for reuse. */
to = bl = GETBLEAF(h, index);
if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
return (RET_ERROR);
if (bl->flags & P_BIGDATA &&
__ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
return (RET_ERROR);
- nbytes = NBLEAF(bl);
- /*
- * Compress the key/data pairs. Compress and adjust the [BR]LEAF
- * offsets. Reset the headers.
- */
+ /* Pack the remaining key/data items at the end of the page. */
+ nbytes = NBLEAF(bl);
from = (char *)h + h->upper;
memmove(from + nbytes, from, (char *)to - from);
h->upper += nbytes;
+ /* Adjust the indices' offsets, shift the indices down. */
offset = h->linp[index];
for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
if (ip[0] < offset)
@@ -320,5 +512,146 @@ __bt_dleaf(t, h, index)
for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
h->lower -= sizeof(indx_t);
+
+ /* If the cursor is on this page, adjust it as necessary. */
+ if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
+ !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
+ t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index)
+ --t->bt_cursor.pg.index;
+
return (RET_SUCCESS);
}
+
+/*
+ * __bt_curdel --
+ * Delete the cursor.
+ *
+ * Parameters:
+ * t: tree
+ * key: referenced key (or NULL)
+ * h: page
+ * index: index on page to delete
+ *
+ * Returns:
+ * RET_SUCCESS, RET_ERROR.
+ */
+static int
+__bt_curdel(t, key, h, index)
+ BTREE *t;
+ const DBT *key;
+ PAGE *h;
+ u_int index;
+{
+ CURSOR *c;
+ EPG e;
+ PAGE *pg;
+ int curcopy, status;
+
+ /*
+ * If there are duplicates, move forward or backward to one.
+ * Otherwise, copy the key into the cursor area.
+ */
+ c = &t->bt_cursor;
+ F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
+
+ curcopy = 0;
+ if (!F_ISSET(t, B_NODUPS)) {
+ /*
+ * We're going to have to do comparisons. If we weren't
+ * provided a copy of the key, i.e. the user is deleting
+ * the current cursor position, get one.
+ */
+ if (key == NULL) {
+ e.page = h;
+ e.index = index;
+ if ((status = __bt_ret(t, &e,
+ &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
+ return (status);
+ curcopy = 1;
+ key = &c->key;
+ }
+ /* Check previous key, if not at the beginning of the page. */
+ if (index > 0) {
+ e.page = h;
+ e.index = index - 1;
+ if (__bt_cmp(t, key, &e) == 0) {
+ F_SET(c, CURS_BEFORE);
+ goto dup2;
+ }
+ }
+ /* Check next key, if not at the end of the page. */
+ if (index < NEXTINDEX(h) - 1) {
+ e.page = h;
+ e.index = index + 1;
+ if (__bt_cmp(t, key, &e) == 0) {
+ F_SET(c, CURS_AFTER);
+ goto dup2;
+ }
+ }
+ /* Check previous key if at the beginning of the page. */
+ if (index == 0 && h->prevpg != P_INVALID) {
+ if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
+ return (RET_ERROR);
+ e.page = pg;
+ e.index = NEXTINDEX(pg) - 1;
+ if (__bt_cmp(t, key, &e) == 0) {
+ F_SET(c, CURS_BEFORE);
+ goto dup1;
+ }
+ mpool_put(t->bt_mp, pg, 0);
+ }
+ /* Check next key if at the end of the page. */
+ if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
+ if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
+ return (RET_ERROR);
+ e.page = pg;
+ e.index = 0;
+ if (__bt_cmp(t, key, &e) == 0) {
+ F_SET(c, CURS_AFTER);
+dup1: mpool_put(t->bt_mp, pg, 0);
+dup2: c->pg.pgno = e.page->pgno;
+ c->pg.index = e.index;
+ return (RET_SUCCESS);
+ }
+ mpool_put(t->bt_mp, pg, 0);
+ }
+ }
+ e.page = h;
+ e.index = index;
+ if (curcopy || (status =
+ __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
+ F_SET(c, CURS_ACQUIRE);
+ return (RET_SUCCESS);
+ }
+ return (status);
+}
+
+/*
+ * __bt_relink --
+ * Link around a deleted page.
+ *
+ * Parameters:
+ * t: tree
+ * h: page to be deleted
+ */
+static int
+__bt_relink(t, h)
+ BTREE *t;
+ PAGE *h;
+{
+ PAGE *pg;
+
+ if (h->nextpg != P_INVALID) {
+ if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
+ return (RET_ERROR);
+ pg->prevpg = h->prevpg;
+ mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
+ }
+ if (h->prevpg != P_INVALID) {
+ if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
+ return (RET_ERROR);
+ pg->nextpg = h->nextpg;
+ mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
+ }
+ return (0);
+}
diff --git a/lib/libc/db/btree/bt_get.c b/lib/libc/db/btree/bt_get.c
index 28b2d60..74824c7 100644
--- a/lib/libc/db/btree/bt_get.c
+++ b/lib/libc/db/btree/bt_get.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_get.c 8.2 (Berkeley) 9/7/93";
+static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -91,148 +91,15 @@ __bt_get(dbp, key, data, flags)
return (RET_SPECIAL);
}
- /*
- * A special case is if we found the record but it's flagged for
- * deletion. In this case, we want to find another record with the
- * same key, if it exists. Rather than look around the tree we call
- * __bt_first and have it redo the search, as __bt_first will not
- * return keys marked for deletion. Slow, but should never happen.
- */
- if (ISSET(t, B_DELCRSR) && e->page->pgno == t->bt_bcursor.pgno &&
- e->index == t->bt_bcursor.index) {
- mpool_put(t->bt_mp, e->page, 0);
- if ((e = __bt_first(t, key, &exact)) == NULL)
- return (RET_ERROR);
- if (!exact)
- return (RET_SPECIAL);
- }
+ status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0);
- status = __bt_ret(t, e, NULL, data);
/*
* If the user is doing concurrent access, we copied the
* key/data, toss the page.
*/
- if (ISSET(t, B_DB_LOCK))
+ if (F_ISSET(t, B_DB_LOCK))
mpool_put(t->bt_mp, e->page, 0);
else
t->bt_pinned = e->page;
return (status);
}
-
-/*
- * __BT_FIRST -- Find the first entry.
- *
- * Parameters:
- * t: the tree
- * key: the key
- *
- * Returns:
- * The first entry in the tree greater than or equal to key.
- */
-EPG *
-__bt_first(t, key, exactp)
- BTREE *t;
- const DBT *key;
- int *exactp;
-{
- register PAGE *h;
- register EPG *e;
- EPG save;
- pgno_t cpgno, pg;
- indx_t cindex;
- int found;
-
- /*
- * Find any matching record; __bt_search pins the page. Only exact
- * matches are tricky, otherwise just return the location of the key
- * if it were to be inserted into the tree.
- */
- if ((e = __bt_search(t, key, exactp)) == NULL)
- return (NULL);
- if (!*exactp)
- return (e);
-
- if (ISSET(t, B_DELCRSR)) {
- cpgno = t->bt_bcursor.pgno;
- cindex = t->bt_bcursor.index;
- } else {
- cpgno = P_INVALID;
- cindex = 0; /* GCC thinks it's uninitialized. */
- }
-
- /*
- * Walk backwards, skipping empty pages, as long as the entry matches
- * and there are keys left in the tree. Save a copy of each match in
- * case we go too far. A special case is that we don't return a match
- * on records that the cursor references that have already been flagged
- * for deletion.
- */
- save = *e;
- h = e->page;
- found = 0;
- do {
- if (cpgno != h->pgno || cindex != e->index) {
- if (save.page->pgno != e->page->pgno) {
- mpool_put(t->bt_mp, save.page, 0);
- save = *e;
- } else
- save.index = e->index;
- found = 1;
- }
- /*
- * Make a special effort not to unpin the page the last (or
- * original) match was on, but also make sure it's unpinned
- * if an error occurs.
- */
- while (e->index == 0) {
- if (h->prevpg == P_INVALID)
- goto done1;
- if (h->pgno != save.page->pgno)
- mpool_put(t->bt_mp, h, 0);
- if ((h = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) {
- if (h->pgno == save.page->pgno)
- mpool_put(t->bt_mp, save.page, 0);
- return (NULL);
- }
- e->page = h;
- e->index = NEXTINDEX(h);
- }
- --e->index;
- } while (__bt_cmp(t, key, e) == 0);
-
- /*
- * Reach here with the last page that was looked at pinned, which may
- * or may not be the same as the last (or original) match page. If
- * it's not useful, release it.
- */
-done1: if (h->pgno != save.page->pgno)
- mpool_put(t->bt_mp, h, 0);
-
- /*
- * If still haven't found a record, the only possibility left is the
- * next one. Move forward one slot, skipping empty pages and check.
- */
- if (!found) {
- h = save.page;
- if (++save.index == NEXTINDEX(h)) {
- do {
- pg = h->nextpg;
- mpool_put(t->bt_mp, h, 0);
- if (pg == P_INVALID) {
- *exactp = 0;
- return (e);
- }
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (NULL);
- } while ((save.index = NEXTINDEX(h)) == 0);
- save.page = h;
- }
- if (__bt_cmp(t, key, &save) != 0) {
- *exactp = 0;
- return (e);
- }
- }
- *e = save;
- *exactp = 1;
- return (e);
-}
diff --git a/lib/libc/db/btree/bt_overflow.c b/lib/libc/db/btree/bt_overflow.c
index 0057a03..b28b8e0 100644
--- a/lib/libc/db/btree/bt_overflow.c
+++ b/lib/libc/db/btree/bt_overflow.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
@@ -69,7 +69,7 @@ static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94";
*
* Parameters:
* t: tree
- * p: pointer to { pgno_t, size_t }
+ * p: pointer to { pgno_t, u_int32_t }
* buf: storage address
* bufsz: storage size
*
@@ -81,15 +81,16 @@ __ovfl_get(t, p, ssz, buf, bufsz)
BTREE *t;
void *p;
size_t *ssz;
- char **buf;
+ void **buf;
size_t *bufsz;
{
PAGE *h;
pgno_t pg;
- size_t nb, plen, sz;
+ size_t nb, plen;
+ u_int32_t sz;
memmove(&pg, p, sizeof(pgno_t));
- memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t));
+ memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
*ssz = sz;
#ifdef DEBUG
@@ -98,7 +99,8 @@ __ovfl_get(t, p, ssz, buf, bufsz)
#endif
/* Make the buffer bigger as necessary. */
if (*bufsz < sz) {
- if ((*buf = (char *)realloc(*buf, sz)) == NULL)
+ *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz));
+ if (*buf == NULL)
return (RET_ERROR);
*bufsz = sz;
}
@@ -142,7 +144,8 @@ __ovfl_put(t, dbt, pg)
PAGE *h, *last;
void *p;
pgno_t npg;
- size_t nb, plen, sz;
+ size_t nb, plen;
+ u_int32_t sz;
/*
* Allocate pages and copy the key/data record into them. Store the
@@ -181,7 +184,7 @@ __ovfl_put(t, dbt, pg)
*
* Parameters:
* t: tree
- * p: pointer to { pgno_t, size_t }
+ * p: pointer to { pgno_t, u_int32_t }
*
* Returns:
* RET_ERROR, RET_SUCCESS
@@ -193,10 +196,11 @@ __ovfl_delete(t, p)
{
PAGE *h;
pgno_t pg;
- size_t plen, sz;
+ size_t plen;
+ u_int32_t sz;
memmove(&pg, p, sizeof(pgno_t));
- memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t));
+ memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
#ifdef DEBUG
if (pg == P_INVALID || sz == 0)
diff --git a/lib/libc/db/btree/bt_page.c b/lib/libc/db/btree/bt_page.c
index f71a40d..0d9d138 100644
--- a/lib/libc/db/btree/bt_page.c
+++ b/lib/libc/db/btree/bt_page.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_page.c 8.3 (Berkeley) 7/14/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -43,7 +43,8 @@ static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94";
#include "btree.h"
/*
- * __BT_FREE -- Put a page on the freelist.
+ * __bt_free --
+ * Put a page on the freelist.
*
* Parameters:
* t: tree
@@ -51,13 +52,16 @@ static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94";
*
* Returns:
* RET_ERROR, RET_SUCCESS
+ *
+ * Side-effect:
+ * mpool_put's the page.
*/
int
__bt_free(t, h)
BTREE *t;
PAGE *h;
{
- /* Insert the page at the start of the free list. */
+ /* Insert the page at the head of the free list. */
h->prevpg = P_INVALID;
h->nextpg = t->bt_free;
t->bt_free = h->pgno;
@@ -67,7 +71,8 @@ __bt_free(t, h)
}
/*
- * __BT_NEW -- Get a new page, preferably from the freelist.
+ * __bt_new --
+ * Get a new page, preferably from the freelist.
*
* Parameters:
* t: tree
@@ -85,9 +90,9 @@ __bt_new(t, npg)
if (t->bt_free != P_INVALID &&
(h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) {
- *npg = t->bt_free;
- t->bt_free = h->nextpg;
- return (h);
+ *npg = t->bt_free;
+ t->bt_free = h->nextpg;
+ return (h);
}
return (mpool_new(t->bt_mp, npg));
}
diff --git a/lib/libc/db/btree/bt_put.c b/lib/libc/db/btree/bt_put.c
index 11a211b..952be09 100644
--- a/lib/libc/db/btree/bt_put.c
+++ b/lib/libc/db/btree/bt_put.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_put.c 8.3 (Berkeley) 9/16/93";
+static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -76,7 +76,7 @@ __bt_put(dbp, key, data, flags)
PAGE *h;
indx_t index, nxtindex;
pgno_t pg;
- size_t nbytes;
+ u_int32_t nbytes;
int dflags, exact, status;
char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
@@ -88,33 +88,38 @@ __bt_put(dbp, key, data, flags)
t->bt_pinned = NULL;
}
+ /* Check for change to a read-only tree. */
+ if (F_ISSET(t, B_RDONLY)) {
+ errno = EPERM;
+ return (RET_ERROR);
+ }
+
switch (flags) {
- case R_CURSOR:
- if (!ISSET(t, B_SEQINIT))
- goto einval;
- if (ISSET(t, B_DELCRSR))
- goto einval;
- break;
case 0:
case R_NOOVERWRITE:
break;
+ case R_CURSOR:
+ /*
+ * If flags is R_CURSOR, put the cursor. Must already
+ * have started a scan and not have already deleted it.
+ */
+ if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
+ !F_ISSET(&t->bt_cursor,
+ CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
+ break;
+ /* FALLTHROUGH */
default:
-einval: errno = EINVAL;
- return (RET_ERROR);
- }
-
- if (ISSET(t, B_RDONLY)) {
- errno = EPERM;
+ errno = EINVAL;
return (RET_ERROR);
}
/*
- * If the key/data won't fit on a page, store it on indirect pages.
- * Only store the key on the overflow page if it's too big after the
- * data is on an overflow page.
+ * If the key/data pair won't fit on a page, store it on overflow
+ * pages. Only put the key on the overflow page if the pair are
+ * still too big after moving the data to an overflow page.
*
* XXX
- * If the insert fails later on, these pages aren't recovered.
+ * If the insert fails later on, the overflow pages aren't recovered.
*/
dflags = 0;
if (key->size + data->size > t->bt_ovflsize) {
@@ -125,7 +130,7 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
tkey.size = NOVFLSIZE;
memmove(kb, &pg, sizeof(pgno_t));
memmove(kb + sizeof(pgno_t),
- &key->size, sizeof(size_t));
+ &key->size, sizeof(u_int32_t));
dflags |= P_BIGKEY;
key = &tkey;
}
@@ -136,7 +141,7 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
tdata.size = NOVFLSIZE;
memmove(db, &pg, sizeof(pgno_t));
memmove(db + sizeof(pgno_t),
- &data->size, sizeof(size_t));
+ &data->size, sizeof(u_int32_t));
dflags |= P_BIGDATA;
data = &tdata;
}
@@ -146,15 +151,15 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
/* Replace the cursor. */
if (flags == R_CURSOR) {
- if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL)
+ if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
return (RET_ERROR);
- index = t->bt_bcursor.index;
+ index = t->bt_cursor.pg.index;
goto delete;
}
/*
- * Find the key to delete, or, the location at which to insert. Bt_fast
- * and __bt_search pin the returned page.
+ * Find the key to delete, or, the location at which to insert.
+ * Bt_fast and __bt_search both pin the returned page.
*/
if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
if ((e = __bt_search(t, key, &exact)) == NULL)
@@ -163,34 +168,26 @@ storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
index = e->index;
/*
- * Add the specified key/data pair to the tree. If an identical key
- * is already in the tree, and R_NOOVERWRITE is set, an error is
- * returned. If R_NOOVERWRITE is not set, the key is either added (if
- * duplicates are permitted) or an error is returned.
- *
- * Pages are split as required.
+ * Add the key/data pair to the tree. If an identical key is already
+ * in the tree, and R_NOOVERWRITE is set, an error is returned. If
+ * R_NOOVERWRITE is not set, the key is either added (if duplicates are
+ * permitted) or an error is returned.
*/
switch (flags) {
case R_NOOVERWRITE:
if (!exact)
break;
- /*
- * One special case is if the cursor references the record and
- * it's been flagged for deletion. Then, we delete the record,
- * leaving the cursor there -- this means that the inserted
- * record will not be seen in a cursor scan.
- */
- if (ISSET(t, B_DELCRSR) && t->bt_bcursor.pgno == h->pgno &&
- t->bt_bcursor.index == index) {
- CLR(t, B_DELCRSR);
- goto delete;
- }
mpool_put(t->bt_mp, h, 0);
return (RET_SPECIAL);
default:
- if (!exact || !ISSET(t, B_NODUPS))
+ if (!exact || !F_ISSET(t, B_NODUPS))
break;
-delete: if (__bt_dleaf(t, h, index) == RET_ERROR) {
+ /*
+ * !!!
+ * Note, the delete may empty the page, so we need to put a
+ * new entry into the page immediately.
+ */
+delete: if (__bt_dleaf(t, key, h, index) == RET_ERROR) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
@@ -220,6 +217,12 @@ delete: if (__bt_dleaf(t, h, index) == RET_ERROR) {
dest = (char *)h + h->upper;
WR_BLEAF(dest, key, data, dflags);
+ /* If the cursor is on this page, adjust it as necessary. */
+ if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
+ !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
+ t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= index)
+ ++t->bt_cursor.pg.index;
+
if (t->bt_order == NOT)
if (h->nextpg == P_INVALID) {
if (index == NEXTINDEX(h) - 1) {
@@ -238,11 +241,10 @@ delete: if (__bt_dleaf(t, h, index) == RET_ERROR) {
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
success:
- if (flags == R_SETCURSOR) {
- t->bt_bcursor.pgno = e->page->pgno;
- t->bt_bcursor.index = e->index;
- }
- SET(t, B_MODIFIED);
+ if (flags == R_SETCURSOR)
+ __bt_setcur(t, e->page->pgno, e->index);
+
+ F_SET(t, B_MODIFIED);
return (RET_SUCCESS);
}
@@ -267,7 +269,7 @@ bt_fast(t, key, data, exactp)
int *exactp;
{
PAGE *h;
- size_t nbytes;
+ u_int32_t nbytes;
int cmp;
if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
@@ -278,8 +280,8 @@ bt_fast(t, key, data, exactp)
t->bt_cur.index = t->bt_last.index;
/*
- * If won't fit in this page or have too many keys in this page, have
- * to search to get split stack.
+ * If won't fit in this page or have too many keys in this page,
+ * have to search to get split stack.
*/
nbytes = NBLEAFDBT(key->size, data->size);
if (h->upper - h->lower < nbytes + sizeof(indx_t))
diff --git a/lib/libc/db/btree/bt_search.c b/lib/libc/db/btree/bt_search.c
index ff33421..485afcb 100644
--- a/lib/libc/db/btree/bt_search.c
+++ b/lib/libc/db/btree/bt_search.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94";
+static char sccsid[] = "@(#)bt_search.c 8.8 (Berkeley) 7/31/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -45,11 +45,12 @@ static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94";
#include <db.h>
#include "btree.h"
-static int bt_snext __P((BTREE *, PAGE *, const DBT *, int *));
-static int bt_sprev __P((BTREE *, PAGE *, const DBT *, int *));
+static int __bt_snext __P((BTREE *, PAGE *, const DBT *, int *));
+static int __bt_sprev __P((BTREE *, PAGE *, const DBT *, int *));
/*
- * __BT_SEARCH -- Search a btree for a key.
+ * __bt_search --
+ * Search a btree for a key.
*
* Parameters:
* t: tree to search
@@ -95,24 +96,26 @@ __bt_search(t, key, exactp)
}
/*
- * If it's a leaf page, and duplicates aren't allowed, we're
- * done. If duplicates are allowed, it's possible that there
- * were duplicate keys on duplicate pages, and they were later
- * deleted, so we could be on a page with no matches while
- * there are matches on other pages. If we're at the start or
- * end of a page, check on both sides.
+ * If it's a leaf page, we're almost done. If no duplicates
+ * are allowed, or we have an exact match, we're done. Else,
+ * it's possible that there were matching keys on this page,
+ * which later deleted, and we're on a page with no matches
+ * while there are matches on other pages. If at the start or
+ * end of a page, check the adjacent page.
*/
if (h->flags & P_BLEAF) {
- t->bt_cur.index = base;
- *exactp = 0;
- if (!ISSET(t, B_NODUPS)) {
+ if (!F_ISSET(t, B_NODUPS)) {
if (base == 0 &&
- bt_sprev(t, h, key, exactp))
+ h->prevpg != P_INVALID &&
+ __bt_sprev(t, h, key, exactp))
return (&t->bt_cur);
if (base == NEXTINDEX(h) &&
- bt_snext(t, h, key, exactp))
+ h->nextpg != P_INVALID &&
+ __bt_snext(t, h, key, exactp))
return (&t->bt_cur);
}
+ *exactp = 0;
+ t->bt_cur.index = base;
return (&t->bt_cur);
}
@@ -125,111 +128,86 @@ __bt_search(t, key, exactp)
*/
index = base ? base - 1 : base;
-next: if (__bt_push(t, h->pgno, index) == RET_ERROR)
- return (NULL);
+next: BT_PUSH(t, h->pgno, index);
pg = GETBINTERNAL(h, index)->pgno;
mpool_put(t->bt_mp, h, 0);
}
}
/*
- * BT_SNEXT -- Check for an exact match after the key.
+ * __bt_snext --
+ * Check for an exact match after the key.
*
* Parameters:
- * t: tree to search
- * h: current page.
- * key: key to find
+ * t: tree
+ * h: current page
+ * key: key
* exactp: pointer to exact match flag
*
* Returns:
* If an exact match found.
*/
static int
-bt_snext(t, h, key, exactp)
+__bt_snext(t, h, key, exactp)
BTREE *t;
PAGE *h;
const DBT *key;
int *exactp;
{
EPG e;
- PAGE *tp;
- pgno_t pg;
- /* Skip until reach the end of the tree or a key. */
- for (pg = h->nextpg; pg != P_INVALID;) {
- if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) {
- mpool_put(t->bt_mp, h, 0);
- return (NULL);
- }
- if (NEXTINDEX(tp) != 0)
- break;
- pg = tp->prevpg;
- mpool_put(t->bt_mp, tp, 0);
- }
/*
- * The key is either an exact match, or not as good as
- * the one we already have.
+ * Get the next page. The key is either an exact
+ * match, or not as good as the one we already have.
*/
- if (pg != P_INVALID) {
- e.page = tp;
- e.index = NEXTINDEX(tp) - 1;
- if (__bt_cmp(t, key, &e) == 0) {
- mpool_put(t->bt_mp, h, 0);
- t->bt_cur = e;
- *exactp = 1;
- return (1);
- }
+ if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
+ return (0);
+ e.index = 0;
+ if (__bt_cmp(t, key, &e) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ t->bt_cur = e;
+ *exactp = 1;
+ return (1);
}
+ mpool_put(t->bt_mp, e.page, 0);
return (0);
}
/*
- * BT_SPREV -- Check for an exact match before the key.
+ * __bt_sprev --
+ * Check for an exact match before the key.
*
* Parameters:
- * t: tree to search
- * h: current page.
- * key: key to find
+ * t: tree
+ * h: current page
+ * key: key
* exactp: pointer to exact match flag
*
* Returns:
* If an exact match found.
*/
static int
-bt_sprev(t, h, key, exactp)
+__bt_sprev(t, h, key, exactp)
BTREE *t;
PAGE *h;
const DBT *key;
int *exactp;
{
EPG e;
- PAGE *tp;
- pgno_t pg;
- /* Skip until reach the beginning of the tree or a key. */
- for (pg = h->prevpg; pg != P_INVALID;) {
- if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) {
- mpool_put(t->bt_mp, h, 0);
- return (NULL);
- }
- if (NEXTINDEX(tp) != 0)
- break;
- pg = tp->prevpg;
- mpool_put(t->bt_mp, tp, 0);
- }
/*
- * The key is either an exact match, or not as good as
- * the one we already have.
+ * Get the previous page. The key is either an exact
+ * match, or not as good as the one we already have.
*/
- if (pg != P_INVALID) {
- e.page = tp;
- e.index = NEXTINDEX(tp) - 1;
- if (__bt_cmp(t, key, &e) == 0) {
- mpool_put(t->bt_mp, h, 0);
- t->bt_cur = e;
- *exactp = 1;
- return (1);
- }
+ if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
+ return (0);
+ e.index = NEXTINDEX(e.page) - 1;
+ if (__bt_cmp(t, key, &e) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ t->bt_cur = e;
+ *exactp = 1;
+ return (1);
}
+ mpool_put(t->bt_mp, e.page, 0);
return (0);
}
diff --git a/lib/libc/db/btree/bt_seq.c b/lib/libc/db/btree/bt_seq.c
index 182ef70..303b481 100644
--- a/lib/libc/db/btree/bt_seq.c
+++ b/lib/libc/db/btree/bt_seq.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93";
+static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -48,24 +48,21 @@ static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93";
#include <db.h>
#include "btree.h"
-static int bt_seqadv __P((BTREE *, EPG *, int));
-static int bt_seqset __P((BTREE *, EPG *, DBT *, int));
+static int __bt_first __P((BTREE *, const DBT *, EPG *, int *));
+static int __bt_seqadv __P((BTREE *, EPG *, int));
+static int __bt_seqset __P((BTREE *, EPG *, DBT *, int));
/*
* Sequential scan support.
*
- * The tree can be scanned sequentially, starting from either end of the tree
- * or from any specific key. A scan request before any scanning is done is
- * initialized as starting from the least node.
- *
- * Each tree has an EPGNO which has the current position of the cursor. The
- * cursor has to survive deletions/insertions in the tree without losing its
- * position. This is done by noting deletions without doing them, and then
- * doing them when the cursor moves (or the tree is closed).
+ * The tree can be scanned sequentially, starting from either end of the
+ * tree or from any specific key. A scan request before any scanning is
+ * done is initialized as starting from the least node.
*/
/*
- * __BT_SEQ -- Btree sequential scan interface.
+ * __bt_seq --
+ * Btree sequential scan interface.
*
* Parameters:
* dbp: pointer to access method
@@ -96,21 +93,21 @@ __bt_seq(dbp, key, data, flags)
/*
* If scan unitialized as yet, or starting at a specific record, set
- * the scan to a specific key. Both bt_seqset and bt_seqadv pin the
- * page the cursor references if they're successful.
+ * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin
+ * the page the cursor references if they're successful.
*/
- switch(flags) {
+ switch (flags) {
case R_NEXT:
case R_PREV:
- if (ISSET(t, B_SEQINIT)) {
- status = bt_seqadv(t, &e, flags);
+ if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
+ status = __bt_seqadv(t, &e, flags);
break;
}
/* FALLTHROUGH */
- case R_CURSOR:
case R_FIRST:
case R_LAST:
- status = bt_seqset(t, &e, key, flags);
+ case R_CURSOR:
+ status = __bt_seqset(t, &e, key, flags);
break;
default:
errno = EINVAL;
@@ -118,27 +115,26 @@ __bt_seq(dbp, key, data, flags)
}
if (status == RET_SUCCESS) {
- status = __bt_ret(t, &e, key, data);
+ __bt_setcur(t, e.page->pgno, e.index);
- /* Update the actual cursor. */
- t->bt_bcursor.pgno = e.page->pgno;
- t->bt_bcursor.index = e.index;
+ status =
+ __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
/*
* If the user is doing concurrent access, we copied the
* key/data, toss the page.
*/
- if (ISSET(t, B_DB_LOCK))
+ if (F_ISSET(t, B_DB_LOCK))
mpool_put(t->bt_mp, e.page, 0);
else
t->bt_pinned = e.page;
- SET(t, B_SEQINIT);
}
return (status);
}
/*
- * BT_SEQSET -- Set the sequential scan to a specific key.
+ * __bt_seqset --
+ * Set the sequential scan to a specific key.
*
* Parameters:
* t: tree
@@ -153,87 +149,50 @@ __bt_seq(dbp, key, data, flags)
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
*/
static int
-bt_seqset(t, ep, key, flags)
+__bt_seqset(t, ep, key, flags)
BTREE *t;
EPG *ep;
DBT *key;
int flags;
{
- EPG *e;
PAGE *h;
pgno_t pg;
int exact;
/*
- * Delete any already deleted record that we've been saving because
- * the cursor pointed to it. Since going to a specific key, should
- * delete any logically deleted records so they aren't found.
- */
- if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
- return (RET_ERROR);
-
- /*
- * Find the first, last or specific key in the tree and point the cursor
- * at it. The cursor may not be moved until a new key has been found.
+ * Find the first, last or specific key in the tree and point the
+ * cursor at it. The cursor may not be moved until a new key has
+ * been found.
*/
- switch(flags) {
+ switch (flags) {
case R_CURSOR: /* Keyed scan. */
/*
- * Find the first instance of the key or the smallest key which
- * is greater than or equal to the specified key. If run out
- * of keys, return RET_SPECIAL.
+ * Find the first instance of the key or the smallest key
+ * which is greater than or equal to the specified key.
*/
if (key->data == NULL || key->size == 0) {
errno = EINVAL;
return (RET_ERROR);
}
- e = __bt_first(t, key, &exact); /* Returns pinned page. */
- if (e == NULL)
- return (RET_ERROR);
- /*
- * If at the end of a page, skip any empty pages and find the
- * next entry.
- */
- if (e->index == NEXTINDEX(e->page)) {
- h = e->page;
- do {
- pg = h->nextpg;
- mpool_put(t->bt_mp, h, 0);
- if (pg == P_INVALID)
- return (RET_SPECIAL);
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- } while (NEXTINDEX(h) == 0);
- e->index = 0;
- e->page = h;
- }
- *ep = *e;
- break;
+ return (__bt_first(t, key, ep, &exact));
case R_FIRST: /* First record. */
case R_NEXT:
/* Walk down the left-hand side of the tree. */
for (pg = P_ROOT;;) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
+
+ /* Check for an empty tree. */
+ if (NEXTINDEX(h) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_SPECIAL);
+ }
+
if (h->flags & (P_BLEAF | P_RLEAF))
break;
pg = GETBINTERNAL(h, 0)->pgno;
mpool_put(t->bt_mp, h, 0);
}
-
- /* Skip any empty pages. */
- while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
- pg = h->nextpg;
- mpool_put(t->bt_mp, h, 0);
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- }
-
- if (NEXTINDEX(h) == 0) {
- mpool_put(t->bt_mp, h, 0);
- return (RET_SPECIAL);
- }
-
ep->page = h;
ep->index = 0;
break;
@@ -243,25 +202,19 @@ bt_seqset(t, ep, key, flags)
for (pg = P_ROOT;;) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
+
+ /* Check for an empty tree. */
+ if (NEXTINDEX(h) == 0) {
+ mpool_put(t->bt_mp, h, 0);
+ return (RET_SPECIAL);
+ }
+
if (h->flags & (P_BLEAF | P_RLEAF))
break;
pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
mpool_put(t->bt_mp, h, 0);
}
- /* Skip any empty pages. */
- while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
- pg = h->prevpg;
- mpool_put(t->bt_mp, h, 0);
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- }
-
- if (NEXTINDEX(h) == 0) {
- mpool_put(t->bt_mp, h, 0);
- return (RET_SPECIAL);
- }
-
ep->page = h;
ep->index = NEXTINDEX(h) - 1;
break;
@@ -270,7 +223,8 @@ bt_seqset(t, ep, key, flags)
}
/*
- * BT_SEQADVANCE -- Advance the sequential scan.
+ * __bt_seqadvance --
+ * Advance the sequential scan.
*
* Parameters:
* t: tree
@@ -283,98 +237,224 @@ bt_seqset(t, ep, key, flags)
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
*/
static int
-bt_seqadv(t, e, flags)
+__bt_seqadv(t, ep, flags)
BTREE *t;
- EPG *e;
+ EPG *ep;
int flags;
{
- EPGNO *c, delc;
+ CURSOR *c;
PAGE *h;
indx_t index;
pgno_t pg;
+ int exact;
- /* Save the current cursor if going to delete it. */
- c = &t->bt_bcursor;
- if (ISSET(t, B_DELCRSR))
- delc = *c;
+ /*
+ * There are a couple of states that we can be in. The cursor has
+ * been initialized by the time we get here, but that's all we know.
+ */
+ c = &t->bt_cursor;
- if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
+ /*
+ * The cursor was deleted where there weren't any duplicate records,
+ * so the key was saved. Find out where that key would go in the
+ * current tree. It doesn't matter if the returned key is an exact
+ * match or not -- if it's an exact match, the record was added after
+ * the delete so we can just return it. If not, as long as there's
+ * a record there, return it.
+ */
+ if (F_ISSET(c, CURS_ACQUIRE))
+ return (__bt_first(t, &c->key, ep, &exact));
+
+ /* Get the page referenced by the cursor. */
+ if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
return (RET_ERROR);
/*
- * Find the next/previous record in the tree and point the cursor at it.
- * The cursor may not be moved until a new key has been found.
+ * Find the next/previous record in the tree and point the cursor at
+ * it. The cursor may not be moved until a new key has been found.
*/
- index = c->index;
- switch(flags) {
+ switch (flags) {
case R_NEXT: /* Next record. */
+ /*
+ * The cursor was deleted in duplicate records, and moved
+ * forward to a record that has yet to be returned. Clear
+ * that flag, and return the record.
+ */
+ if (F_ISSET(c, CURS_AFTER))
+ goto usecurrent;
+ index = c->pg.index;
if (++index == NEXTINDEX(h)) {
- do {
- pg = h->nextpg;
- mpool_put(t->bt_mp, h, 0);
- if (pg == P_INVALID)
- return (RET_SPECIAL);
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- } while (NEXTINDEX(h) == 0);
+ pg = h->nextpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID)
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
index = 0;
}
break;
case R_PREV: /* Previous record. */
- if (index-- == 0) {
- do {
- pg = h->prevpg;
- mpool_put(t->bt_mp, h, 0);
- if (pg == P_INVALID)
- return (RET_SPECIAL);
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- } while (NEXTINDEX(h) == 0);
- index = NEXTINDEX(h) - 1;
+ /*
+ * The cursor was deleted in duplicate records, and moved
+ * backward to a record that has yet to be returned. Clear
+ * that flag, and return the record.
+ */
+ if (F_ISSET(c, CURS_BEFORE)) {
+usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE);
+ ep->page = h;
+ ep->index = c->pg.index;
+ return (RET_SUCCESS);
}
+ index = c->pg.index;
+ if (index == 0) {
+ pg = h->prevpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID)
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
+ return (RET_ERROR);
+ index = NEXTINDEX(h) - 1;
+ } else
+ --index;
break;
}
- e->page = h;
- e->index = index;
+ ep->page = h;
+ ep->index = index;
+ return (RET_SUCCESS);
+}
+
+/*
+ * __bt_first --
+ * Find the first entry.
+ *
+ * Parameters:
+ * t: the tree
+ * key: the key
+ * erval: return EPG
+ * exactp: pointer to exact match flag
+ *
+ * Returns:
+ * The first entry in the tree greater than or equal to key,
+ * or RET_SPECIAL if no such key exists.
+ */
+static int
+__bt_first(t, key, erval, exactp)
+ BTREE *t;
+ const DBT *key;
+ EPG *erval;
+ int *exactp;
+{
+ PAGE *h;
+ EPG *ep, save;
+ pgno_t pg;
/*
- * Delete any already deleted record that we've been saving because the
- * cursor pointed to it. This could cause the new index to be shifted
- * down by one if the record we're deleting is on the same page and has
- * a larger index.
+ * Find any matching record; __bt_search pins the page.
+ *
+ * If it's an exact match and duplicates are possible, walk backwards
+ * in the tree until we find the first one. Otherwise, make sure it's
+ * a valid key (__bt_search may return an index just past the end of a
+ * page) and return it.
*/
- if (ISSET(t, B_DELCRSR)) {
- CLR(t, B_DELCRSR); /* Don't try twice. */
- if (c->pgno == delc.pgno && c->index > delc.index)
- --c->index;
- if (__bt_crsrdel(t, &delc))
+ if ((ep = __bt_search(t, key, exactp)) == NULL)
+ return (NULL);
+ if (*exactp) {
+ if (F_ISSET(t, B_NODUPS)) {
+ *erval = *ep;
+ return (RET_SUCCESS);
+ }
+
+ /*
+ * Walk backwards, as long as the entry matches and there are
+ * keys left in the tree. Save a copy of each match in case
+ * we go too far.
+ */
+ save = *ep;
+ h = ep->page;
+ do {
+ if (save.page->pgno != ep->page->pgno) {
+ mpool_put(t->bt_mp, save.page, 0);
+ save = *ep;
+ } else
+ save.index = ep->index;
+
+ /*
+ * Don't unpin the page the last (or original) match
+ * was on, but make sure it's unpinned if an error
+ * occurs.
+ */
+ if (ep->index == 0) {
+ if (h->prevpg == P_INVALID)
+ break;
+ if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, 0);
+ if ((h = mpool_get(t->bt_mp,
+ h->prevpg, 0)) == NULL) {
+ if (h->pgno == save.page->pgno)
+ mpool_put(t->bt_mp,
+ save.page, 0);
+ return (RET_ERROR);
+ }
+ ep->page = h;
+ ep->index = NEXTINDEX(h);
+ }
+ --ep->index;
+ } while (__bt_cmp(t, key, ep) == 0);
+
+ /*
+ * Reach here with the last page that was looked at pinned,
+ * which may or may not be the same as the last (or original)
+ * match page. If it's not useful, release it.
+ */
+ if (h->pgno != save.page->pgno)
+ mpool_put(t->bt_mp, h, 0);
+
+ *erval = save;
+ return (RET_SUCCESS);
+ }
+
+ /* If at the end of a page, find the next entry. */
+ if (ep->index == NEXTINDEX(ep->page)) {
+ h = ep->page;
+ pg = h->nextpg;
+ mpool_put(t->bt_mp, h, 0);
+ if (pg == P_INVALID)
+ return (RET_SPECIAL);
+ if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
+ ep->index = 0;
+ ep->page = h;
}
+ *erval = *ep;
return (RET_SUCCESS);
}
/*
- * __BT_CRSRDEL -- Delete the record referenced by the cursor.
+ * __bt_setcur --
+ * Set the cursor to an entry in the tree.
*
* Parameters:
- * t: tree
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
+ * t: the tree
+ * pgno: page number
+ * index: page index
*/
-int
-__bt_crsrdel(t, c)
+void
+__bt_setcur(t, pgno, index)
BTREE *t;
- EPGNO *c;
+ pgno_t pgno;
+ u_int index;
{
- PAGE *h;
- int status;
+ /* Lose any already deleted key. */
+ if (t->bt_cursor.key.data != NULL) {
+ free(t->bt_cursor.key.data);
+ t->bt_cursor.key.size = 0;
+ t->bt_cursor.key.data = NULL;
+ }
+ F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
- CLR(t, B_DELCRSR); /* Don't try twice. */
- if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
- return (RET_ERROR);
- status = __bt_dleaf(t, h, c->index);
- mpool_put(t->bt_mp, h, MPOOL_DIRTY);
- return (status);
+ /* Update the cursor. */
+ t->bt_cursor.pg.pgno = pgno;
+ t->bt_cursor.pg.index = index;
+ F_SET(&t->bt_cursor, CURS_INIT);
}
diff --git a/lib/libc/db/btree/bt_split.c b/lib/libc/db/btree/bt_split.c
index 4a572c0..1646d82 100644
--- a/lib/libc/db/btree/bt_split.c
+++ b/lib/libc/db/btree/bt_split.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_split.c 8.3 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_split.c 8.9 (Berkeley) 7/26/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
@@ -79,13 +79,13 @@ u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
* RET_ERROR, RET_SUCCESS
*/
int
-__bt_split(t, sp, key, data, flags, ilen, skip)
+__bt_split(t, sp, key, data, flags, ilen, argskip)
BTREE *t;
PAGE *sp;
const DBT *key, *data;
int flags;
size_t ilen;
- indx_t skip;
+ u_int32_t argskip;
{
BINTERNAL *bi;
BLEAF *bl, *tbl;
@@ -93,7 +93,8 @@ __bt_split(t, sp, key, data, flags, ilen, skip)
EPGNO *parent;
PAGE *h, *l, *r, *lchild, *rchild;
indx_t nxtindex;
- size_t n, nbytes, nksize;
+ u_int16_t skip;
+ u_int32_t n, nbytes, nksize;
int parentsplit;
char *dest;
@@ -103,6 +104,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip)
* skip set to the offset which should be used. Additionally, l and r
* are pinned.
*/
+ skip = argskip;
h = sp->pgno == P_ROOT ?
bt_root(t, sp, &l, &r, &skip, ilen) :
bt_page(t, sp, &l, &r, &skip, ilen);
@@ -115,14 +117,14 @@ __bt_split(t, sp, key, data, flags, ilen, skip)
*/
h->linp[skip] = h->upper -= ilen;
dest = (char *)h + h->upper;
- if (ISSET(t, R_RECNO))
+ if (F_ISSET(t, R_RECNO))
WR_RLEAF(dest, data, flags)
else
WR_BLEAF(dest, key, data, flags)
/* If the root page was split, make it look right. */
if (sp->pgno == P_ROOT &&
- (ISSET(t, R_RECNO) ?
+ (F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err2;
@@ -230,7 +232,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip)
}
/* Insert the key into the parent page. */
- switch(rchild->flags & P_TYPE) {
+ switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
@@ -295,7 +297,7 @@ __bt_split(t, sp, key, data, flags, ilen, skip)
/* If the root page was split, make it look right. */
if (sp->pgno == P_ROOT &&
- (ISSET(t, R_RECNO) ?
+ (F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err1;
@@ -388,6 +390,9 @@ bt_page(t, h, lp, rp, skip, ilen)
mpool_put(t->bt_mp, r, 0);
return (NULL);
}
+#ifdef PURIFY
+ memset(l, 0xff, t->bt_psize);
+#endif
l->pgno = h->pgno;
l->nextpg = r->pgno;
l->prevpg = h->prevpg;
@@ -403,7 +408,7 @@ bt_page(t, h, lp, rp, skip, ilen)
return (NULL);
}
tp->prevpg = r->pgno;
- mpool_put(t->bt_mp, tp, 0);
+ mpool_put(t->bt_mp, tp, MPOOL_DIRTY);
}
/*
@@ -534,7 +539,7 @@ bt_broot(t, h, l, r)
{
BINTERNAL *bi;
BLEAF *bl;
- size_t nbytes;
+ u_int32_t nbytes;
char *dest;
/*
@@ -550,7 +555,7 @@ bt_broot(t, h, l, r)
dest = (char *)h + h->upper;
WR_BINTERNAL(dest, 0, l->pgno, 0);
- switch(h->flags & P_TYPE) {
+ switch (h->flags & P_TYPE) {
case P_BLEAF:
bl = GETBLEAF(r, 0);
nbytes = NBINTERNAL(bl->ksize);
@@ -613,12 +618,12 @@ bt_psplit(t, h, l, r, pskip, ilen)
{
BINTERNAL *bi;
BLEAF *bl;
+ CURSOR *c;
RLEAF *rl;
- EPGNO *c;
PAGE *rval;
void *src;
indx_t full, half, nxt, off, skip, top, used;
- size_t nbytes;
+ u_int32_t nbytes;
int bigkeycnt, isbigkey;
/*
@@ -702,19 +707,16 @@ bt_psplit(t, h, l, r, pskip, ilen)
* cursor is at or past the skipped slot, the cursor is incremented by
* one. If the cursor is on the right page, it is decremented by the
* number of records split to the left page.
- *
- * Don't bother checking for the B_SEQINIT flag, the page number will
- * be P_INVALID.
*/
- c = &t->bt_bcursor;
- if (c->pgno == h->pgno) {
- if (c->index >= skip)
- ++c->index;
- if (c->index < nxt) /* Left page. */
- c->pgno = l->pgno;
+ c = &t->bt_cursor;
+ if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
+ if (c->pg.index >= skip)
+ ++c->pg.index;
+ if (c->pg.index < nxt) /* Left page. */
+ c->pg.pgno = l->pgno;
else { /* Right page. */
- c->pgno = r->pgno;
- c->index -= nxt;
+ c->pg.pgno = r->pgno;
+ c->pg.index -= nxt;
}
}
diff --git a/lib/libc/db/btree/bt_utils.c b/lib/libc/db/btree/bt_utils.c
index d2d1f73..9c1438e 100644
--- a/lib/libc/db/btree/bt_utils.c
+++ b/lib/libc/db/btree/bt_utils.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1990, 1993
+ * Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -35,7 +35,7 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94";
+static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
@@ -48,78 +48,91 @@ static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94";
#include "btree.h"
/*
- * __BT_RET -- Build return key/data pair as a result of search or scan.
+ * __bt_ret --
+ * Build return key/data pair.
*
* Parameters:
* t: tree
- * d: LEAF to be returned to the user.
+ * e: key/data pair to be returned
* key: user's key structure (NULL if not to be filled in)
- * data: user's data structure
+ * rkey: memory area to hold key
+ * data: user's data structure (NULL if not to be filled in)
+ * rdata: memory area to hold data
+ * copy: always copy the key/data item
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
-__bt_ret(t, e, key, data)
+__bt_ret(t, e, key, rkey, data, rdata, copy)
BTREE *t;
EPG *e;
- DBT *key, *data;
+ DBT *key, *rkey, *data, *rdata;
+ int copy;
{
- register BLEAF *bl;
- register void *p;
+ BLEAF *bl;
+ void *p;
bl = GETBLEAF(e->page, e->index);
/*
- * We always copy big keys/data to make them contigous. Otherwise,
- * we leave the page pinned and don't copy unless the user specified
+ * We must copy big keys/data to make them contigous. Otherwise,
+ * leave the page pinned and don't copy unless the user specified
* concurrent access.
*/
- if (bl->flags & P_BIGDATA) {
- if (__ovfl_get(t, bl->bytes + bl->ksize,
- &data->size, &t->bt_dbuf, &t->bt_dbufsz))
+ if (key == NULL)
+ goto dataonly;
+
+ if (bl->flags & P_BIGKEY) {
+ if (__ovfl_get(t, bl->bytes,
+ &key->size, &rkey->data, &rkey->size))
return (RET_ERROR);
- data->data = t->bt_dbuf;
- } else if (ISSET(t, B_DB_LOCK)) {
- /* Use +1 in case the first record retrieved is 0 length. */
- if (bl->dsize + 1 > t->bt_dbufsz) {
- if ((p =
- (void *)realloc(t->bt_dbuf, bl->dsize + 1)) == NULL)
+ key->data = rkey->data;
+ } else if (copy || F_ISSET(t, B_DB_LOCK)) {
+ if (bl->ksize > rkey->size) {
+ p = (void *)(rkey->data == NULL ?
+ malloc(bl->ksize) : realloc(rkey->data, bl->ksize));
+ if (p == NULL)
return (RET_ERROR);
- t->bt_dbuf = p;
- t->bt_dbufsz = bl->dsize + 1;
+ rkey->data = p;
+ rkey->size = bl->ksize;
}
- memmove(t->bt_dbuf, bl->bytes + bl->ksize, bl->dsize);
- data->size = bl->dsize;
- data->data = t->bt_dbuf;
+ memmove(rkey->data, bl->bytes, bl->ksize);
+ key->size = bl->ksize;
+ key->data = rkey->data;
} else {
- data->size = bl->dsize;
- data->data = bl->bytes + bl->ksize;
+ key->size = bl->ksize;
+ key->data = bl->bytes;
}
- if (key == NULL)
+dataonly:
+ if (data == NULL)
return (RET_SUCCESS);
- if (bl->flags & P_BIGKEY) {
- if (__ovfl_get(t, bl->bytes,
- &key->size, &t->bt_kbuf, &t->bt_kbufsz))
+ if (bl->flags & P_BIGDATA) {
+ if (__ovfl_get(t, bl->bytes + bl->ksize,
+ &data->size, &rdata->data, &rdata->size))
return (RET_ERROR);
- key->data = t->bt_kbuf;
- } else if (ISSET(t, B_DB_LOCK)) {
- if (bl->ksize > t->bt_kbufsz) {
- if ((p =
- (void *)realloc(t->bt_kbuf, bl->ksize)) == NULL)
+ data->data = rdata->data;
+ } else if (copy || F_ISSET(t, B_DB_LOCK)) {
+ /* Use +1 in case the first record retrieved is 0 length. */
+ if (bl->dsize + 1 > rdata->size) {
+ p = (void *)(rdata->data == NULL ?
+ malloc(bl->dsize + 1) :
+ realloc(rdata->data, bl->dsize + 1));
+ if (p == NULL)
return (RET_ERROR);
- t->bt_kbuf = p;
- t->bt_kbufsz = bl->ksize;
+ rdata->data = p;
+ rdata->size = bl->dsize + 1;
}
- memmove(t->bt_kbuf, bl->bytes, bl->ksize);
- key->size = bl->ksize;
- key->data = t->bt_kbuf;
+ memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize);
+ data->size = bl->dsize;
+ data->data = rdata->data;
} else {
- key->size = bl->ksize;
- key->data = bl->bytes;
+ data->size = bl->dsize;
+ data->data = bl->bytes + bl->ksize;
}
+
return (RET_SUCCESS);
}
@@ -180,9 +193,9 @@ __bt_cmp(t, k1, e)
if (bigkey) {
if (__ovfl_get(t, bigkey,
- &k2.size, &t->bt_dbuf, &t->bt_dbufsz))
+ &k2.size, &t->bt_rdata.data, &t->bt_rdata.size))
return (RET_ERROR);
- k2.data = t->bt_dbuf;
+ k2.data = t->bt_rdata.data;
}
return ((*t->bt_cmp)(k1, &k2));
}
diff --git a/lib/libc/db/btree/btree.h b/lib/libc/db/btree/btree.h
index dd798ec..36d35c9 100644
--- a/lib/libc/db/btree/btree.h
+++ b/lib/libc/db/btree/btree.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1991, 1993
+ * Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -33,9 +33,14 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)btree.h 8.5 (Berkeley) 2/21/94
+ * @(#)btree.h 8.11 (Berkeley) 8/17/94
*/
+/* Macros to set/clear/test flags. */
+#define F_SET(p, f) (p)->flags |= (f)
+#define F_CLR(p, f) (p)->flags &= ~(f)
+#define F_ISSET(p, f) ((p)->flags & (f))
+
#include <mpool.h>
#define DEFMINKEYPAGE (2) /* Minimum keys per page */
@@ -79,8 +84,9 @@ typedef struct _page {
} PAGE;
/* First and next index. */
-#define BTDATAOFF (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \
- sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t))
+#define BTDATAOFF \
+ (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \
+ sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t))
#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t))
/*
@@ -99,9 +105,8 @@ typedef struct _page {
* be manipulated without copying. (This presumes that 32 bit items can be
* manipulated on this system.)
*/
-#define LALIGN(n) \
- (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1))
-#define NOVFLSIZE (sizeof(pgno_t) + sizeof(size_t))
+#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1))
+#define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t))
/*
* For the btree internal pages, the item is a key. BINTERNALs are {key, pgno}
@@ -113,7 +118,7 @@ typedef struct _page {
* some minor modifications of the above rule.
*/
typedef struct _binternal {
- size_t ksize; /* key size */
+ u_int32_t ksize; /* key size */
pgno_t pgno; /* page number stored on */
#define P_BIGDATA 0x01 /* overflow data */
#define P_BIGKEY 0x02 /* overflow key */
@@ -122,21 +127,21 @@ typedef struct _binternal {
} BINTERNAL;
/* Get the page's BINTERNAL structure at index indx. */
-#define GETBINTERNAL(pg, indx) \
+#define GETBINTERNAL(pg, indx) \
((BINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
-#define NBINTERNAL(len) \
- LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char) + (len))
+#define NBINTERNAL(len) \
+ LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len))
/* Copy a BINTERNAL entry to the page. */
-#define WR_BINTERNAL(p, size, pgno, flags) { \
- *(size_t *)p = size; \
- p += sizeof(size_t); \
- *(pgno_t *)p = pgno; \
- p += sizeof(pgno_t); \
- *(u_char *)p = flags; \
- p += sizeof(u_char); \
+#define WR_BINTERNAL(p, size, pgno, flags) { \
+ *(u_int32_t *)p = size; \
+ p += sizeof(u_int32_t); \
+ *(pgno_t *)p = pgno; \
+ p += sizeof(pgno_t); \
+ *(u_char *)p = flags; \
+ p += sizeof(u_char); \
}
/*
@@ -149,78 +154,78 @@ typedef struct _rinternal {
} RINTERNAL;
/* Get the page's RINTERNAL structure at index indx. */
-#define GETRINTERNAL(pg, indx) \
+#define GETRINTERNAL(pg, indx) \
((RINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
-#define NRINTERNAL \
+#define NRINTERNAL \
LALIGN(sizeof(recno_t) + sizeof(pgno_t))
/* Copy a RINTERAL entry to the page. */
-#define WR_RINTERNAL(p, nrecs, pgno) { \
- *(recno_t *)p = nrecs; \
- p += sizeof(recno_t); \
- *(pgno_t *)p = pgno; \
+#define WR_RINTERNAL(p, nrecs, pgno) { \
+ *(recno_t *)p = nrecs; \
+ p += sizeof(recno_t); \
+ *(pgno_t *)p = pgno; \
}
/* For the btree leaf pages, the item is a key and data pair. */
typedef struct _bleaf {
- size_t ksize; /* size of key */
- size_t dsize; /* size of data */
+ u_int32_t ksize; /* size of key */
+ u_int32_t dsize; /* size of data */
u_char flags; /* P_BIGDATA, P_BIGKEY */
char bytes[1]; /* data */
} BLEAF;
/* Get the page's BLEAF structure at index indx. */
-#define GETBLEAF(pg, indx) \
+#define GETBLEAF(pg, indx) \
((BLEAF *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize)
/* Get the number of bytes in the user's key/data pair. */
-#define NBLEAFDBT(ksize, dsize) \
- LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \
+#define NBLEAFDBT(ksize, dsize) \
+ LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \
(ksize) + (dsize))
/* Copy a BLEAF entry to the page. */
-#define WR_BLEAF(p, key, data, flags) { \
- *(size_t *)p = key->size; \
- p += sizeof(size_t); \
- *(size_t *)p = data->size; \
- p += sizeof(size_t); \
- *(u_char *)p = flags; \
- p += sizeof(u_char); \
- memmove(p, key->data, key->size); \
- p += key->size; \
- memmove(p, data->data, data->size); \
+#define WR_BLEAF(p, key, data, flags) { \
+ *(u_int32_t *)p = key->size; \
+ p += sizeof(u_int32_t); \
+ *(u_int32_t *)p = data->size; \
+ p += sizeof(u_int32_t); \
+ *(u_char *)p = flags; \
+ p += sizeof(u_char); \
+ memmove(p, key->data, key->size); \
+ p += key->size; \
+ memmove(p, data->data, data->size); \
}
/* For the recno leaf pages, the item is a data entry. */
typedef struct _rleaf {
- size_t dsize; /* size of data */
+ u_int32_t dsize; /* size of data */
u_char flags; /* P_BIGDATA */
char bytes[1];
} RLEAF;
/* Get the page's RLEAF structure at index indx. */
-#define GETRLEAF(pg, indx) \
+#define GETRLEAF(pg, indx) \
((RLEAF *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NRLEAF(p) NRLEAFDBT((p)->dsize)
/* Get the number of bytes from the user's data. */
-#define NRLEAFDBT(dsize) \
- LALIGN(sizeof(size_t) + sizeof(u_char) + (dsize))
+#define NRLEAFDBT(dsize) \
+ LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize))
/* Copy a RLEAF entry to the page. */
-#define WR_RLEAF(p, data, flags) { \
- *(size_t *)p = data->size; \
- p += sizeof(size_t); \
- *(u_char *)p = flags; \
- p += sizeof(u_char); \
- memmove(p, data->data, data->size); \
+#define WR_RLEAF(p, data, flags) { \
+ *(u_int32_t *)p = data->size; \
+ p += sizeof(u_int32_t); \
+ *(u_char *)p = flags; \
+ p += sizeof(u_char); \
+ memmove(p, data->data, data->size); \
}
/*
@@ -232,12 +237,6 @@ typedef struct _rleaf {
* record less than key in the tree so that descents work. Leaf page searches
* must find the smallest record greater than key so that the returned index
* is the record's correct position for insertion.
- *
- * One comment about cursors. The cursor key is never removed from the tree,
- * even if deleted. This is because it is quite difficult to decide where the
- * cursor should be when other keys have been inserted/deleted in the tree;
- * duplicate keys make it impossible. This scheme does require extra work
- * though, to make sure that we don't perform an operation on a deleted key.
*/
typedef struct _epgno {
pgno_t pgno; /* the page number */
@@ -250,53 +249,90 @@ typedef struct _epg {
} EPG;
/*
- * The metadata of the tree. The m_nrecs field is used only by the RECNO code.
+ * About cursors. The cursor (and the page that contained the key/data pair
+ * that it referenced) can be deleted, which makes things a bit tricky. If
+ * there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set
+ * or there simply aren't any duplicates of the key) we copy the key that it
+ * referenced when it's deleted, and reacquire a new cursor key if the cursor
+ * is used again. If there are duplicates keys, we move to the next/previous
+ * key, and set a flag so that we know what happened. NOTE: if duplicate (to
+ * the cursor) keys are added to the tree during this process, it is undefined
+ * if they will be returned or not in a cursor scan.
+ *
+ * The flags determine the possible states of the cursor:
+ *
+ * CURS_INIT The cursor references *something*.
+ * CURS_ACQUIRE The cursor was deleted, and a key has been saved so that
+ * we can reacquire the right position in the tree.
+ * CURS_AFTER, CURS_BEFORE
+ * The cursor was deleted, and now references a key/data pair
+ * that has not yet been returned, either before or after the
+ * deleted key/data pair.
+ * XXX
+ * This structure is broken out so that we can eventually offer multiple
+ * cursors as part of the DB interface.
+ */
+typedef struct _cursor {
+ EPGNO pg; /* B: Saved tree reference. */
+ DBT key; /* B: Saved key, or key.data == NULL. */
+ recno_t rcursor; /* R: recno cursor (1-based) */
+
+#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */
+#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */
+#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */
+#define CURS_INIT 0x08 /* RB: Cursor initialized. */
+ u_int8_t flags;
+} CURSOR;
+
+/*
+ * The metadata of the tree. The nrecs field is used only by the RECNO code.
* This is because the btree doesn't really need it and it requires that every
* put or delete call modify the metadata.
*/
typedef struct _btmeta {
- u_int32_t m_magic; /* magic number */
- u_int32_t m_version; /* version */
- u_int32_t m_psize; /* page size */
- u_int32_t m_free; /* page number of first free page */
- u_int32_t m_nrecs; /* R: number of records */
+ u_int32_t magic; /* magic number */
+ u_int32_t version; /* version */
+ u_int32_t psize; /* page size */
+ u_int32_t free; /* page number of first free page */
+ u_int32_t nrecs; /* R: number of records */
+
#define SAVEMETA (B_NODUPS | R_RECNO)
- u_int32_t m_flags; /* bt_flags & SAVEMETA */
- u_int32_t m_unused; /* unused */
+ u_int32_t flags; /* bt_flags & SAVEMETA */
} BTMETA;
/* The in-memory btree/recno data structure. */
typedef struct _btree {
- MPOOL *bt_mp; /* memory pool cookie */
+ MPOOL *bt_mp; /* memory pool cookie */
- DB *bt_dbp; /* pointer to enclosing DB */
+ DB *bt_dbp; /* pointer to enclosing DB */
- EPG bt_cur; /* current (pinned) page */
- PAGE *bt_pinned; /* page pinned across calls */
+ EPG bt_cur; /* current (pinned) page */
+ PAGE *bt_pinned; /* page pinned across calls */
- EPGNO bt_bcursor; /* B: btree cursor */
- recno_t bt_rcursor; /* R: recno cursor (1-based) */
+ CURSOR bt_cursor; /* cursor */
-#define BT_POP(t) (t->bt_sp ? t->bt_stack + --t->bt_sp : NULL)
-#define BT_CLR(t) (t->bt_sp = 0)
- EPGNO *bt_stack; /* stack of parent pages */
- u_int bt_sp; /* current stack pointer */
- u_int bt_maxstack; /* largest stack */
+#define BT_PUSH(t, p, i) { \
+ t->bt_sp->pgno = p; \
+ t->bt_sp->index = i; \
+ ++t->bt_sp; \
+}
+#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp)
+#define BT_CLR(t) (t->bt_sp = t->bt_stack)
+ EPGNO bt_stack[50]; /* stack of parent pages */
+ EPGNO *bt_sp; /* current stack pointer */
- char *bt_kbuf; /* key buffer */
- size_t bt_kbufsz; /* key buffer size */
- char *bt_dbuf; /* data buffer */
- size_t bt_dbufsz; /* data buffer size */
+ DBT bt_rkey; /* returned key */
+ DBT bt_rdata; /* returned data */
- int bt_fd; /* tree file descriptor */
+ int bt_fd; /* tree file descriptor */
- pgno_t bt_free; /* next free page */
+ pgno_t bt_free; /* next free page */
u_int32_t bt_psize; /* page size */
- indx_t bt_ovflsize; /* cut-off for key/data overflow */
- int bt_lorder; /* byte order */
+ indx_t bt_ovflsize; /* cut-off for key/data overflow */
+ int bt_lorder; /* byte order */
/* sorted order */
enum { NOT, BACK, FORWARD } bt_order;
- EPGNO bt_last; /* last insert */
+ EPGNO bt_last; /* last insert */
/* B: key comparison function */
int (*bt_cmp) __P((const DBT *, const DBT *));
@@ -305,49 +341,43 @@ typedef struct _btree {
/* R: recno input function */
int (*bt_irec) __P((struct _btree *, recno_t));
- FILE *bt_rfp; /* R: record FILE pointer */
- int bt_rfd; /* R: record file descriptor */
+ FILE *bt_rfp; /* R: record FILE pointer */
+ int bt_rfd; /* R: record file descriptor */
- caddr_t bt_cmap; /* R: current point in mapped space */
- caddr_t bt_smap; /* R: start of mapped space */
- caddr_t bt_emap; /* R: end of mapped space */
- size_t bt_msize; /* R: size of mapped region. */
+ caddr_t bt_cmap; /* R: current point in mapped space */
+ caddr_t bt_smap; /* R: start of mapped space */
+ caddr_t bt_emap; /* R: end of mapped space */
+ size_t bt_msize; /* R: size of mapped region. */
- recno_t bt_nrecs; /* R: number of records */
- size_t bt_reclen; /* R: fixed record length */
- u_char bt_bval; /* R: delimiting byte/pad character */
+ recno_t bt_nrecs; /* R: number of records */
+ size_t bt_reclen; /* R: fixed record length */
+ u_char bt_bval; /* R: delimiting byte/pad character */
/*
* NB:
* B_NODUPS and R_RECNO are stored on disk, and may not be changed.
*/
-#define B_DELCRSR 0x00001 /* cursor has been deleted */
-#define B_INMEM 0x00002 /* in-memory tree */
-#define B_METADIRTY 0x00004 /* need to write metadata */
-#define B_MODIFIED 0x00008 /* tree modified */
-#define B_NEEDSWAP 0x00010 /* if byte order requires swapping */
+#define B_INMEM 0x00001 /* in-memory tree */
+#define B_METADIRTY 0x00002 /* need to write metadata */
+#define B_MODIFIED 0x00004 /* tree modified */
+#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */
+#define B_RDONLY 0x00010 /* read-only tree */
+
#define B_NODUPS 0x00020 /* no duplicate keys permitted */
-#define B_RDONLY 0x00040 /* read-only tree */
#define R_RECNO 0x00080 /* record oriented tree */
-#define B_SEQINIT 0x00100 /* sequential scan initialized */
-
-#define R_CLOSEFP 0x00200 /* opened a file pointer */
-#define R_EOF 0x00400 /* end of input file reached. */
-#define R_FIXLEN 0x00800 /* fixed length records */
-#define R_MEMMAPPED 0x01000 /* memory mapped file. */
-#define R_INMEM 0x02000 /* in-memory file */
-#define R_MODIFIED 0x04000 /* modified file */
-#define R_RDONLY 0x08000 /* read-only file */
-#define B_DB_LOCK 0x10000 /* DB_LOCK specified. */
-#define B_DB_SHMEM 0x20000 /* DB_SHMEM specified. */
-#define B_DB_TXN 0x40000 /* DB_TXN specified. */
-
- u_int32_t bt_flags; /* btree state */
+#define R_CLOSEFP 0x00040 /* opened a file pointer */
+#define R_EOF 0x00100 /* end of input file reached. */
+#define R_FIXLEN 0x00200 /* fixed length records */
+#define R_MEMMAPPED 0x00400 /* memory mapped file. */
+#define R_INMEM 0x00800 /* in-memory file */
+#define R_MODIFIED 0x01000 /* modified file */
+#define R_RDONLY 0x02000 /* read-only file */
+
+#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */
+#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */
+#define B_DB_TXN 0x10000 /* DB_TXN specified. */
+ u_int32_t flags;
} BTREE;
-#define SET(t, f) ((t)->bt_flags |= (f))
-#define CLR(t, f) ((t)->bt_flags &= ~(f))
-#define ISSET(t, f) ((t)->bt_flags & (f))
-
#include "extern.h"
diff --git a/lib/libc/db/btree/extern.h b/lib/libc/db/btree/extern.h
index 4007bc7..ebd9c54 100644
--- a/lib/libc/db/btree/extern.h
+++ b/lib/libc/db/btree/extern.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1991, 1993
+ * Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)extern.h 8.3 (Berkeley) 2/21/94
+ * @(#)extern.h 8.10 (Berkeley) 7/20/94
*/
int __bt_close __P((DB *));
@@ -39,9 +39,8 @@ int __bt_crsrdel __P((BTREE *, EPGNO *));
int __bt_defcmp __P((const DBT *, const DBT *));
size_t __bt_defpfx __P((const DBT *, const DBT *));
int __bt_delete __P((const DB *, const DBT *, u_int));
-int __bt_dleaf __P((BTREE *, PAGE *, int));
+int __bt_dleaf __P((BTREE *, const DBT *, PAGE *, u_int));
int __bt_fd __P((const DB *));
-EPG *__bt_first __P((BTREE *, const DBT *, int *));
int __bt_free __P((BTREE *, PAGE *));
int __bt_get __P((const DB *, const DBT *, DBT *, u_int));
PAGE *__bt_new __P((BTREE *, pgno_t *));
@@ -49,15 +48,16 @@ void __bt_pgin __P((void *, pgno_t, void *));
void __bt_pgout __P((void *, pgno_t, void *));
int __bt_push __P((BTREE *, pgno_t, int));
int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int));
-int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *));
+int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int));
EPG *__bt_search __P((BTREE *, const DBT *, int *));
int __bt_seq __P((const DB *, DBT *, DBT *, u_int));
+void __bt_setcur __P((BTREE *, pgno_t, u_int));
int __bt_split __P((BTREE *, PAGE *,
- const DBT *, const DBT *, int, size_t, indx_t));
+ const DBT *, const DBT *, int, size_t, u_int32_t));
int __bt_sync __P((const DB *, u_int));
int __ovfl_delete __P((BTREE *, void *));
-int __ovfl_get __P((BTREE *, void *, size_t *, char **, size_t *));
+int __ovfl_get __P((BTREE *, void *, size_t *, void **, size_t *));
int __ovfl_put __P((BTREE *, const DBT *, pgno_t *));
#ifdef DEBUG
OpenPOWER on IntegriCloud