Diffstat (limited to 'sys/kern/vfs_subr.c')
-rw-r--r--  sys/kern/vfs_subr.c  167
1 file changed, 55 insertions, 112 deletions
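The diff below replaces the per-bufobj splay tree with a pctrie keyed on b_lblkno. The BUF_PCTRIE_* calls it introduces are wrappers generated by the PCTRIE_DEFINE() invocation added in the patch; as a rough sketch, inferred only from the call sites in this diff rather than copied from sys/pctrie.h (so the exact key type and qualifiers are assumptions), they behave roughly like:

/* Sketch only: shapes inferred from the call sites in the diff below. */
int          BUF_PCTRIE_INSERT(struct pctrie *ptree, struct buf *bp);     /* 0 on success, else an errno (node allocation can fail) */
struct buf  *BUF_PCTRIE_LOOKUP(struct pctrie *ptree, uint64_t lblkno);    /* exact match, or NULL */
struct buf  *BUF_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t lblkno); /* greatest key <= lblkno, or NULL */
void         BUF_PCTRIE_REMOVE(struct pctrie *ptree, uint64_t lblkno);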
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 7f72ed1..0da6764 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
+#include <sys/pctrie.h>
 #include <sys/priv.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
@@ -184,6 +185,8 @@ static struct mtx vnode_free_list_mtx;
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
 
+static uma_zone_t buf_trie_zone;
+
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static uma_zone_t vnode_zone;
 static uma_zone_t vnodepoll_zone;
@@ -284,6 +287,24 @@ SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW,
 static int vnsz2log;
 
 /*
+ * Support for the bufobj clean & dirty pctrie.
+ */
+static void *
+buf_trie_alloc(struct pctrie *ptree)
+{
+
+	return uma_zalloc(buf_trie_zone, M_NOWAIT);
+}
+
+static void
+buf_trie_free(struct pctrie *ptree, void *node)
+{
+
+	uma_zfree(buf_trie_zone, node);
+}
+PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free);
+
+/*
  * Initialize the vnode management data structures.
  *
  * Reevaluate the following cap on the number of vnodes after the physical
@@ -329,6 +350,15 @@ vntblinit(void *dummy __unused)
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	/*
+	 * Preallocate enough nodes to support one-per buf so that
+	 * we can not fail an insert. reassignbuf() callers can not
+	 * tolerate the insertion failure.
+	 */
+	buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(),
+	    NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR,
+	    UMA_ZONE_NOFREE | UMA_ZONE_VM);
+	uma_prealloc(buf_trie_zone, nbuf);
+	/*
 	 * Initialize the filesystem syncer.
 	 */
 	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
@@ -1476,75 +1506,9 @@ restartsync:
 	return (0);
 }
 
-/*
- * buf_splay() - splay tree core for the clean/dirty list of buffers in
- * a vnode.
- *
- * NOTE: We have to deal with the special case of a background bitmap
- * buffer, a situation where two buffers will have the same logical
- * block offset. We want (1) only the foreground buffer to be accessed
- * in a lookup and (2) must differentiate between the foreground and
- * background buffer in the splay tree algorithm because the splay
- * tree cannot normally handle multiple entities with the same 'index'.
- * We accomplish this by adding differentiating flags to the splay tree's
- * numerical domain.
- */
-static
-struct buf *
-buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root)
-{
-	struct buf dummy;
-	struct buf *lefttreemax, *righttreemin, *y;
-
-	if (root == NULL)
-		return (NULL);
-	lefttreemax = righttreemin = &dummy;
-	for (;;) {
-		if (lblkno < root->b_lblkno) {
-			if ((y = root->b_left) == NULL)
-				break;
-			if (lblkno < y->b_lblkno) {
-				/* Rotate right. */
-				root->b_left = y->b_right;
-				y->b_right = root;
-				root = y;
-				if ((y = root->b_left) == NULL)
-					break;
-			}
-			/* Link into the new root's right tree. */
-			righttreemin->b_left = root;
-			righttreemin = root;
-		} else if (lblkno > root->b_lblkno) {
-			if ((y = root->b_right) == NULL)
-				break;
-			if (lblkno > y->b_lblkno) {
-				/* Rotate left. */
-				root->b_right = y->b_left;
-				y->b_left = root;
-				root = y;
-				if ((y = root->b_right) == NULL)
-					break;
-			}
-			/* Link into the new root's left tree. */
-			lefttreemax->b_right = root;
-			lefttreemax = root;
-		} else {
-			break;
-		}
-		root = y;
-	}
-	/* Assemble the new root. */
-	lefttreemax->b_right = root->b_left;
-	righttreemin->b_left = root->b_right;
-	root->b_left = dummy.b_right;
-	root->b_right = dummy.b_left;
-	return (root);
-}
-
 static void
 buf_vlist_remove(struct buf *bp)
 {
-	struct buf *root;
 	struct bufv *bv;
 
 	KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
@@ -1556,33 +1520,23 @@ buf_vlist_remove(struct buf *bp)
 		bv = &bp->b_bufobj->bo_dirty;
 	else
 		bv = &bp->b_bufobj->bo_clean;
-	if (bp != bv->bv_root) {
-		root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root);
-		KASSERT(root == bp, ("splay lookup failed in remove"));
-	}
-	if (bp->b_left == NULL) {
-		root = bp->b_right;
-	} else {
-		root = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left);
-		root->b_right = bp->b_right;
-	}
-	bv->bv_root = root;
+	BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno);
 	TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs);
 	bv->bv_cnt--;
 	bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 }
 
 /*
- * Add the buffer to the sorted clean or dirty block list using a
- * splay tree algorithm.
+ * Add the buffer to the sorted clean or dirty block list.
  *
  * NOTE: xflags is passed as a constant, optimizing this inline function!
  */
 static void
 buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
 {
-	struct buf *root;
 	struct bufv *bv;
+	struct buf *n;
+	int error;
 
 	ASSERT_BO_LOCKED(bo);
 	KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
@@ -1593,24 +1547,22 @@ buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
 	else
 		bv = &bo->bo_clean;
 
-	root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root);
-	if (root == NULL) {
-		bp->b_left = NULL;
-		bp->b_right = NULL;
+	/*
+	 * Keep the list ordered. Optimize empty list insertion. Assume
+	 * we tend to grow at the tail so lookup_le should usually be cheaper
+	 * than _ge.
+	 */
+	if (bv->bv_cnt == 0 ||
+	    bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno)
 		TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
-	} else if (bp->b_lblkno < root->b_lblkno) {
-		bp->b_left = root->b_left;
-		bp->b_right = root;
-		root->b_left = NULL;
-		TAILQ_INSERT_BEFORE(root, bp, b_bobufs);
-	} else {
-		bp->b_right = root->b_right;
-		bp->b_left = root;
-		root->b_right = NULL;
-		TAILQ_INSERT_AFTER(&bv->bv_hd, root, bp, b_bobufs);
-	}
+	else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL)
+		TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs);
+	else
+		TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs);
+	error = BUF_PCTRIE_INSERT(&bv->bv_root, bp);
+	if (error)
+		panic("buf_vlist_add: Preallocated nodes insufficient.");
 	bv->bv_cnt++;
-	bv->bv_root = bp;
 }
 
 /*
@@ -1631,21 +1583,10 @@ gbincore(struct bufobj *bo, daddr_t lblkno)
 	struct buf *bp;
 
 	ASSERT_BO_LOCKED(bo);
-	if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno)
-		return (bp);
-	if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno)
+	bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno);
+	if (bp != NULL)
 		return (bp);
-	if ((bp = bo->bo_clean.bv_root) != NULL) {
-		bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp);
-		if (bp->b_lblkno == lblkno)
-			return (bp);
-	}
-	if ((bp = bo->bo_dirty.bv_root) != NULL) {
-		bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp);
-		if (bp->b_lblkno == lblkno)
-			return (bp);
-	}
-	return (NULL);
+	return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno);
 }
 
 /*
@@ -2460,9 +2401,11 @@ vdropl(struct vnode *vp)
 	VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
 	VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
 	VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
-	VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL"));
+	VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp,
+	    ("clean blk trie not empty"));
	VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
-	VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL"));
+	VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp,
+	    ("dirty blk trie not empty"));
 	VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
 	VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
 	VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
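For reference, the new buf_vlist_add() keeps the TAILQ sorted by appending when the new block number is beyond the current tail and otherwise inserting after the predecessor that the trie's lookup_le returns. Below is a minimal user-space sketch of that ordering logic, assuming a system that provides the BSD <sys/queue.h> TAILQ macros; a linear predecessor scan stands in for BUF_PCTRIE_LOOKUP_LE(), and all names (entry, list_insert_sorted) are illustrative, not kernel code.

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct entry {
	long			key;	/* stands in for b_lblkno */
	TAILQ_ENTRY(entry)	link;	/* stands in for b_bobufs */
};
TAILQ_HEAD(entries, entry);

static void
list_insert_sorted(struct entries *head, struct entry *e)
{
	struct entry *n, *prev;

	/* Empty list or new maximum: the common append case stays O(1). */
	if (TAILQ_EMPTY(head) || e->key > TAILQ_LAST(head, entries)->key) {
		TAILQ_INSERT_TAIL(head, e, link);
		return;
	}
	/* Find the greatest key <= e->key; the kernel asks the pctrie instead. */
	prev = NULL;
	TAILQ_FOREACH(n, head, link) {
		if (n->key > e->key)
			break;
		prev = n;
	}
	if (prev == NULL)
		TAILQ_INSERT_HEAD(head, e, link);
	else
		TAILQ_INSERT_AFTER(head, prev, e, link);
}

int
main(void)
{
	struct entries head = TAILQ_HEAD_INITIALIZER(head);
	long keys[] = { 5, 1, 9, 3 };
	struct entry *e;
	size_t i;

	for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
		if ((e = calloc(1, sizeof(*e))) == NULL)
			abort();
		e->key = keys[i];
		list_insert_sorted(&head, e);
	}
	TAILQ_FOREACH(e, &head, link)
		printf("%ld\n", e->key);	/* prints 1 3 5 9 */
	return (0);
}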