diff options
Diffstat (limited to 'sys/ufs')
-rw-r--r-- | sys/ufs/ufs/dirhash.h | 124 | ||||
-rw-r--r-- | sys/ufs/ufs/inode.h | 2 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_dirhash.c | 1049 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_inode.c | 8 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_lookup.c | 95 |
5 files changed, 1276 insertions, 2 deletions
diff --git a/sys/ufs/ufs/dirhash.h b/sys/ufs/ufs/dirhash.h new file mode 100644 index 0000000..50cdc65 --- /dev/null +++ b/sys/ufs/ufs/dirhash.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2001 Ian Dowse. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _UFS_UFS_DIRHASH_H_ +#define _UFS_UFS_DIRHASH_H_ + +/* + * For fast operations on large directories, we maintain a hash + * that maps the file name to the offset of the directory entry within + * the directory file. + * + * The hashing uses a dumb spillover to the next free slot on + * collisions, so we must keep the utilisation low to avoid + * long linear searches. Deleted entries that are not the last + * in a chain must be marked DIRHASH_DEL. + * + * We also maintain a information about free space in each block + * to speed up creations. + */ +#define DIRHASH_EMPTY (-1) /* entry unused */ +#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */ + +#define DIRALIGN 4 +#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN) + /* max DIRALIGN words in a directory entry */ + +/* + * Dirhash uses a score mechanism to achieve a hybrid between a + * least-recently-used and a least-often-used algorithm for entry + * recycling. The score is incremented when a directory is used, and + * decremented when the directory is a candidate for recycling. When + * the score reaches zero, the hash is recycled. Hashes are linked + * together on a TAILQ list, and hashes with higher scores filter + * towards the tail (most recently used) end of the list. + * + * New hash entries are given an inital score of DH_SCOREINIT and are + * placed at the most-recently-used end of the list. This helps a lot + * in the worst-case case scenario where every directory access is + * to a directory that is not hashed (i.e. the working set of hash + * candidates is much larger than the configured memry limit). In this + * case it limits the number of hash builds to 1/DH_SCOREINIT of the + * number of accesses. + */ +#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */ +#define DH_SCOREMAX 64 /* max dh_score value */ + +/* + * The main hash table has 2 levels. It is an array of pointers to + * blocks of DH_NBLKOFF offsets. + */ +#define DH_BLKOFFSHIFT 8 +#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT) +#define DH_BLKOFFMASK (DH_NBLKOFF - 1) + +#define DH_ENTRY(dh, slot) \ + ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK]) + +struct dirhash { + struct mtx dh_mtx; /* protects all fields except dh_list */ + + doff_t **dh_hash; /* the hash array (2-level) */ + int dh_narrays; /* number of entries in dh_hash */ + int dh_hlen; /* total slots in the 2-level hash array */ + int dh_hused; /* entries in use */ + + /* Free space statistics. XXX assumes DIRBLKSIZ is 512. */ + u_int8_t *dh_blkfree; /* free DIRALIGN words in each dir block */ + int dh_nblk; /* size of dh_blkfree array */ + int dh_dirblks; /* number of DIRBLKSIZ blocks in dir */ + int dh_firstfree[DH_NFSTATS + 1]; /* first blk with N words free */ + + int dh_seqopt; /* sequential access optimisation enabled */ + doff_t dh_seqoff; /* sequential access optimisation offset */ + + int dh_score; /* access count for this dirhash */ + + int dh_onlist; /* true if on the ufsdirhash_list chain */ + + /* Protected by ufsdirhash_mtx. */ + TAILQ_ENTRY(dirhash) dh_list; /* chain of all dirhashes */ +}; + + +/* + * Dirhash functions. + */ +int ufsdirhash_build(struct inode *); +doff_t ufsdirhash_findfree(struct inode *, int, int *); +doff_t ufsdirhash_enduseful(struct inode *); +int ufsdirhash_lookup(struct inode *, char *, int, doff_t *, doff_t *); +void ufsdirhash_newblk(struct inode *, doff_t); +void ufsdirhash_add(struct inode *, struct direct *, doff_t); +void ufsdirhash_remove(struct inode *, struct direct *, doff_t); +void ufsdirhash_move(struct inode *, struct direct *, doff_t, doff_t); +void ufsdirhash_dirtrunc(struct inode *, doff_t); +void ufsdirhash_free(struct inode *); + +void ufsdirhash_checkblock(struct inode *, char *, doff_t); + +#endif /* !_UFS_UFS_DIRHASH_H_ */ diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 0fdaee2..968d950 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -94,6 +94,8 @@ struct inode { ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ u_int32_t i_spare[4]; /* XXX actually non-spare (for ext2fs). */ + + struct dirhash *i_dirhash; /* Hashing for large directories */ /* * The on-disk dinode itself. */ diff --git a/sys/ufs/ufs/ufs_dirhash.c b/sys/ufs/ufs/ufs_dirhash.c new file mode 100644 index 0000000..cc2cda9 --- /dev/null +++ b/sys/ufs/ufs/ufs_dirhash.c @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 2001 Ian Dowse. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +/* + * This implements a hash-based lookup scheme for UFS directories. + */ + +#include "opt_ufs.h" + +#ifdef UFS_DIRHASH + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/mutex.h> +#include <sys/malloc.h> +#include <sys/fnv_hash.h> +#include <sys/proc.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/sysctl.h> +#include <vm/vm_zone.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/dirhash.h> +#include <ufs/ufs/extattr.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) +#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) + +static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables"); + +SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); + +static int ufs_mindirhashsize = DIRBLKSIZ * 5; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, + &ufs_mindirhashsize, + 0, "minimum directory size in bytes for which to use hashed lookup"); +static int ufs_dirhashmaxmem = 2 * 1024 * 1024; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, + 0, "maximum allowed dirhash memory usage"); +static int ufs_dirhashmem; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, + 0, "current dirhash memory usage"); +static int ufs_dirhashcheck = 1; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, + 0, "enable extra sanity tests"); + + +static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); +static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); +static void ufsdirhash_delslot(struct dirhash *dh, int slot); +static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, + doff_t offset); +static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); +static void ufsdirhash_init(void); +static int ufsdirhash_recycle(int wanted); + +static vm_zone_t ufsdirhash_zone; + +/* Dirhash list; recently-used entries are near the tail. */ +static TAILQ_HEAD(, dirhash) ufsdirhash_list; + +/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ +static struct mtx ufsdirhash_mtx; + +/* + * Locking order: + * ufsdirhash_mtx + * dh_mtx + * + * The dh_mtx mutex should be aquired either via the inode lock, or via + * ufsdirhash_mtx. Only the owner of the inode may free the associated + * dirhash, but anything can steal its memory and set dh_hash to NULL. + */ + +/* + * Attempt to build up a hash table for the directory contents in + * inode 'ip'. Returns 0 on success, or -1 of the operation failed. + */ +int +ufsdirhash_build(struct inode *ip) +{ + struct dirhash *dh; + struct buf *bp = NULL; + struct direct *ep; + struct vnode *vp; + doff_t bmask, pos; + int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; + + /* Check if we can/should use dirhash. */ + if (ip->i_dirhash == NULL) { + if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) + return (-1); + } else { + /* Hash exists, but sysctls could have changed. */ + if (ip->i_size < ufs_mindirhashsize || + ufs_dirhashmem > ufs_dirhashmaxmem) { + ufsdirhash_free(ip); + return (-1); + } + /* Check if hash exists and is intact (note: unlocked read). */ + if (ip->i_dirhash->dh_hash != NULL) + return (0); + /* Free the old, recycled hash and build a new one. */ + ufsdirhash_free(ip); + } + + vp = ip->i_vnode; + /* Allocate 50% more entries than this dir size could ever need. */ + KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); + nslots = ip->i_size / DIRECTSIZ(1); + nslots = (nslots * 3 + 1) / 2; + narrays = howmany(nslots, DH_NBLKOFF); + nslots = narrays * DH_NBLKOFF; + dirblocks = howmany(ip->i_size, DIRBLKSIZ); + nblocks = (dirblocks * 3 + 1) / 2; + + memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + + narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + + nblocks * sizeof(*dh->dh_blkfree); + mtx_lock(&ufsdirhash_mtx); + if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { + mtx_unlock(&ufsdirhash_mtx); + if (memreqd > ufs_dirhashmaxmem / 2) + return (-1); + + /* Try to free some space. */ + if (ufsdirhash_recycle(memreqd) != 0) + return (-1); + /* Enough was freed, and ufsdirhash_mtx has been locked. */ + } + ufs_dirhashmem += memreqd; + mtx_unlock(&ufsdirhash_mtx); + + /* + * Use non-blocking mallocs so that we will revert to a linear + * lookup on failure rather than potentially blocking forever. + */ + MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO); + if (dh == NULL) + return (-1); + MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]), + M_DIRHASH, M_NOWAIT | M_ZERO); + MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]), + M_DIRHASH, M_NOWAIT); + if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) + goto fail; + for (i = 0; i < narrays; i++) { + if ((dh->dh_hash[i] = zalloc(ufsdirhash_zone)) == NULL) + goto fail; + for (j = 0; j < DH_NBLKOFF; j++) + dh->dh_hash[i][j] = DIRHASH_EMPTY; + } + + /* Initialise the hash table and block statistics. */ + mtx_init(&dh->dh_mtx, "dirhash", MTX_DEF); + dh->dh_narrays = narrays; + dh->dh_hlen = nslots; + dh->dh_nblk = nblocks; + dh->dh_dirblks = dirblocks; + for (i = 0; i < dirblocks; i++) + dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; + for (i = 0; i < DH_NFSTATS; i++) + dh->dh_firstfree[i] = -1; + dh->dh_firstfree[DH_NFSTATS] = 0; + dh->dh_seqopt = 0; + dh->dh_seqoff = 0; + dh->dh_score = DH_SCOREINIT; + ip->i_dirhash = dh; + + bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + pos = 0; + while (pos < ip->i_size) { + /* If necessary, get the next directory block. */ + if ((pos & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) + goto fail; + } + + /* Add this entry to the hash. */ + ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); + if (ep->d_reclen == 0 || ep->d_reclen > + DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { + /* Corrupted directory. */ + brelse(bp); + goto fail; + } + if (ep->d_ino != 0) { + /* Add the entry (simplified ufsdirhash_add). */ + slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); + while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) + slot = WRAPINCR(slot, dh->dh_hlen); + dh->dh_hused++; + DH_ENTRY(dh, slot) = pos; + ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); + } + pos += ep->d_reclen; + } + + if (bp != NULL) + brelse(bp); + mtx_lock(&ufsdirhash_mtx); + TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); + dh->dh_onlist = 1; + mtx_unlock(&ufsdirhash_mtx); + return (0); + +fail: + if (dh->dh_hash != NULL) { + for (i = 0; i < narrays; i++) + if (dh->dh_hash[i] != NULL) + zfree(ufsdirhash_zone, dh->dh_hash[i]); + FREE(dh->dh_hash, M_DIRHASH); + } + if (dh->dh_blkfree != NULL) + FREE(dh->dh_blkfree, M_DIRHASH); + FREE(dh, M_DIRHASH); + ip->i_dirhash = NULL; + mtx_lock(&ufsdirhash_mtx); + ufs_dirhashmem -= memreqd; + mtx_unlock(&ufsdirhash_mtx); + return (-1); +} + +/* + * Free any hash table associated with inode 'ip'. + */ +void +ufsdirhash_free(struct inode *ip) +{ + struct dirhash *dh; + int i, mem; + + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&ufsdirhash_mtx); + mtx_lock(&dh->dh_mtx); + if (dh->dh_onlist) + TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); + mtx_unlock(&dh->dh_mtx); + mtx_unlock(&ufsdirhash_mtx); + + /* The dirhash pointed to by 'dh' is exclusively ours now. */ + + mem = sizeof(*dh); + if (dh->dh_hash != NULL) { + for (i = 0; i < dh->dh_narrays; i++) + zfree(ufsdirhash_zone, dh->dh_hash[i]); + FREE(dh->dh_hash, M_DIRHASH); + FREE(dh->dh_blkfree, M_DIRHASH); + mem += dh->dh_narrays * sizeof(*dh->dh_hash) + + dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + + dh->dh_nblk * sizeof(*dh->dh_blkfree); + } + mtx_destroy(&dh->dh_mtx); + FREE(dh, M_DIRHASH); + ip->i_dirhash = NULL; + + mtx_lock(&ufsdirhash_mtx); + ufs_dirhashmem -= mem; + mtx_unlock(&ufsdirhash_mtx); +} + +/* + * Find the offset of the specified name within the given inode. + * Returns 0 on success, ENOENT if the entry does not exist, or + * EJUSTRETURN if the caller should revert to a linear search. + * + * If successful, the directory offset is stored in *offp. If + * prevoffp is non-NULL, the offset of the previous entry within + * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry + * is the first in a block, the start of the block is used). + */ +int +ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, + doff_t *prevoffp) +{ + struct dirhash *dh, *dh_next; + struct direct *dp; + struct vnode *vp; + struct buf *bp; + doff_t blkoff, bmask, offset, prevoff; + int i, slot; + + if ((dh = ip->i_dirhash) == NULL) + return (EJUSTRETURN); + /* + * Move this dirhash towards the end of the list if it has a + * score higher than the next entry, and aquire the dh_mtx. + * Optimise the case where it's already the last by performing + * an unlocked read of the TAILQ_NEXT pointer. + * + * In both cases, end up holding just dh_mtx. + */ + if (TAILQ_NEXT(dh, dh_list) != NULL) { + mtx_lock(&ufsdirhash_mtx); + mtx_lock(&dh->dh_mtx); + /* + * If the new score will be greater than that of the next + * entry, then move this entry past it. With both mutexes + * held, dh_next won't go away, but its dh_score could + * change; that's not important since it is just a hint. + */ + if (dh->dh_hash != NULL && + (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && + dh->dh_score >= dh_next->dh_score) { + KASSERT(dh->dh_onlist, ("dirhash: not on list")); + TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); + TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, + dh_list); + } + mtx_unlock(&ufsdirhash_mtx); + } else { + /* Already the last, though that could change as we wait. */ + mtx_lock(&dh->dh_mtx); + } + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return (EJUSTRETURN); + } + + /* Update the score. */ + if (dh->dh_score < DH_SCOREMAX) + dh->dh_score++; + + vp = ip->i_vnode; + bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + blkoff = -1; + bp = NULL; +restart: + slot = ufsdirhash_hash(dh, name, namelen); + + if (dh->dh_seqopt) { + /* + * Sequential access optimisation. dh_seqoff contains the + * offset of the directory entry immediately following + * the last entry that was looked up. Check if this offset + * appears in the hash chain for the name we are looking for. + */ + for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; + i = WRAPINCR(i, dh->dh_hlen)) + if (offset == dh->dh_seqopt) + break; + if (offset == dh->dh_seqoff) { + /* + * We found an entry with the expected offset. This + * is probably the entry we want, but if not, the + * code below will turn off seqoff and retry. + */ + slot = i; + } else + dh->dh_seqopt = 0; + } + + for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; + slot = WRAPINCR(slot, dh->dh_hlen)) { + if (offset == DIRHASH_DEL) + continue; + mtx_unlock(&dh->dh_mtx); + + if (offset < 0 || offset >= ip->i_size) + panic("ufsdirhash_lookup: bad offset in hash array"); + if ((offset & ~bmask) != blkoff) { + if (bp != NULL) + brelse(bp); + blkoff = offset & ~bmask; + if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) + return (EJUSTRETURN); + } + dp = (struct direct *)(bp->b_data + (offset & bmask)); + if (dp->d_reclen == 0 || dp->d_reclen > + DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { + /* Corrupted directory. */ + brelse(bp); + return (EJUSTRETURN); + } + if (dp->d_namlen == namelen && + bcmp(dp->d_name, name, namelen) == 0) { + /* Found. Get the prev offset if needed. */ + if (prevoffp != NULL) { + if (offset & (DIRBLKSIZ - 1)) { + prevoff = ufsdirhash_getprev(dp, + offset); + if (prevoff == -1) { + brelse(bp); + return (EJUSTRETURN); + } + } else + prevoff = offset; + *prevoffp = prevoff; + } + + /* Check for sequential access, and update offset. */ + if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) + dh->dh_seqopt = 1; + dh->dh_seqoff = offset + DIRSIZ(0, dp); + + brelse(bp); + *offp = offset; + return (0); + } + + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + if (bp != NULL) + brelse(bp); + ufsdirhash_free(ip); + return (EJUSTRETURN); + } + /* + * When the name doesn't match in the seqopt case, go back + * and search normally. + */ + if (dh->dh_seqopt) { + dh->dh_seqopt = 0; + goto restart; + } + } + mtx_unlock(&dh->dh_mtx); + if (bp != NULL) + brelse(bp); + return (ENOENT); +} + +/* + * Find a directory block with room for 'slotneeded' bytes. Returns + * the offset of the directory entry that begins the free space. + * This will either be the offset of an existing entry that has free + * space at the end, or the offset of an entry with d_ino == 0 at + * the start of a DIRBLKSIZ block. + * + * To use the space, the caller may need to compact existing entries in + * the directory. The total number of bytes in all of the entries involved + * in the compaction is stored in *slotsize. In other words, all of + * the entries that must be compacted are exactly contained in the + * region beginning at the returned offset and spanning *slotsize bytes. + * + * Returns -1 if no space was found, indicating that the directory + * must be extended. + */ +doff_t +ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) +{ + struct direct *dp; + struct dirhash *dh; + struct buf *bp; + doff_t pos, slotstart; + int dirblock, error, freebytes, i; + + if ((dh = ip->i_dirhash) == NULL) + return (-1); + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return (-1); + } + + /* Find a directory block with the desired free space. */ + dirblock = -1; + for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) + if ((dirblock = dh->dh_firstfree[i]) != -1) + break; + if (dirblock == -1) { + mtx_unlock(&dh->dh_mtx); + return (-1); + } + + KASSERT(dirblock < dh->dh_nblk && + dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), + ("ufsdirhash_findfree: bad stats")); + mtx_unlock(&dh->dh_mtx); + pos = dirblock * DIRBLKSIZ; + error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); + if (error) + return (-1); + + /* Find the first entry with free space. */ + for (i = 0; i < DIRBLKSIZ; ) { + if (dp->d_reclen == 0) { + brelse(bp); + return (-1); + } + if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) + break; + i += dp->d_reclen; + dp = (struct direct *)((char *)dp + dp->d_reclen); + } + if (i > DIRBLKSIZ) { + brelse(bp); + return (-1); + } + slotstart = pos + i; + + /* Find the range of entries needed to get enough space */ + freebytes = 0; + while (i < DIRBLKSIZ && freebytes < slotneeded) { + freebytes += dp->d_reclen; + if (dp->d_ino != 0) + freebytes -= DIRSIZ(0, dp); + if (dp->d_reclen == 0) { + brelse(bp); + return (-1); + } + i += dp->d_reclen; + dp = (struct direct *)((char *)dp + dp->d_reclen); + } + if (i > DIRBLKSIZ) { + brelse(bp); + return (-1); + } + if (freebytes < slotneeded) + panic("ufsdirhash_findfree: free mismatch"); + brelse(bp); + *slotsize = pos + i - slotstart; + return (slotstart); +} + +/* + * Return the start of the unused space at the end of a directory, or + * -1 if there are no trailing unused blocks. + */ +doff_t +ufsdirhash_enduseful(struct inode *ip) +{ + + struct dirhash *dh; + int i; + + if ((dh = ip->i_dirhash) == NULL) + return (-1); + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return (-1); + } + + if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { + mtx_unlock(&dh->dh_mtx); + return (-1); + } + + for (i = dh->dh_dirblks - 1; i >= 0; i--) + if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) + break; + mtx_unlock(&dh->dh_mtx); + return ((doff_t)(i + 1) * DIRBLKSIZ); +} + +/* + * Insert information into the hash about a new directory entry. dirp + * points to a struct direct containing the entry, and offset specifies + * the offset of this entry. + */ +void +ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) +{ + struct dirhash *dh; + int slot; + + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_add: bad offset")); + /* + * Normal hash usage is < 66%. If the usage gets too high then + * remove the hash entirely and let it be rebuilt later. + */ + if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + /* Find a free hash slot (empty or deleted), and add the entry. */ + slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); + while (DH_ENTRY(dh, slot) >= 0) + slot = WRAPINCR(slot, dh->dh_hlen); + if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) + dh->dh_hused++; + DH_ENTRY(dh, slot) = offset; + + /* Update the per-block summary info. */ + ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); + mtx_unlock(&dh->dh_mtx); +} + +/* + * Remove the specified directory entry from the hash. The entry to remove + * is defined by the name in `dirp', which must exist at the specified + * `offset' within the directory. + */ +void +ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) +{ + struct dirhash *dh; + int slot; + + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_remove: bad offset")); + /* Find the entry */ + slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); + + /* Remove the hash entry. */ + ufsdirhash_delslot(dh, slot); + + /* Update the per-block summary info. */ + ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); + mtx_unlock(&dh->dh_mtx); +} + +/* + * Change the offset associated with a directory entry in the hash. Used + * when compacting directory blocks. + */ +void +ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, + doff_t newoff) +{ + struct dirhash *dh; + int slot; + + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && + newoff < dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_move: bad offset")); + /* Find the entry, and update the offset. */ + slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); + DH_ENTRY(dh, slot) = newoff; + mtx_unlock(&dh->dh_mtx); +} + +/* + * Inform dirhash that the directory has grown by one block that + * begins at offset (i.e. the new length is offset + DIRBLKSIZ). + */ +void +ufsdirhash_newblk(struct inode *ip, doff_t offset) +{ + struct dirhash *dh; + int block; + + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_newblk: bad offset")); + block = offset / DIRBLKSIZ; + if (block >= dh->dh_nblk) { + /* Out of space; must rebuild. */ + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + dh->dh_dirblks = block + 1; + + /* Account for the new free block. */ + dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; + if (dh->dh_firstfree[DH_NFSTATS] == -1) + dh->dh_firstfree[DH_NFSTATS] = block; + mtx_unlock(&dh->dh_mtx); +} + +/* + * Inform dirhash that the directory is being truncated. + */ +void +ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) +{ + struct dirhash *dh; + int block, i; + + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_dirtrunc: bad offset")); + block = howmany(offset, DIRBLKSIZ); + /* + * If the directory shrinks to less than 1/8 of dh_nblk blocks + * (about 20% of its original size due to the 50% extra added in + * ufsdirhash_build) then free it, and let the caller rebuild + * if necessary. + */ + if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + /* + * Remove any `first free' information pertaining to the + * truncated blocks. All blocks we're removing should be + * completely unused. + */ + if (dh->dh_firstfree[DH_NFSTATS] >= block) + dh->dh_firstfree[DH_NFSTATS] = -1; + for (i = block; i < dh->dh_dirblks; i++) + if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) + panic("ufsdirhash_dirtrunc: blocks in use"); + for (i = 0; i < DH_NFSTATS; i++) + if (dh->dh_firstfree[i] >= block) + panic("ufsdirhash_dirtrunc: first free corrupt"); + dh->dh_dirblks = block; + mtx_unlock(&dh->dh_mtx); +} + +/* + * Debugging function to check that the dirhash information about + * a directory block matches its actual contents. Panics if a mismatch + * is detected. + * + * On entry, `buf' should point to the start of an in-core + * DIRBLKSIZ-sized directory block, and `offset' should contain the + * offset from the start of the directory of that block. + */ +void +ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) +{ + struct dirhash *dh; + struct direct *dp; + int block, ffslot, i, nfree; + + if (!ufs_dirhashcheck) + return; + if ((dh = ip->i_dirhash) == NULL) + return; + mtx_lock(&dh->dh_mtx); + if (dh->dh_hash == NULL) { + mtx_unlock(&dh->dh_mtx); + ufsdirhash_free(ip); + return; + } + + block = offset / DIRBLKSIZ; + if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) + panic("ufsdirhash_checkblock: bad offset"); + + nfree = 0; + for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { + dp = (struct direct *)(buf + i); + if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) + panic("ufsdirhash_checkblock: bad dir"); + + if (dp->d_ino == 0) { + if (i != 0) + panic("ufsdirhash_checkblock: bad dir inode"); + nfree += dp->d_reclen; + continue; + } + + /* Check that the entry exists (will panic if it doesn't). */ + ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); + + nfree += dp->d_reclen - DIRSIZ(0, dp); + } + if (i != DIRBLKSIZ) + panic("ufsdirhash_checkblock: bad dir end"); + + if (dh->dh_blkfree[block] * DIRALIGN != nfree) + panic("ufsdirhash_checkblock: bad free count"); + + ffslot = nfree / DIRALIGN; + if (ffslot > DH_NFSTATS) + ffslot = DH_NFSTATS; + for (i = 0; i <= DH_NFSTATS; i++) + if (dh->dh_firstfree[i] == block && i != ffslot) + panic("ufsdirhash_checkblock: bad first-free"); + mtx_unlock(&dh->dh_mtx); +} + +/* + * Hash the specified filename into a dirhash slot. + */ +static int +ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) +{ + return (fnv_32_buf(name, namelen, FNV1_32_INIT) % dh->dh_hlen); +} + +/* + * Adjust the number of free bytes in the block containing `offset' + * by the value specified by `diff'. + * + * The caller must ensure we have exclusive access to `dh'; normally + * that means that dh_mtx should be held, but this is also called + * from ufsdirhash_build() where exclusive access can be assumed. + */ +static void +ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) +{ + int block, i, nfidx, ofidx; + + /* Update the per-block summary info. */ + block = offset / DIRBLKSIZ; + KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, + ("dirhash bad offset")); + ofidx = dh->dh_blkfree[block]; + if (ofidx > DH_NFSTATS) + ofidx = DH_NFSTATS; + dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); + nfidx = dh->dh_blkfree[block]; + if (nfidx > DH_NFSTATS) + nfidx = DH_NFSTATS; + + /* Update the `first free' list if necessary. */ + if (ofidx != nfidx) { + /* If removing, scan forward for the next block. */ + if (dh->dh_firstfree[ofidx] == block) { + for (i = block + 1; i < dh->dh_dirblks; i++) + if (dh->dh_blkfree[i] == ofidx) + break; + dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; + } + + /* Make this the new `first free' if necessary */ + if (dh->dh_firstfree[nfidx] > block || + dh->dh_firstfree[nfidx] == -1) + dh->dh_firstfree[nfidx] = block; + } +} + +/* + * Find the specified name which should have the specified offset. + * Returns a slot number, and panics on failure. + * + * `dh' must be locked on entry and remains so on return. + */ +static int +ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) +{ + int slot; + + mtx_assert(&dh->dh_mtx, MA_OWNED); + + /* Find the entry. */ + KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); + slot = ufsdirhash_hash(dh, name, namelen); + while (DH_ENTRY(dh, slot) != offset && + DH_ENTRY(dh, slot) != DIRHASH_EMPTY) + slot = WRAPINCR(slot, dh->dh_hlen); + if (DH_ENTRY(dh, slot) != offset) + panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); + + return (slot); +} + +/* + * Remove the entry corresponding to the specified slot from the hash array. + * + * `dh' must be locked on entry and remains so on return. + */ +static void +ufsdirhash_delslot(struct dirhash *dh, int slot) +{ + int i; + + mtx_assert(&dh->dh_mtx, MA_OWNED); + + /* Mark the entry as deleted. */ + DH_ENTRY(dh, slot) = DIRHASH_DEL; + + /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ + for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) + i = WRAPINCR(i, dh->dh_hlen); + if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { + for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) { + DH_ENTRY(dh, i) = DIRHASH_EMPTY; + dh->dh_hused--; + i = WRAPINCR(i, dh->dh_hlen); + } + KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); + } +} + +/* + * Given a directory entry and its offset, find the offset of the + * previous entry in the same DIRBLKSIZ-sized block. Returns an + * offset, or -1 if there is no previous entry in the block or some + * other problem occurred. + */ +static doff_t +ufsdirhash_getprev(struct direct *dirp, doff_t offset) +{ + struct direct *dp; + char *blkbuf; + doff_t blkoff, prevoff; + int entrypos, i; + + blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ + entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ + blkbuf = (char *)dirp - entrypos; + prevoff = blkoff; + + /* If `offset' is the start of a block, there is no previous entry. */ + if (entrypos == 0) + return (-1); + + /* Scan from the start of the block until we get to the entry. */ + for (i = 0; i < entrypos; i += dp->d_reclen) { + dp = (struct direct *)(blkbuf + i); + if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) + return (-1); /* Corrupted directory. */ + prevoff = blkoff + i; + } + return (prevoff); +} + +/* + * Try to free up `wanted' bytes by stealing memory from existing + * dirhashes. Returns zero with ufsdirhash_mtx locked if successful. + */ +static int +ufsdirhash_recycle(int wanted) +{ + struct dirhash *dh; + doff_t **hash; + u_int8_t *blkfree; + int i, mem, narrays; + + mtx_lock(&ufsdirhash_mtx); + while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { + /* Find a dirhash, and lock it. */ + if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { + mtx_unlock(&ufsdirhash_mtx); + return (-1); + } + mtx_lock(&dh->dh_mtx); + KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); + + /* Decrement the score; only recycle if it becomes zero. */ + if (--dh->dh_score > 0) { + mtx_unlock(&dh->dh_mtx); + mtx_unlock(&ufsdirhash_mtx); + return (-1); + } + + /* Remove it from the list and detach its memory. */ + TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); + dh->dh_onlist = 0; + hash = dh->dh_hash; + dh->dh_hash = NULL; + blkfree = dh->dh_blkfree; + dh->dh_blkfree = NULL; + narrays = dh->dh_narrays; + mem = narrays * sizeof(*dh->dh_hash) + + narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + + dh->dh_nblk * sizeof(*dh->dh_blkfree); + + /* Unlock everything, free the detached memory. */ + mtx_unlock(&dh->dh_mtx); + mtx_unlock(&ufsdirhash_mtx); + for (i = 0; i < narrays; i++) + zfree(ufsdirhash_zone, hash[i]); + FREE(hash, M_DIRHASH); + FREE(blkfree, M_DIRHASH); + + /* Account for the returned memory, and repeat if necessary. */ + mtx_lock(&ufsdirhash_mtx); + ufs_dirhashmem -= mem; + } + /* Success; return with ufsdirhash_mtx locked. */ + return (0); +} + + +static void +ufsdirhash_init() +{ + ufsdirhash_zone = zinit("DIRHASH", DH_NBLKOFF * sizeof(daddr_t), 0, + 0, 1); + mtx_init(&ufsdirhash_mtx, "dirhash list", MTX_DEF); + TAILQ_INIT(&ufsdirhash_list); +} +SYSINIT(ufsdirhash, SI_SUB_PSEUDO, SI_ORDER_ANY, ufsdirhash_init, NULL) + + +#endif /* UFS_DIRHASH */ diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 4f4638d..028e3b9 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -53,6 +53,10 @@ #include <ufs/ufs/inode.h> #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> +#ifdef UFS_DIRHASH +#include <ufs/ufs/dir.h> +#include <ufs/ufs/dirhash.h> +#endif /* * Last reference to an inode. If necessary, write or delete it. @@ -167,6 +171,10 @@ ufs_reclaim(ap) } #endif lockdestroy(&vp->v_lock); +#ifdef UFS_DIRHASH + if (ip->i_dirhash != NULL) + ufsdirhash_free(ip); +#endif FREE(vp->v_data, VFSTOUFS(vp->v_mount)->um_malloctype); vp->v_data = 0; return (0); diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 61a620d..2a3a71e 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -39,6 +39,8 @@ * $FreeBSD$ */ +#include "opt_ufs.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -58,6 +60,9 @@ #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/dir.h> +#ifdef UFS_DIRHASH +#include <ufs/ufs/dirhash.h> +#endif #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufs_extern.h> @@ -128,7 +133,7 @@ ufs_lookup(ap) register struct vnode *vdp; /* vnode for directory being searched */ register struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ - register struct direct *ep; /* the current directory entry */ + struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ @@ -181,7 +186,48 @@ ufs_lookup(ap) slotstatus = NONE; slotneeded = DIRECTSIZ(cnp->cn_namelen); } + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; +#ifdef UFS_DIRHASH + /* + * Use dirhash for fast operations on large directories. The logic + * to determine whether to hash the directory is contained within + * ufsdirhash_build(); a zero return means that it decided to hash + * this directory and it successfully built up the hash table. + */ + if (ufsdirhash_build(dp) == 0) { + /* Look for a free slot if needed. */ + enduseful = dp->i_size; + if (slotstatus != FOUND) { + slotoffset = ufsdirhash_findfree(dp, slotneeded, + &slotsize); + if (slotoffset >= 0) { + slotstatus = COMPACT; + enduseful = ufsdirhash_enduseful(dp); + if (enduseful < 0) + enduseful = dp->i_size; + } + } + /* Look up the component. */ + numdirpasses = 1; + entryoffsetinblock = 0; /* silence compiler warning */ + switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, + &dp->i_offset, nameiop == DELETE ? &prevoff : NULL)) { + case 0: + error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, + (char **)&ep, &bp); + if (error) + return (error); + goto foundentry; + case ENOENT: + dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); + goto notfound; + default: + /* Something failed; just do a linear search. */ + break; + } + } +#endif /* UFS_DIRHASH */ /* * If there is cached information on a previous search of * this directory, pick up where we last left off. @@ -193,7 +239,6 @@ ufs_lookup(ap) * profiling time and hence has been removed in the interest * of simplicity. */ - bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff >= dp->i_size) { entryoffsetinblock = 0; @@ -299,6 +344,9 @@ searchloop: (cnp->cn_nameptr[0] == ep->d_name[0]) && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { +#ifdef UFS_DIRHASH +foundentry: +#endif /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release @@ -732,6 +780,14 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) { + ufsdirhash_newblk(dp, dp->i_offset); + ufsdirhash_add(dp, dirp, dp->i_offset); + ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, + dp->i_offset); + } +#endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a @@ -828,6 +884,11 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) } dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_move(dp, nep, dp->i_offset + loc, + dp->i_offset + ((char *)ep - dirbuf)); +#endif loc += nep->d_reclen; if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(dp, dirbuf, @@ -852,7 +913,18 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL && (ep->d_ino == 0 || + dirp->d_reclen == spacefree)) + ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); +#endif bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_checkblock(dp, dirbuf - + (dp->i_offset & (DIRBLKSIZ - 1)), + dp->i_offset & ~(DIRBLKSIZ - 1)); +#endif if (DOINGSOFTDEP(dvp)) { (void) softdep_setup_directory_add(bp, dp, @@ -878,6 +950,10 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0, p); +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_dirtrunc(dp, dp->i_endoff); +#endif (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); @@ -926,6 +1002,15 @@ ufs_dirremove(dvp, ip, flags, isrmdir) if ((error = UFS_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) return (error); +#ifdef UFS_DIRHASH + /* + * Remove the dirhash entry. This is complicated by the fact + * that `ep' is the previous entry when dp->i_count != 0. + */ + if (dp->i_dirhash != NULL) + ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : + (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); +#endif if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. @@ -937,6 +1022,12 @@ ufs_dirremove(dvp, ip, flags, isrmdir) */ ep->d_reclen += dp->i_reclen; } +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_checkblock(dp, (char *)ep - + ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), + dp->i_offset & ~(DIRBLKSIZ - 1)); +#endif out: if (DOINGSOFTDEP(dvp)) { if (ip) { |