summaryrefslogtreecommitdiffstats
path: root/sys/ufs
diff options
context:
space:
mode:
author: phk <phk@FreeBSD.org> 2004-10-29 10:15:56 +0000
committer: phk <phk@FreeBSD.org> 2004-10-29 10:15:56 +0000
commit: d9d9558b8bc8b5630a1f2947c1b6f440a1356e48 (patch)
tree: d3100547372d59aabedd386e4468ce4c37a07987 /sys/ufs
parent: f0dd76e153ceb481cfd9f2ab5bbb02e7d8a294c9 (diff)
downloadFreeBSD-src-d9d9558b8bc8b5630a1f2947c1b6f440a1356e48.zip
FreeBSD-src-d9d9558b8bc8b5630a1f2947c1b6f440a1356e48.tar.gz
Move UFS from DEVFS backing to GEOM backing.
This eliminates a bunch of vnode overhead (approx 1-2 % speed improvement) and gives us more control over the access to the storage device.

Access counts on the underlying device are now correctly tracked and therefore it is possible to read-only mount the same disk device multiple times:

    syv# mount -p
    /dev/md0 /var ufs rw 2 2
    /dev/ad0 /mnt ufs ro 1 1
    /dev/ad0 /mnt2 ufs ro 1 1
    /dev/ad0 /mnt3 ufs ro 1 1

Since UFS/FFS is not a synchronously consistent filesystem (ie: it caches things in RAM) this is not possible with read-write mounts, and the system will correctly reject this.

Details:

Add a geom consumer and a bufobj pointer to ufsmount.

Eliminate the vnode argument from softdep_disk_prewrite(). Pick the vnode out of bp->b_vp for now. Eventually we should find it through bp->b_bufobj->b_private.

In the mountcode, use g_vfs_open() once we have used VOP_ACCESS() to check permissions.

When upgrading and downgrading between r/o and r/w do the right thing with GEOM access counts. Remove all the workarounds for not being able to do this with VOP_OPEN().

If we are the root mount, drop the exclusive access count until we upgrade to r/w. This allows fsck of the root filesystem and the MNT_RELOAD to work correctly.

Set bo_private to the GEOM consumer on the device bufobj.

Change the ffs_ops->strategy function to call g_vfs_strategy().

In ufs_strategy() directly call the strategy on the disk bufobj. Same in rawread.

In ffs_fsync() we will no longer see VCHR device nodes, so remove code which synced the filesystem mounted on it, in case we came there. I'm not sure this code made sense in the first place since we would have taken the specfs route on such a vnode.

Redo the highly bogus readblock() function in the snapshot code to something slightly less bogus: Constructing an uio and using physio was really quite a detour. Instead just fill in a bio and ship it down.
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c 3
-rw-r--r-- sys/ufs/ffs/ffs_extern.h 2
-rw-r--r-- sys/ufs/ffs/ffs_rawread.c 6
-rw-r--r-- sys/ufs/ffs/ffs_snapshot.c 30
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c 30
-rw-r--r-- sys/ufs/ffs/ffs_vfsops.c 123
-rw-r--r-- sys/ufs/ffs/ffs_vnops.c 13
-rw-r--r-- sys/ufs/ufs/inode.h 1
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c 10
-rw-r--r-- sys/ufs/ufs/ufsmount.h 2
10 files changed, 96 insertions, 124 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index fa1c342..7331a0a 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1728,9 +1728,6 @@ ffs_blkfree(fs, devvp, bno, size, inum)
return;
}
#ifdef DIAGNOSTIC
- if (dev->si_mountpoint &&
- (dev->si_mountpoint->mnt_kern_flag & MNTK_SUSPENDED))
- panic("ffs_blkfree: deallocation on suspended filesystem");
if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 083d4c7..a56d6de 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -118,7 +118,7 @@ void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t,
struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *);
void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vop_fsync_args *);
-int softdep_disk_prewrite(struct vnode *vp, struct buf *bp);
+int softdep_disk_prewrite(struct buf *bp);
/* XXX incorrectly moved to mount.h - should be indirect function */
#if 0
int softdep_fsync(struct vnode *vp);
diff --git a/sys/ufs/ffs/ffs_rawread.c b/sys/ufs/ffs/ffs_rawread.c
index ca1779c..2594ae4 100644
--- a/sys/ufs/ffs/ffs_rawread.c
+++ b/sys/ufs/ffs/ffs_rawread.c
@@ -248,15 +248,11 @@ ffs_rawread_readahead(struct vnode *vp,
if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
bp->b_bufsize = bp->b_bcount;
- bp->b_dev = dp->v_rdev;
if (vmapbuf(bp) < 0)
return EFAULT;
- if (dp->v_type == VCHR)
- (void) VOP_SPECSTRATEGY(dp, bp);
- else
- (void) VOP_STRATEGY(dp, bp);
+ dp->v_bufobj.bo_ops->bop_strategy(&dp->v_bufobj, bp);
return 0;
}
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 7206eae..c137d16 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -52,6 +52,8 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/vnode.h>
+#include <geom/geom.h>
+
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
@@ -2119,19 +2121,21 @@ readblock(vp, bp, lbn)
struct buf *bp;
ufs2_daddr_t lbn;
{
- struct uio auio;
- struct iovec aiov;
- struct thread *td = curthread;
struct inode *ip = VTOI(vp);
+ struct bio *bip;
- aiov.iov_base = bp->b_data;
- aiov.iov_len = bp->b_bcount;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
- auio.uio_resid = bp->b_bcount;
- auio.uio_rw = UIO_READ;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_td = td;
- return (physio(ip->i_devvp->v_rdev, &auio, 0));
+ bip = g_alloc_bio();
+ bip->bio_cmd = BIO_READ;
+ bip->bio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
+ bip->bio_data = bp->b_data;
+ bip->bio_length = bp->b_bcount;
+
+ g_io_request(bip, ip->i_devvp->v_bufobj.bo_private);
+
+ do
+ msleep(bip, NULL, PRIBIO, "snaprdb", hz/10);
+ while (!(bip->bio_flags & BIO_DONE));
+ bp->b_error = bip->bio_error;
+ g_destroy_bio(bip);
+ return (bp->b_error);
}
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 42dfc56..9101d40 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -3416,9 +3416,10 @@ handle_workitem_freefile(freefile)
}
int
-softdep_disk_prewrite(struct vnode *vp, struct buf *bp)
+softdep_disk_prewrite(struct buf *bp)
{
int error;
+ struct vnode *vp = bp->b_vp;
KASSERT(bp->b_iocmd == BIO_WRITE,
("softdep_disk_prewrite on non-BIO_WRITE buffer"));
@@ -4983,17 +4984,8 @@ softdep_sync_metadata(ap)
struct worklist *wk;
int i, error, waitfor;
- /*
- * Check whether this vnode is involved in a filesystem
- * that is doing soft dependency processing.
- */
- if (!vn_isdisk(vp, NULL)) {
- if (!DOINGSOFTDEP(vp))
- return (0);
- } else
- if (vp->v_rdev->si_mountpoint == NULL ||
- (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP) == 0)
- return (0);
+ if (!DOINGSOFTDEP(vp))
+ return (0);
/*
* Ensure that any direct block dependencies have been cleared.
*/
@@ -5222,18 +5214,6 @@ loop:
VI_UNLOCK(vp);
FREE_LOCK(&lk);
- /*
- * If we are trying to sync a block device, some of its buffers may
- * contain metadata that cannot be written until the contents of some
- * partially written files have been written to disk. The only easy
- * way to accomplish this is to sync the entire filesystem (luckily
- * this happens rarely).
- */
- if (vn_isdisk(vp, NULL) &&
- vp->v_rdev->si_mountpoint && !VOP_ISLOCKED(vp, NULL) &&
- (error = VFS_SYNC(vp->v_rdev->si_mountpoint, MNT_WAIT, ap->a_cred,
- ap->a_td)) != 0)
- return (error);
return (0);
}
@@ -5912,6 +5892,8 @@ getdirtybuf(bpp, mtx, waitfor)
/*
* Wait for pending output on a vnode to complete.
* Must be called with vnode lock and interlock locked.
+ *
+ * XXX: Should just be a call to bufobj_wwait().
*/
static void
drain_output(vp, islocked)
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 6faef7a..6c10052 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -66,6 +66,9 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <vm/vm_page.h>
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
static int ffs_sbupdate(struct ufsmount *, int);
@@ -240,6 +243,11 @@ ffs_omount(struct mount *mp, char *path, caddr_t data, struct thread *td)
return (error);
}
vn_finished_write(mp);
+ DROP_GIANT();
+ g_topology_lock();
+ g_access(ump->um_cp, 0, -1, 0);
+ g_topology_unlock();
+ PICKUP_GIANT();
}
if ((mp->mnt_flag & MNT_RELOAD) &&
(error = ffs_reload(mp, td)) != 0)
@@ -258,6 +266,20 @@ ffs_omount(struct mount *mp, char *path, caddr_t data, struct thread *td)
}
VOP_UNLOCK(devvp, 0, td);
}
+ DROP_GIANT();
+ g_topology_lock();
+ /*
+ * If we're the root device, we may not have an E count
+ * yet, get it now.
+ */
+ if (ump->um_cp->ace == 0)
+ error = g_access(ump->um_cp, 0, 1, 1);
+ else
+ error = g_access(ump->um_cp, 0, 1, 0);
+ g_topology_unlock();
+ PICKUP_GIANT();
+ if (error)
+ return (error);
fs->fs_flags &= ~FS_UNCLEAN;
if (fs->fs_clean == 0) {
fs->fs_flags |= FS_UNCLEAN;
@@ -350,8 +372,7 @@ ffs_omount(struct mount *mp, char *path, caddr_t data, struct thread *td)
* then it's not correct.
*/
- if (devvp != ump->um_devvp &&
- devvp->v_rdev != ump->um_devvp->v_rdev)
+ if (devvp->v_rdev != ump->um_devvp->v_rdev)
error = EINVAL; /* needs translation */
vrele(devvp);
if (error)
@@ -412,7 +433,6 @@ ffs_reload(struct mount *mp, struct thread *td)
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
if (vinvalbuf(devvp, 0, td->td_ucred, td, 0, 0) != 0)
panic("ffs_reload: dirty1");
-
vfs_object_create(devvp, td, td->td_ucred);
VOP_UNLOCK(devvp, 0, td);
@@ -552,45 +572,45 @@ ffs_mountfs(devvp, mp, td)
int32_t *lp;
struct ucred *cred;
size_t strsize;
+ struct g_consumer *cp;
dev = devvp->v_rdev;
cred = td ? td->td_ucred : NOCRED;
+
+ vfs_object_create(devvp, td, cred);
+
+ ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+#if 0
/*
- * Disallow multiple mounts of the same device.
- * Disallow mounting of a device that is currently in use
- * (except for root, which might share swap device for miniroot).
- * Flush out any old buffers remaining from a previous use.
+ * XXX: check filesystem permissions, they may be more strict
+ * XXX: than what geom enforces.
+ * XXX: But since we're root, they wouldn't matter, would they ?
*/
- error = vfs_mountedon(devvp);
- if (error)
- return (error);
- if (vcount(devvp) > 1)
- return (EBUSY);
- vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
- error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
+ error = VOP_ACCESS(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td);
if (error) {
VOP_UNLOCK(devvp, 0, td);
return (error);
}
-
+#endif
+ DROP_GIANT();
+ g_topology_lock();
+ error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
+#if 0
/*
* Note that it is optional that the backing device be VMIOed. This
* increases the opportunity for metadata caching.
*/
vfs_object_create(devvp, td, cred);
+#endif
- ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
/*
- * XXX: open the device with read and write access even if only
- * read access is needed now. Write access is needed if the
- * filesystem is ever mounted read/write, and we don't change the
- * access mode for remounts.
+ * If we are a root mount, drop the E flag so fsck can do its magic.
+ * We will pick it up again when we remount R/W.
*/
-#ifdef notyet
- error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1);
-#else
- error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1);
-#endif
+ if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
+ error = g_access(cp, 0, 0, -1);
+ g_topology_unlock();
+ PICKUP_GIANT();
VOP_UNLOCK(devvp, 0, td);
if (error)
return (error);
@@ -599,6 +619,7 @@ ffs_mountfs(devvp, mp, td)
if (mp->mnt_iosize_max > MAXPHYS)
mp->mnt_iosize_max = MAXPHYS;
+ devvp->v_bufobj.bo_private = cp;
devvp->v_bufobj.bo_ops = &ffs_ops;
bp = NULL;
@@ -663,6 +684,8 @@ ffs_mountfs(devvp, mp, td)
fs->fs_pendinginodes = 0;
}
ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
+ ump->um_cp = cp;
+ ump->um_bo = &devvp->v_bufobj;
ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
if (fs->fs_magic == FS_UFS1_MAGIC) {
ump->um_fstype = UFS1;
@@ -751,8 +774,6 @@ ffs_mountfs(devvp, mp, td)
#ifdef UFS_EXTATTR
ufs_extattr_uepm_init(&ump->um_extattr);
#endif
- devvp->v_rdev->si_mountpoint = mp;
-
/*
* Set FS local "last mounted on" information (NULL pad)
*/
@@ -804,15 +825,15 @@ ffs_mountfs(devvp, mp, td)
#endif /* !UFS_EXTATTR */
return (0);
out:
- devvp->v_rdev->si_mountpoint = NULL;
if (bp)
brelse(bp);
- /* XXX: see comment above VOP_OPEN. */
-#ifdef notyet
- (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td);
-#else
- (void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td);
-#endif
+ if (cp != NULL) {
+ DROP_GIANT();
+ g_topology_lock();
+ g_wither_geom_close(cp->geom, ENXIO);
+ g_topology_unlock();
+ PICKUP_GIANT();
+ }
if (ump) {
free(ump->um_fs, M_UFSMNT);
free(ump, M_UFSMNT);
@@ -964,16 +985,12 @@ ffs_unmount(mp, mntflags, td)
return (error);
}
}
- ump->um_devvp->v_rdev->si_mountpoint = NULL;
-
vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
- /* XXX: see comment above VOP_OPEN. */
-#ifdef notyet
- error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
- NOCRED, td);
-#else
- error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td);
-#endif
+ DROP_GIANT();
+ g_topology_lock();
+ g_wither_geom_close(ump->um_cp->geom, ENXIO);
+ g_topology_unlock();
+ PICKUP_GIANT();
vrele(ump->um_devvp);
free(fs->fs_csp, M_UFSMNT);
free(fs, M_UFSMNT);
@@ -1533,24 +1550,10 @@ ffs_ifree(struct ufsmount *ump, struct inode *ip)
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
- int i = 0;
- struct vnode *vp;
- vp = bp->b_vp;
-#if 0
- KASSERT(vp == bo->bo_vnode, ("Inconsistent vnode bufstrategy"));
- KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
- ("Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp));
-#endif
- if (vp->v_type == VCHR) {
#ifdef SOFTUPDATES
- if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp->b_vp, bp))
- return;
+ if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp))
+ return;
#endif
- i = VOP_SPECSTRATEGY(vp, bp);
- } else {
- i = VOP_STRATEGY(vp, bp);
- }
- KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp));
+ g_vfs_strategy(bo, bp);
}
-
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 3431408..e903b6d 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -178,14 +178,7 @@ ffs_fsync(ap)
ufs_lbn_t lbn;
wait = (ap->a_waitfor == MNT_WAIT);
- if (vn_isdisk(vp, NULL)) {
- lbn = INT_MAX;
- if (vp->v_rdev->si_mountpoint != NULL &&
- (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
- softdep_fsync_mountdev(vp);
- } else {
- lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
- }
+ lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
/*
* Flush all dirty buffers associated with a vnode.
@@ -225,8 +218,6 @@ loop:
VI_UNLOCK(vp);
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
- if (vp != bp->b_vp)
- panic("ffs_fsync: vp != vp->b_vp");
/*
* If this is a synchronous flush request, or it is not a
* file or device, start the write on this buffer immediatly.
@@ -1212,7 +1203,7 @@ ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td
}
/*
- * Vnode extattr strategy routine for special devices and fifos.
+ * Vnode extattr strategy routine for fifos.
*
* We need to check for a read or write of the external attributes.
* Otherwise we just fall through and do the usual thing.
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 66254e9..558e9a8 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -123,6 +123,7 @@ struct inode {
#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */
#define i_devvp i_ump->um_devvp
+#define i_umbufobj i_ump->um_bo
#define i_dirhash i_un.dirhash
#define i_snapblklist i_un.snapblklist
#define i_din1 dinode_u.din1
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 8ced7ce..d97fdff 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1927,6 +1927,7 @@ ufs_strategy(ap)
{
struct buf *bp = ap->a_bp;
struct vnode *vp = ap->a_vp;
+ struct bufobj *bo;
struct inode *ip;
ufs2_daddr_t blkno;
int error;
@@ -1948,14 +1949,9 @@ ufs_strategy(ap)
bufdone(bp);
return (0);
}
- vp = ip->i_devvp;
- bp->b_dev = vp->v_rdev;
bp->b_iooffset = dbtob(bp->b_blkno);
-#ifdef SOFTUPDATES
- if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(vp, bp))
- return (0);
-#endif
- VOP_SPECSTRATEGY(vp, bp);
+ bo = ip->i_umbufobj;
+ bo->bo_ops->bop_strategy(bo, bp);
return (0);
}
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index f8f8db0..be18c2d 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -60,6 +60,8 @@ struct ufs_extattr_per_mount;
struct ufsmount {
struct mount *um_mountp; /* filesystem vfs structure */
struct cdev *um_dev; /* device mounted */
+ struct g_consumer *um_cp;
+ struct bufobj *um_bo; /* Buffer cache object */
struct vnode *um_devvp; /* block device mounted vnode */
u_long um_fstype; /* type of filesystem */
struct fs *um_fs; /* pointer to superblock */
OpenPOWER on IntegriCloud