author    kib <kib@FreeBSD.org>    2016-11-02 08:46:04 +0000
committer kib <kib@FreeBSD.org>    2016-11-02 08:46:04 +0000
commit    0f6aead99b2a420a730d47a074145dcb7d0f6721 (patch)
tree      0d554afd274657f7d93dc0c9bd9eabd88e3272e2 /sys/ufs
parent    b81fe3e75de30ac5b7fe710ca1919a9b8d0c296e (diff)
MFC r307626:
Add FFS pager, which uses buffer cache reads to validate pages. For now, the pager is disabled by default in the stable branch.
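The pager is gated by the vfs.ffs.use_buf_pager sysctl introduced in the diff below (CTLFLAG_RWTUN, so it can be flipped at runtime or set as a loader tunable). As a minimal illustration only, not part of this commit, a userland program could check and enable the knob through sysctlbyname(3); the sketch assumes a kernel that already carries this change:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int cur, on;
	size_t len;

	/* Read the current value of vfs.ffs.use_buf_pager. */
	len = sizeof(cur);
	if (sysctlbyname("vfs.ffs.use_buf_pager", &cur, &len, NULL, 0) != 0)
		err(1, "vfs.ffs.use_buf_pager is not present in this kernel");
	printf("vfs.ffs.use_buf_pager = %d\n", cur);

	/* Enable the buffer pager; requires root, same as sysctl(8). */
	on = 1;
	if (sysctlbyname("vfs.ffs.use_buf_pager", NULL, NULL, &on, sizeof(on)) != 0)
		err(1, "failed to enable vfs.ffs.use_buf_pager");
	return (0);
}

This is the same switch that the if (!use_buf_pager && bo_bs <= PAGE_SIZE) test in ffs_getpages() consults before falling back to vnode_pager_generic_getpages().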
Diffstat (limited to 'sys/ufs')
-rw-r--r--  sys/ufs/ffs/ffs_vnops.c  172
1 file changed, 168 insertions, 4 deletions
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index cdcff7e..fb40167 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
@@ -86,6 +87,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
+#include <vm/vm_pageout.h>
#include <vm/vnode_pager.h>
#include <ufs/ufs/extattr.h>
@@ -102,8 +104,9 @@ __FBSDID("$FreeBSD$");
#ifdef DIRECTIO
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
-static vop_fsync_t ffs_fsync;
static vop_fdatasync_t ffs_fdatasync;
+static vop_fsync_t ffs_fsync;
+static vop_getpages_t ffs_getpages;
static vop_lock1_t ffs_lock;
static vop_read_t ffs_read;
static vop_write_t ffs_write;
@@ -119,13 +122,12 @@ static vop_openextattr_t ffs_openextattr;
static vop_setextattr_t ffs_setextattr;
static vop_vptofh_t ffs_vptofh;
-
/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
.vop_default = &ufs_vnodeops,
.vop_fsync = ffs_fsync,
.vop_fdatasync = ffs_fdatasync,
- .vop_getpages = vnode_pager_local_getpages,
+ .vop_getpages = ffs_getpages,
.vop_getpages_async = vnode_pager_local_getpages_async,
.vop_lock1 = ffs_lock,
.vop_read = ffs_read,
@@ -147,7 +149,7 @@ struct vop_vector ffs_vnodeops2 = {
.vop_default = &ufs_vnodeops,
.vop_fsync = ffs_fsync,
.vop_fdatasync = ffs_fdatasync,
- .vop_getpages = vnode_pager_local_getpages,
+ .vop_getpages = ffs_getpages,
.vop_getpages_async = vnode_pager_local_getpages_async,
.vop_lock1 = ffs_lock,
.vop_read = ffs_read,
@@ -1784,3 +1786,165 @@ vop_vptofh {
ufhp->ufid_gen = ip->i_gen;
return (0);
}
+
+SYSCTL_DECL(_vfs_ffs);
+static int use_buf_pager = 0;
+SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
+ "Always use buffer pager instead of bmap");
+static int buf_pager_relbuf;
+SYSCTL_INT(_vfs_ffs, OID_AUTO, buf_pager_relbuf, CTLFLAG_RWTUN,
+ &buf_pager_relbuf, 0,
+ "Make buffer pager release buffers after reading");
+
+/*
+ * The FFS pager. It uses buffer reads to validate pages.
+ *
+ * In contrast to the generic local pager from vm/vnode_pager.c, this
+ * pager correctly and easily handles volumes where the underlying
+ * device block size is greater than the machine page size. The
+ * buffer cache transparently extends the requested page run to be
+ * aligned at the block boundary, and does the necessary bogus page
+ * replacements in the addends to avoid obliterating already valid
+ * pages.
+ *
+ * The only non-trivial issue is that the exclusive busy state for
+ * pages, which is assumed by the vm_pager_getpages() interface, is
+ * incompatible with the VMIO buffer cache's desire to share-busy the
+ * pages. This function performs a trivial downgrade of the pages'
+ * state before reading buffers, and a less trivial upgrade from the
+ * shared-busy to excl-busy state after the read.
+ */
+static int
+ffs_getpages(struct vop_getpages_args *ap)
+{
+ struct vnode *vp;
+ vm_page_t *ma, m;
+ vm_object_t object;
+ struct buf *bp;
+ struct ufsmount *um;
+ ufs_lbn_t lbn, lbnp;
+ vm_ooffset_t la, lb;
+ long bsize;
+ int bo_bs, count, error, i;
+ bool redo, lpart;
+
+ vp = ap->a_vp;
+ ma = ap->a_m;
+ count = ap->a_count;
+
+ um = VFSTOUFS(ap->a_vp->v_mount);
+ bo_bs = um->um_devvp->v_bufobj.bo_bsize;
+ if (!use_buf_pager && bo_bs <= PAGE_SIZE)
+ return (vnode_pager_generic_getpages(vp, ma, count,
+ ap->a_rbehind, ap->a_rahead, NULL, NULL));
+
+ object = vp->v_object;
+ la = IDX_TO_OFF(ma[count - 1]->pindex);
+ if (la >= object->un_pager.vnp.vnp_size)
+ return (VM_PAGER_BAD);
+ lpart = la + PAGE_SIZE > object->un_pager.vnp.vnp_size;
+ if (ap->a_rbehind != NULL) {
+ lb = IDX_TO_OFF(ma[0]->pindex);
+ *ap->a_rbehind = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
+ }
+ if (ap->a_rahead != NULL) {
+ *ap->a_rahead = OFF_TO_IDX(roundup2(la, bo_bs) - la);
+ if (la + IDX_TO_OFF(*ap->a_rahead) >=
+ object->un_pager.vnp.vnp_size) {
+ *ap->a_rahead = OFF_TO_IDX(roundup2(object->un_pager.
+ vnp.vnp_size, PAGE_SIZE) - la);
+ }
+ }
+ VM_OBJECT_WLOCK(object);
+again:
+ for (i = 0; i < count; i++)
+ vm_page_busy_downgrade(ma[i]);
+ VM_OBJECT_WUNLOCK(object);
+
+ lbnp = -1;
+ for (i = 0; i < count; i++) {
+ m = ma[i];
+
+ /*
+ * Pages are shared busy and the object lock is not
+ * owned, which together allow for the pages'
+ * invalidation. The racy test for validity avoids
+ * useless creation of the buffer for the most typical
+ * case when invalidation is not used in redo or for
+ * parallel read. The shared->excl upgrade loop at
+ * the end of the function catches the race in a
+ * reliable way (protected by the object lock).
+ */
+ if (m->valid == VM_PAGE_BITS_ALL)
+ continue;
+
+ lbn = lblkno(um->um_fs, IDX_TO_OFF(m->pindex));
+ if (lbn != lbnp) {
+ bsize = blksize(um->um_fs, VTOI(vp), lbn);
+ error = bread_gb(vp, lbn, bsize, NOCRED, GB_UNMAPPED,
+ &bp);
+ if (error != 0)
+ break;
+ KASSERT(1 /* racy, enable for debugging */ ||
+ m->valid == VM_PAGE_BITS_ALL || i == count - 1,
+ ("buf %d %p invalid", i, m));
+ if (i == count - 1 && lpart) {
+ VM_OBJECT_WLOCK(object);
+ if (m->valid != 0 &&
+ m->valid != VM_PAGE_BITS_ALL)
+ vm_page_zero_invalid(m, TRUE);
+ VM_OBJECT_WUNLOCK(object);
+ }
+ if (LIST_EMPTY(&bp->b_dep)) {
+ /*
+ * Invalidation clears m->valid, but
+ * may leave B_CACHE flag if the
+ * buffer existed at the invalidation
+ * time. In this case, recycle the
+ * buffer to do real read on next
+ * bread() after redo.
+ *
+ * Otherwise B_RELBUF is not strictly
+ * necessary, enable to reduce buf
+ * cache pressure.
+ */
+ if (buf_pager_relbuf ||
+ m->valid != VM_PAGE_BITS_ALL)
+ bp->b_flags |= B_RELBUF;
+
+ bp->b_flags &= ~B_NOCACHE;
+ brelse(bp);
+ } else {
+ bqrelse(bp);
+ }
+ lbnp = lbn;
+ }
+ }
+
+ VM_OBJECT_WLOCK(object);
+ redo = false;
+ for (i = 0; i < count; i++) {
+ vm_page_sunbusy(ma[i]);
+ ma[i] = vm_page_grab(object, ma[i]->pindex, VM_ALLOC_NORMAL);
+
+ /*
+ * Since the pages were only sbusy while neither the
+ * buffer nor the object lock was held by us, or
+ * reallocated while vm_page_grab() slept for busy
+ * relinquish, they could have been invalidated.
+ * Recheck the valid bits and re-read as needed.
+ *
+ * Note that the last page is made fully valid in the
+ * read loop, and partial validity for the page at
+ * index count - 1 could mean that the page was
+ * invalidated or removed, so we must restart for
+ * safety as well.
+ */
+ if (ma[i]->valid != VM_PAGE_BITS_ALL)
+ redo = true;
+ }
+ if (redo && error == 0)
+ goto again;
+ VM_OBJECT_WUNLOCK(object);
+ return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
+}
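The read-behind/read-ahead hints computed in ffs_getpages() above (the rounddown2()/roundup2() expressions) simply widen the requested page run so that it starts and ends on device block boundaries, which is what lets the buffer cache bring in whole blocks rather than isolated pages. The stand-alone sketch below repeats that arithmetic with made-up numbers (4 KB pages, a 32 KB device block, requested pages 11-13) purely to illustrate the calculation; the local macros imitate the kernel's rounddown2/roundup2 and IDX_TO_OFF/OFF_TO_IDX helpers and are not the kernel code itself.

#include <stdint.h>
#include <stdio.h>

/* Userland stand-ins for the kernel macros used by the pager. */
#define PGSIZE			4096ULL
#define RND_DOWN2(x, y)		((x) & ~((y) - 1))	/* y is a power of 2 */
#define RND_UP2(x, y)		(((x) + ((y) - 1)) & ~((y) - 1))
#define IDX2OFF(i)		((uint64_t)(i) * PGSIZE)
#define OFF2IDX(o)		((o) / PGSIZE)

int
main(void)
{
	uint64_t bo_bs = 32768;			/* device block size > page size */
	uint64_t first = 11, last = 13;		/* requested page indices */
	uint64_t lb = IDX2OFF(first);		/* offset of first requested page */
	uint64_t la = IDX2OFF(last);		/* offset of last requested page */

	/* Pages to read behind: back to the start of the device block. */
	uint64_t rbehind = OFF2IDX(lb - RND_DOWN2(lb, bo_bs));
	/* Pages to read ahead: forward to the next block boundary. */
	uint64_t rahead = OFF2IDX(RND_UP2(la, bo_bs) - la);

	printf("rbehind = %ju pages, rahead = %ju pages\n",
	    (uintmax_t)rbehind, (uintmax_t)rahead);
	return (0);
}

With these numbers both hints come out to 3 pages, extending the run of pages 11-13 back to page 8, the first page of the 32 KB device block.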