diff options
author | avg <avg@FreeBSD.org> | 2016-04-16 07:35:53 +0000 |
---|---|---|
committer | avg <avg@FreeBSD.org> | 2016-04-16 07:35:53 +0000 |
commit | 095cceed9085ed7de642250e596310e8e23e200b (patch) | |
tree | 5d093ef18f9017cbdb20ea1726482af68fae8bd1 /sys/cddl | |
parent | 101fd28b906d176c74aac84960766ea8caaa56f7 (diff) | |
download | FreeBSD-src-095cceed9085ed7de642250e596310e8e23e200b.zip FreeBSD-src-095cceed9085ed7de642250e596310e8e23e200b.tar.gz |
zfs: enable vn_io_fault support
Note that now we have to account for possible partial writes
in dmu_write_uio_dbuf(). It seems that on illumos either all or none
of the data are expected to be written. But the partial writes are
quite expected when vn_io_fault support is enabled.
Reviewed by: kib
MFC after: 7 weeks
Differential Revision: https://reviews.freebsd.org/D2790
Diffstat (limited to 'sys/cddl')
3 files changed, 49 insertions, 1 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 706ec94..bb81879 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1092,8 +1092,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) else XUIOSTAT_BUMP(xuiostat_rbuf_copied); } else { +#ifdef illumos err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_READ, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, + tocpy, uio); +#endif } if (err) break; @@ -1187,6 +1192,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) else dmu_buf_will_dirty(db, tx); +#ifdef illumos /* * XXX uiomove could block forever (eg. nfs-backed * pages). There needs to be a uiolockdown() function @@ -1195,6 +1201,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) */ err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy, + uio); +#endif if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index 78d3fdc..932b61a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -1170,6 +1170,7 @@ zfs_domount(vfs_t *vfsp, char *osname) vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; + vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ /* * The fsid is 64 bits, composed of an 8-bit fs type, which diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 96b1cec..5440671 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -656,7 +656,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); +#ifdef illumos error = uiomove(va + off, bytes, UIO_READ, uio); +#else + error = vn_io_fault_uiomove(va + off, bytes, uio); +#endif zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unhold(pp); @@ -1034,18 +1038,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ +#ifdef illumos size_t cbytes; +#endif abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); +#ifdef illumos if (error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes)) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); +#else + ssize_t resid = uio->uio_resid; + error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); + if (error != 0) { + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + dmu_return_arcbuf(abuf); + break; + } +#endif } /* @@ -1123,8 +1140,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } +#ifdef illumos ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); +#endif } if (tx_bytes && vn_has_cached_data(vp)) { update_pages(vp, woff, tx_bytes, zfsvfs->z_os, @@ -1178,7 +1197,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); +#ifdef illumos ASSERT(error == 0); +#else + ASSERT(error == 0 || error == EFAULT); +#endif } /* * If we are replaying and eof is non zero then force @@ -1188,7 +1211,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) zp->z_size = zfsvfs->z_replay_eof; - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + if (error == 0) + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + else + (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); dmu_tx_commit(tx); @@ -1215,6 +1241,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) return (error); } +#ifdef __FreeBSD__ + /* + * EFAULT means that at least one page of the source buffer was not + * available. VFS will re-try remaining I/O upon this error. + */ + if (error == EFAULT) { + ZFS_EXIT(zfsvfs); + return (error); + } +#endif + if (ioflag & (FSYNC | FDSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); |