diff options
Diffstat (limited to 'sys/cddl')
13 files changed, 414 insertions, 270 deletions
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c index 848007e..4214ca8 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c @@ -89,13 +89,14 @@ traverse(vnode_t **cvpp, int lktype) if (vfsp == NULL) break; error = vfs_busy(vfsp, 0); + /* * tvp is NULL for *cvpp vnode, which we can't unlock. - * At least some callers expect the reference to be - * maintained to the original *cvpp */ if (tvp != NULL) vput(cvp); + else + vrele(cvp); if (error) return (error); diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c index 49becbc..6af1e8b 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -121,34 +121,39 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, struct ucred *cr; int error; + ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot"); + + vp = *vpp; + *vpp = NULL; + error = 0; + /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) - return (ENAMETOOLONG); - - vfsp = vfs_byname_kld(fstype, td, &error); - if (vfsp == NULL) - return (ENODEV); - - vp = *vpp; - if (vp->v_type != VDIR) - return (ENOTDIR); + error = ENAMETOOLONG; + if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL) + error = ENODEV; + if (error == 0 && vp->v_type != VDIR) + error = ENOTDIR; /* * We need vnode lock to protect v_mountedhere and vnode interlock * to protect v_iflag. */ - vn_lock(vp, LK_SHARED | LK_RETRY); - VI_LOCK(vp); - if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { + if (error == 0) { + VI_LOCK(vp); + if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) + vp->v_iflag |= VI_MOUNT; + else + error = EBUSY; VI_UNLOCK(vp); - VOP_UNLOCK(vp, 0); - return (EBUSY); } - vp->v_iflag |= VI_MOUNT; - VI_UNLOCK(vp); + if (error != 0) { + vput(vp); + return (error); + } VOP_UNLOCK(vp, 0); /* @@ -198,7 +203,6 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); vfs_mount_destroy(mp); - *vpp = NULL; return (error); } @@ -229,7 +233,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); - vput(vp); + VOP_UNLOCK(vp, 0); vfs_unbusy(mp); *vpp = mvp; return (0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c index ed99c4b..a087e6e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c @@ -7288,7 +7288,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; - int rval; + uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c index c5c5c00..27d259d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c @@ -442,19 +442,9 @@ gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm) *vpp = dvp; return (0); } else if (strcmp(nm, "..") == 0) { - if (pvp == NULL) { - ASSERT(dvp->v_flag & VROOT); - VN_HOLD(dvp); - *vpp = dvp; - ASSERT_VOP_ELOCKED(dvp, "gfs_lookup_dot: non-locked dvp"); - } else { - ltype = VOP_ISLOCKED(dvp); - VOP_UNLOCK(dvp, 0); - VN_HOLD(pvp); - *vpp = pvp; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - vn_lock(dvp, ltype | LK_RETRY); - } + ASSERT(pvp != NULL); + VN_HOLD(pvp); + *vpp = pvp; return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index f6d19fe..9430037 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -233,10 +233,15 @@ int zfs_disable_dup_eviction = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ u_int zfs_arc_free_target = 0; +/* Absolute min for arc min / max is 16MB. */ +static uint64_t arc_abs_min = 16 << 20; + static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); +static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); +static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); -#ifdef _KERNEL +#if defined(__FreeBSD__) && defined(_KERNEL) static void arc_free_target_init(void *unused __unused) { @@ -253,10 +258,10 @@ TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min); TUNABLE_QUAD("vfs.zfs.arc_average_blocksize", &zfs_arc_average_blocksize); TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift); SYSCTL_DECL(_vfs_zfs); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, - "Maximum ARC size"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0, - "Minimum ARC size"); +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_U64 | CTLFLAG_RWTUN, + 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_max, "QU", "Maximum ARC size"); +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_U64 | CTLFLAG_RWTUN, + 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_min, "QU", "Minimum ARC size"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, &zfs_arc_average_blocksize, 0, "ARC average blocksize"); @@ -882,7 +887,7 @@ struct arc_buf_hdr { l1arc_buf_hdr_t b_l1hdr; }; -#ifdef _KERNEL +#if defined(__FreeBSD__) && defined(_KERNEL) static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) { @@ -900,6 +905,82 @@ sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) arc_meta_limit = val; return (0); } + +static int +sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_arc_max; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (zfs_arc_max == 0) { + /* Loader tunable so blindly set */ + zfs_arc_max = val; + return (0); + } + + if (val < arc_abs_min || val > kmem_size()) + return (EINVAL); + if (val < arc_c_min) + return (EINVAL); + if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit) + return (EINVAL); + + arc_c_max = val; + + arc_c = arc_c_max; + arc_p = (arc_c >> 1); + + if (zfs_arc_meta_limit == 0) { + /* limit meta-data to 1/4 of the arc capacity */ + arc_meta_limit = arc_c_max / 4; + } + + /* if kmem_flags are set, lets try to use less memory */ + if (kmem_debugging()) + arc_c = arc_c / 2; + + zfs_arc_max = arc_c; + + return (0); +} + +static int +sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_arc_min; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (zfs_arc_min == 0) { + /* Loader tunable so blindly set */ + zfs_arc_min = val; + return (0); + } + + if (val < arc_abs_min || val > arc_c_max) + return (EINVAL); + + arc_c_min = val; + + if (zfs_arc_meta_min == 0) + arc_meta_min = arc_c_min / 2; + + if (arc_c < arc_c_min) + arc_c = arc_c_min; + + zfs_arc_min = arc_c_min; + + return (0); +} #endif static arc_buf_t *arc_eviction_list; @@ -2251,6 +2332,7 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, hdr->b_l1hdr.b_buf->b_data); ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_OFF); + hdr->b_l1hdr.b_tmp_cdata = NULL; return; } @@ -5320,8 +5402,8 @@ arc_init(void) arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); #endif #endif /* illumos */ - /* set min cache to 1/32 of all memory, or 16MB, whichever is more */ - arc_c_min = MAX(arc_c / 4, 16 << 20); + /* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */ + arc_c_min = MAX(arc_c / 4, arc_abs_min); /* set max to 1/2 of all memory, or all but 1GB, whichever is more */ if (arc_c * 8 >= 1 << 30) arc_c_max = (arc_c * 8) - (1 << 30); @@ -5342,11 +5424,11 @@ arc_init(void) #ifdef _KERNEL /* * Allow the tunables to override our calculations if they are - * reasonable (ie. over 16MB) + * reasonable. */ - if (zfs_arc_max > 16 << 20 && zfs_arc_max < kmem_size()) + if (zfs_arc_max > arc_abs_min && zfs_arc_max < kmem_size()) arc_c_max = zfs_arc_max; - if (zfs_arc_min > 16 << 20 && zfs_arc_min <= arc_c_max) + if (zfs_arc_min > arc_abs_min && zfs_arc_min <= arc_c_max) arc_c_min = zfs_arc_min; #endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 8e14a97..d8d186c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1083,8 +1083,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) else XUIOSTAT_BUMP(xuiostat_rbuf_copied); } else { +#ifdef illumos err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_READ, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, + tocpy, uio); +#endif } if (err) break; @@ -1178,6 +1183,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) else dmu_buf_will_dirty(db, tx); +#ifdef illumos /* * XXX uiomove could block forever (eg. nfs-backed * pages). There needs to be a uiolockdown() function @@ -1186,6 +1192,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) */ err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy, + uio); +#endif if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c index d1b8002..9c34f45 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c @@ -49,8 +49,8 @@ #include <sys/dsl_userhold.h> #ifdef __FreeBSD__ -#include <sys/sysctl.h> #include <sys/types.h> +#include <sys/sysctl.h> #endif /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h index 1945686..8849003 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h @@ -61,7 +61,6 @@ int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp); #define ZFSCTL_INO_ROOT 0x1 #define ZFSCTL_INO_SNAPDIR 0x2 -#define ZFSCTL_INO_SHARES 0x3 #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c index 693ba41..cbafa03 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c @@ -223,7 +223,7 @@ zfsctl_root_inode_cb(vnode_t *vp, int index) { zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - ASSERT(index <= 2); + ASSERT(index < 2); if (index == 0) return (ZFSCTL_INO_SNAPDIR); @@ -294,6 +294,22 @@ zfsctl_root(znode_t *zp) return (zp->z_zfsvfs->z_ctldir); } +static int +zfsctl_common_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + gfs_file_t *fp = vp->v_data; + + printf(" parent = %p\n", fp->gfs_parent); + printf(" type = %d\n", fp->gfs_type); + printf(" index = %d\n", fp->gfs_index); + printf(" ino = %ju\n", (uintmax_t)fp->gfs_ino); + return (0); +} + /* * Common open routine. Disallow any write access. */ @@ -404,8 +420,6 @@ zfsctl_common_fid(ap) ZFS_EXIT(zfsvfs); return (SET_ERROR(ENOSPC)); } -#else - fidp->fid_len = SHORT_FID_LEN; #endif zfid = (zfid_short_t *)fidp; @@ -454,25 +468,6 @@ zfsctl_shares_fid(ap) return (error); } -static int -zfsctl_common_reclaim(ap) - struct vop_reclaim_args /* { - struct vnode *a_vp; - struct thread *a_td; - } */ *ap; -{ - vnode_t *vp = ap->a_vp; - - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); - VI_LOCK(vp); - vp->v_data = NULL; - VI_UNLOCK(vp); - return (0); -} - /* * .zfs inode namespace * @@ -537,9 +532,20 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, ZFS_ENTER(zfsvfs); if (strcmp(nm, "..") == 0) { +#ifdef illumos err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp); +#else + /* + * NB: can not use VFS_ROOT here as it would acquire + * the vnode lock of the parent (root) vnode while + * holding the child's (.zfs) lock. + */ + znode_t *rootzp; + + err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); if (err == 0) - VOP_UNLOCK(*vpp, 0); + *vpp = ZTOV(rootzp); +#endif } else { err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, ct, direntflags, realpnp); @@ -550,6 +556,61 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, return (err); } +static int +zfsctl_freebsd_root_lookup(ap) + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + vnode_t *dvp = ap->a_dvp; + vnode_t **vpp = ap->a_vpp; + cred_t *cr = ap->a_cnp->cn_cred; + int flags = ap->a_cnp->cn_flags; + int lkflags = ap->a_cnp->cn_lkflags; + int nameiop = ap->a_cnp->cn_nameiop; + char nm[NAME_MAX + 1]; + int err; + + if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE)) + return (EOPNOTSUPP); + + ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); + strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); +relookup: + err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); + if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) { + if (flags & ISDOTDOT) { + VOP_UNLOCK(dvp, 0); + err = vn_lock(*vpp, lkflags); + if (err != 0) { + vrele(*vpp); + *vpp = NULL; + } + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); + } else { + err = vn_lock(*vpp, LK_EXCLUSIVE); + if (err != 0) { + VERIFY3S(err, ==, ENOENT); + goto relookup; + } + } + } + return (err); +} + +static int +zfsctl_root_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + printf(" .zfs node\n"); + zfsctl_common_print(ap); + return (0); +} + #ifdef illumos static int zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, @@ -585,49 +646,6 @@ static const fs_operation_def_t zfsctl_tops_root[] = { }; #endif /* illumos */ -/* - * Special case the handling of "..". - */ -/* ARGSUSED */ -int -zfsctl_freebsd_root_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - vnode_t *dvp = ap->a_dvp; - vnode_t **vpp = ap->a_vpp; - cred_t *cr = ap->a_cnp->cn_cred; - int flags = ap->a_cnp->cn_flags; - int nameiop = ap->a_cnp->cn_nameiop; - char nm[NAME_MAX + 1]; - int err; - int ltype; - - if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE)) - return (EOPNOTSUPP); - - ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); - strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); - err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); - if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) { - ltype = VOP_ISLOCKED(dvp); - if (flags & ISDOTDOT) { - VN_HOLD(*vpp); - VOP_UNLOCK(dvp, 0); - } - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - if (flags & ISDOTDOT) { - VN_RELE(*vpp); - vn_lock(dvp, ltype| LK_RETRY); - } - } - - return (err); -} - static struct vop_vector zfsctl_ops_root = { .vop_default = &default_vnodeops, .vop_open = zfsctl_common_open, @@ -643,6 +661,7 @@ static struct vop_vector zfsctl_ops_root = { .vop_pathconf = zfsctl_pathconf, #endif .vop_fid = zfsctl_common_fid, + .vop_print = zfsctl_root_print, }; /* @@ -987,6 +1006,11 @@ zfsctl_snapdir_lookup(ap) ZFS_ENTER(zfsvfs); if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { + if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') { + VOP_UNLOCK(dvp, 0); + VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE)); + VERIFY0(vn_lock(dvp, LK_EXCLUSIVE)); + } ZFS_EXIT(zfsvfs); return (0); } @@ -1011,6 +1035,7 @@ zfsctl_snapdir_lookup(ap) #endif } +relookup: mutex_enter(&sdp->sd_lock); search.se_name = (char *)nm; if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { @@ -1018,7 +1043,6 @@ zfsctl_snapdir_lookup(ap) VN_HOLD(*vpp); err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY); if (err != 0) { - VN_RELE(*vpp); *vpp = NULL; } else if (*vpp == sep->se_root) { /* @@ -1027,13 +1051,6 @@ zfsctl_snapdir_lookup(ap) */ VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0); goto domount; - } else { - /* - * VROOT was set during the traverse call. We need - * to clear it since we're pretending to be part - * of our parent's vfs. - */ - (*vpp)->v_flag &= ~VROOT; } mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); @@ -1076,7 +1093,6 @@ zfsctl_snapdir_lookup(ap) sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); (void) strcpy(sep->se_name, nm); *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); - VN_HOLD(*vpp); avl_insert(&sdp->sd_snaps, sep, where); dmu_objset_rele(snap, FTAG); @@ -1087,6 +1103,16 @@ domount: (void) snprintf(mountpoint, mountpoint_len, "%s/" ZFS_CTLDIR_NAME "/snapshot/%s", dvp->v_vfsp->mnt_stat.f_mntonname, nm); + mutex_exit(&sdp->sd_lock); + + /* + * The vnode may get reclaimed between dropping sd_lock and + * getting the vnode lock. + * */ + err = vn_lock(*vpp, LK_EXCLUSIVE); + if (err == ENOENT) + goto relookup; + VERIFY0(err); err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0); kmem_free(mountpoint, mountpoint_len); if (err == 0) { @@ -1099,8 +1125,8 @@ domount: */ ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; + (*vpp)->v_flag &= ~VROOT; } - mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); #ifdef illumos @@ -1142,6 +1168,11 @@ zfsctl_shares_lookup(ap) strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { + if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') { + VOP_UNLOCK(dvp, 0); + VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE)); + VERIFY0(vn_lock(dvp, LK_EXCLUSIVE)); + } ZFS_EXIT(zfsvfs); return (0); } @@ -1348,8 +1379,8 @@ zfsctl_snapdir_getattr(ap) /* ARGSUSED */ static int -zfsctl_snapdir_inactive(ap) - struct vop_inactive_args /* { +zfsctl_snapdir_reclaim(ap) + struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; @@ -1358,22 +1389,37 @@ zfsctl_snapdir_inactive(ap) zfsctl_snapdir_t *sdp = vp->v_data; zfs_snapentry_t *sep; - /* - * On forced unmount we have to free snapshots from here. - */ - mutex_enter(&sdp->sd_lock); - while ((sep = avl_first(&sdp->sd_snaps)) != NULL) { - avl_remove(&sdp->sd_snaps, sep); - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - kmem_free(sep, sizeof (zfs_snapentry_t)); - } - mutex_exit(&sdp->sd_lock); - gfs_dir_inactive(vp); ASSERT(avl_numnodes(&sdp->sd_snaps) == 0); mutex_destroy(&sdp->sd_lock); avl_destroy(&sdp->sd_snaps); - kmem_free(sdp, sizeof (zfsctl_snapdir_t)); + gfs_vop_reclaim(ap); + + return (0); +} + +static int +zfsctl_shares_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + printf(" .zfs/shares node\n"); + zfsctl_common_print(ap); + return (0); +} +static int +zfsctl_snapdir_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + zfsctl_snapdir_t *sdp = vp->v_data; + + printf(" .zfs/snapshot node\n"); + printf(" number of children = %lu\n", avl_numnodes(&sdp->sd_snaps)); + zfsctl_common_print(ap); return (0); } @@ -1419,9 +1465,10 @@ static struct vop_vector zfsctl_ops_snapdir = { .vop_mkdir = zfsctl_freebsd_snapdir_mkdir, .vop_readdir = gfs_vop_readdir, .vop_lookup = zfsctl_snapdir_lookup, - .vop_inactive = zfsctl_snapdir_inactive, - .vop_reclaim = zfsctl_common_reclaim, + .vop_inactive = VOP_NULL, + .vop_reclaim = zfsctl_snapdir_reclaim, .vop_fid = zfsctl_common_fid, + .vop_print = zfsctl_snapdir_print, }; static struct vop_vector zfsctl_ops_shares = { @@ -1436,6 +1483,7 @@ static struct vop_vector zfsctl_ops_shares = { .vop_inactive = VOP_NULL, .vop_reclaim = gfs_vop_reclaim, .vop_fid = zfsctl_shares_fid, + .vop_print = zfsctl_shares_print, }; #endif /* illumos */ @@ -1454,7 +1502,6 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp, &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL); - VN_HOLD(vp); zcp = vp->v_data; zcp->zc_id = objset; VOP_UNLOCK(vp, 0); @@ -1462,17 +1509,28 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) return (vp); } +static int +zfsctl_snapshot_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct thread *a_td; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + + vrecycle(vp); + return (0); +} static int zfsctl_snapshot_reclaim(ap) - struct vop_inactive_args /* { + struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; { vnode_t *vp = ap->a_vp; cred_t *cr = ap->a_td->td_ucred; - struct vop_reclaim_args iap; zfsctl_snapdir_t *sdp; zfs_snapentry_t *sep, *next; int locked; @@ -1480,7 +1538,6 @@ zfsctl_snapshot_reclaim(ap) VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0); sdp = dvp->v_data; - VOP_UNLOCK(dvp, 0); /* this may already have been unmounted */ if (sdp == NULL) { VN_RELE(dvp); @@ -1516,106 +1573,12 @@ zfsctl_snapshot_reclaim(ap) * "active". If we lookup the same name again we will end up * creating a new vnode. */ - iap.a_vp = vp; - gfs_vop_reclaim(&iap); + gfs_vop_reclaim(ap); return (0); } static int -zfsctl_traverse_begin(vnode_t **vpp, int lktype) -{ - - VN_HOLD(*vpp); - /* Snapshot should be already mounted, but just in case. */ - if (vn_mountedvfs(*vpp) == NULL) - return (ENOENT); - return (traverse(vpp, lktype)); -} - -static void -zfsctl_traverse_end(vnode_t *vp, int err) -{ - - if (err == 0) - vput(vp); - else - VN_RELE(vp); -} - -static int -zfsctl_snapshot_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - } */ *ap; -{ - vnode_t *vp = ap->a_vp; - int err; - - err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); - if (err == 0) - err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred); - zfsctl_traverse_end(vp, err); - return (err); -} - -static int -zfsctl_snapshot_fid(ap) - struct vop_fid_args /* { - struct vnode *a_vp; - struct fid *a_fid; - } */ *ap; -{ - vnode_t *vp = ap->a_vp; - int err; - - err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); - if (err == 0) - err = VOP_VPTOFH(vp, (void *)ap->a_fid); - zfsctl_traverse_end(vp, err); - return (err); -} - -static int -zfsctl_snapshot_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - vnode_t *dvp = ap->a_dvp; - vnode_t **vpp = ap->a_vpp; - struct componentname *cnp = ap->a_cnp; - cred_t *cr = ap->a_cnp->cn_cred; - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - int error; - - if (cnp->cn_namelen != 2 || cnp->cn_nameptr[0] != '.' || - cnp->cn_nameptr[1] != '.') { - return (ENOENT); - } - - ASSERT(dvp->v_type == VDIR); - ASSERT(zfsvfs->z_ctldir != NULL); - - error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp, - NULL, 0, NULL, cr, NULL, NULL, NULL); - if (error == 0) { - int ltype = VOP_ISLOCKED(dvp); - VN_HOLD(*vpp); - VOP_UNLOCK(dvp, 0); - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - VN_RELE(*vpp); - vn_lock(dvp, ltype | LK_RETRY); - } - - return (error); -} - -static int zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) { zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data; @@ -1657,18 +1620,31 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) return (error); } +static int +zfsctl_snaphot_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + zfsctl_node_t *zcp = vp->v_data; + + printf(" .zfs/snapshot/<snap> node\n"); + printf(" id = %ju\n", (uintmax_t)zcp->zc_id); + zfsctl_common_print(ap); + return (0); +} + /* * These VP's should never see the light of day. They should always * be covered. */ static struct vop_vector zfsctl_ops_snapshot = { .vop_default = &default_vnodeops, - .vop_inactive = VOP_NULL, - .vop_lookup = zfsctl_snapshot_lookup, + .vop_inactive = zfsctl_snapshot_inactive, .vop_reclaim = zfsctl_snapshot_reclaim, - .vop_getattr = zfsctl_snapshot_getattr, - .vop_fid = zfsctl_snapshot_fid, .vop_vptocnp = zfsctl_snapshot_vptocnp, + .vop_print = zfsctl_snaphot_print, }; int @@ -1709,16 +1685,15 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) */ error = traverse(&vp, LK_SHARED | LK_RETRY); if (error == 0) { - if (vp == sep->se_root) + if (vp == sep->se_root) { + VN_RELE(vp); /* release covered vp */ error = SET_ERROR(EINVAL); - else + } else { *zfsvfsp = VTOZ(vp)->z_zfsvfs; + VN_URELE(vp); /* put snapshot's root vp */ + } } mutex_exit(&sdp->sd_lock); - if (error == 0) - VN_URELE(vp); - else - VN_RELE(vp); } else { error = SET_ERROR(EINVAL); mutex_exit(&sdp->sd_lock); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 651c596..4d6be94 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -1429,6 +1429,7 @@ static int getzfsvfs(const char *dsname, zfsvfs_t **zfvp) { objset_t *os; + vfs_t *vfsp; int error; error = dmu_objset_hold(dsname, FTAG, &os); @@ -1442,19 +1443,21 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) mutex_enter(&os->os_user_ptr_lock); *zfvp = dmu_objset_get_user(os); if (*zfvp) { -#ifdef illumos - VFS_HOLD((*zfvp)->z_vfs); -#else - if (vfs_busy((*zfvp)->z_vfs, 0) != 0) { - *zfvp = NULL; - error = SET_ERROR(ESRCH); - } -#endif + vfsp = (*zfvp)->z_vfs; + vfs_ref(vfsp); } else { error = SET_ERROR(ESRCH); } mutex_exit(&os->os_user_ptr_lock); dmu_objset_rele(os, FTAG); + if (error == 0) { + error = vfs_busy(vfsp, 0); + vfs_rel(vfsp); + if (error != 0) { + *zfvp = NULL; + error = SET_ERROR(ESRCH); + } + } return (error); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index fa6eac7..9bb6091 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -1171,6 +1171,7 @@ zfs_domount(vfs_t *vfsp, char *osname) vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; + vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ /* * The fsid is 64 bits, composed of an 8-bit fs type, which @@ -1782,12 +1783,11 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) if (error == 0) { error = vn_lock(*vpp, flags); - if (error == 0) - (*vpp)->v_vflag |= VV_ROOT; + if (error != 0) { + VN_RELE(*vpp); + *vpp = NULL; + } } - if (error != 0) - *vpp = NULL; - return (error); } @@ -2005,12 +2005,6 @@ zfs_umount(vfs_t *vfsp, int fflag) */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); - if (zfsvfs->z_issnap) { - vnode_t *svp = vfsp->mnt_vnodecovered; - - if (svp->v_count >= 2) - VN_RELE(svp); - } zfs_freevfs(vfsp); return (0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 2a15cdf..b0f11ac 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -655,7 +655,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); +#ifdef illumos error = uiomove(va + off, bytes, UIO_READ, uio); +#else + error = vn_io_fault_uiomove(va + off, bytes, uio); +#endif zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unhold(pp); @@ -1033,18 +1037,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ +#ifdef illumos size_t cbytes; +#endif abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); +#ifdef illumos if (error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes)) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); +#else + ssize_t resid = uio->uio_resid; + error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); + if (error != 0) { + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + dmu_return_arcbuf(abuf); + break; + } +#endif } /* @@ -1122,8 +1139,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } +#ifdef illumos ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); +#endif } if (tx_bytes && vn_has_cached_data(vp)) { update_pages(vp, woff, tx_bytes, zfsvfs->z_os, @@ -1177,7 +1196,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); +#ifdef illumos ASSERT(error == 0); +#else + ASSERT(error == 0 || error == EFAULT); +#endif } /* * If we are replaying and eof is non zero then force @@ -1187,7 +1210,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) zp->z_size = zfsvfs->z_replay_eof; - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + if (error == 0) + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + else + (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); dmu_tx_commit(tx); @@ -1214,6 +1240,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) return (error); } +#ifdef __FreeBSD__ + /* + * EFAULT means that at least one page of the source buffer was not + * available. VFS will re-try remaining I/O upon this error. + */ + if (error == EFAULT) { + ZFS_EXIT(zfsvfs); + return (error); + } +#endif + if (ioflag & (FSYNC | FDSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); @@ -7165,6 +7202,53 @@ zfs_freebsd_aclcheck(ap) return (EOPNOTSUPP); } +static int +zfs_vptocnp(struct vop_vptocnp_args *ap) +{ + vnode_t *covered_vp; + vnode_t *vp = ap->a_vp;; + zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; + znode_t *zp = VTOZ(vp); + uint64_t parent; + int ltype; + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + /* + * If we are a snapshot mounted under .zfs, run the operation + * on the covered vnode. + */ + if ((error = sa_lookup(zp->z_sa_hdl, + SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + + if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) { + ZFS_EXIT(zfsvfs); + return (vop_stdvptocnp(ap)); + } + ZFS_EXIT(zfsvfs); + + covered_vp = vp->v_mount->mnt_vnodecovered; + vhold(covered_vp); + ltype = VOP_ISLOCKED(vp); + VOP_UNLOCK(vp, 0); + error = vget(covered_vp, LK_EXCLUSIVE, curthread); + vdrop(covered_vp); + if (error == 0) { + error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, + ap->a_buf, ap->a_buflen); + vput(covered_vp); + } + vn_lock(vp, ltype | LK_RETRY); + if ((vp->v_iflag & VI_DOOMED) != 0) + error = SET_ERROR(ENOENT); + return (error); +} + struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; struct vop_vector zfs_shareops; @@ -7210,6 +7294,7 @@ struct vop_vector zfs_vnodeops = { .vop_aclcheck = zfs_freebsd_aclcheck, .vop_getpages = zfs_freebsd_getpages, .vop_putpages = zfs_freebsd_putpages, + .vop_vptocnp = zfs_vptocnp, }; struct vop_vector zfs_fifoops = { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 431a05d..3853838 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -574,9 +574,10 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; /* - * Slap on VROOT if we are the root znode + * Slap on VROOT if we are the root znode unless we are the root + * node of a snapshot mounted under .zfs. */ - if (zp->z_id == zfsvfs->z_root) + if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs) ZTOV(zp)->v_flag |= VROOT; mutex_exit(&zp->z_lock); |