summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/fletcher.c104
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c16
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c28
7 files changed, 144 insertions, 16 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/fletcher.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/fletcher.c
index edda3c9..54247d7 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/fletcher.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/fletcher.c
@@ -19,11 +19,111 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Fletcher Checksums
+ * ------------------
+ *
+ * ZFS's 2nd and 4th order Fletcher checksums are defined by the following
+ * recurrence relations:
+ *
+ * a = a + f
+ * i i-1 i-1
+ *
+ * b = b + a
+ * i i-1 i
+ *
+ * c = c + b (fletcher-4 only)
+ * i i-1 i
+ *
+ * d = d + c (fletcher-4 only)
+ * i i-1 i
+ *
+ * Where
+ * a_0 = b_0 = c_0 = d_0 = 0
+ * and
+ * f_0 .. f_(n-1) are the input data.
+ *
+ * Using standard techniques, these translate into the following series:
+ *
+ * __n_ __n_
+ * \ | \ |
+ * a = > f b = > i * f
+ * n /___| n - i n /___| n - i
+ * i = 1 i = 1
+ *
+ *
+ * __n_ __n_
+ * \ | i*(i+1) \ | i*(i+1)*(i+2)
+ * c = > ------- f d = > ------------- f
+ * n /___| 2 n - i n /___| 6 n - i
+ * i = 1 i = 1
+ *
+ * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators.
+ * Since the additions are done mod (2^64), errors in the high bits may not
+ * be noticed. For this reason, fletcher-2 is deprecated.
+ *
+ * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators.
+ * A conservative estimate of how big the buffer can get before we overflow
+ * can be estimated using f_i = 0xffffffff for all i:
+ *
+ * % bc
+ * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4
+ * 2264
+ * quit
+ * %
+ *
+ * So blocks of up to 2k will not overflow. Our largest block size is
+ * 128k, which has 32k 4-byte words, so we can compute the largest possible
+ * accumulators, then divide by 2^64 to figure the max amount of overflow:
+ *
+ * % bc
+ * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c }
+ * a/2^64;b/2^64;c/2^64;d/2^64
+ * 0
+ * 0
+ * 1365
+ * 11186858
+ * quit
+ * %
+ *
+ * So a and b cannot overflow. To make sure each bit of input has some
+ * effect on the contents of c and d, we can look at what the factors of
+ * the coefficients in the equations for c_n and d_n are. The number of 2s
+ * in the factors determines the lowest set bit in the multiplier. Running
+ * through the cases for n*(n+1)/2 reveals that the highest power of 2 is
+ * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow
+ * the 64-bit accumulators, every bit of every f_i effects every accumulator,
+ * even for 128k blocks.
+ *
+ * If we wanted to make a stronger version of fletcher4 (fletcher4c?),
+ * we could do our calculations mod (2^32 - 1) by adding in the carries
+ * periodically, and store the number of carries in the top 32-bits.
+ *
+ * --------------------
+ * Checksum Performance
+ * --------------------
+ *
+ * There are two interesting components to checksum performance: cached and
+ * uncached performance. With cached data, fletcher-2 is about four times
+ * faster than fletcher-4. With uncached data, the performance difference is
+ * negligible, since the cost of a cache fill dominates the processing time.
+ * Even though fletcher-4 is slower than fletcher-2, it is still a pretty
+ * efficient pass over the data.
+ *
+ * In normal operation, the data which is being checksummed is in a buffer
+ * which has been filled either by:
+ *
+ * 1. a compression step, which will be mostly cached, or
+ * 2. a bcopy() or copyin(), which will be uncached (because the
+ * copy is cache-bypassing).
+ *
+ * For both cached and uncached data, both fletcher checksums are much faster
+ * than sha-256, and slower than 'off', which doesn't touch the data at all.
+ */
#include <sys/types.h>
#include <sys/sysmacros.h>
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
index bb1c9da..185f7d2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
@@ -255,6 +255,7 @@ VTOZ(vnode_t *vp)
/*
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
+ * ZFS_ENTER_NOERROR() is called when we can't return EIO.
* ZFS_EXIT() must be called before exitting the vop.
* ZFS_VERIFY_ZP() verifies the znode is valid.
*/
@@ -267,6 +268,9 @@ VTOZ(vnode_t *vp)
} \
}
+#define ZFS_ENTER_NOERROR(zfsvfs) \
+ rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG)
+
#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
#define ZFS_VERIFY_ZP(zp) \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index 6331567..fbc8c45 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -76,7 +76,7 @@ enum zio_checksum {
ZIO_CHECKSUM_FUNCTIONS
};
-#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
+#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
enum zio_compress {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
index d466273..a43d85c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
@@ -1841,7 +1841,7 @@ zfs_perm_init(znode_t *zp, znode_t *parent, int flag,
fgid = zfs_fuid_create_cred(zfsvfs,
ZFS_GROUP, tx, cr, fuidp);
#ifdef __FreeBSD__
- gid = parent->z_phys->zp_gid;
+ gid = fgid = parent->z_phys->zp_gid;
#else
gid = crgetgid(cr);
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index 185d84b..b9a4b2f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -864,7 +864,7 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
znode_t *rootzp;
int error;
- ZFS_ENTER(zfsvfs);
+ ZFS_ENTER_NOERROR(zfsvfs);
error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
if (error == 0) {
@@ -898,6 +898,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
* 'z_parent' is self referential for non-snapshots.
*/
(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
+#ifdef FREEBSD_NAMECACHE
+ cache_purgevfs(zfsvfs->z_parent->z_vfs);
+#endif
}
/*
@@ -1027,6 +1030,17 @@ zfs_umount(vfs_t *vfsp, int fflag)
ASSERT(zfsvfs->z_ctldir == NULL);
}
+ if (fflag & MS_FORCE) {
+ /*
+ * Mark file system as unmounted before calling
+ * vflush(FORCECLOSE). This way we ensure no future vnops
+ * will be called and risk operating on DOOMED vnodes.
+ */
+ rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
+ zfsvfs->z_unmounted = B_TRUE;
+ rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+ }
+
/*
* Flush all the files.
*/
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index 0753fe9..a761c81 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -4860,7 +4860,7 @@ zfs_freebsd_getacl(ap)
vsecattr_t vsecattr;
if (ap->a_type != ACL_TYPE_NFS4)
- return (EOPNOTSUPP);
+ return (EINVAL);
vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
if (error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL))
@@ -4889,7 +4889,7 @@ zfs_freebsd_setacl(ap)
aclent_t *aaclp;
if (ap->a_type != ACL_TYPE_NFS4)
- return (EOPNOTSUPP);
+ return (EINVAL);
if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
return (EINVAL);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
index 082198f..c43a850 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
@@ -890,17 +890,25 @@ again:
if (zp->z_unlinked) {
err = ENOENT;
} else {
- if ((vp = ZTOV(zp)) != NULL) {
- VI_LOCK(vp);
+ int dying = 0;
+
+ vp = ZTOV(zp);
+ if (vp == NULL)
+ dying = 1;
+ else {
+ VN_HOLD(vp);
if ((vp->v_iflag & VI_DOOMED) != 0) {
- VI_UNLOCK(vp);
- vp = NULL;
- } else
- VI_UNLOCK(vp);
+ dying = 1;
+ /*
+ * Don't VN_RELE() vnode here, because
+ * it can call vn_lock() which creates
+ * LOR between vnode lock and znode
+ * lock. We will VN_RELE() the vnode
+ * after droping znode lock.
+ */
+ }
}
- if (vp != NULL)
- VN_HOLD(vp);
- else {
+ if (dying) {
if (first) {
ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
first = 0;
@@ -912,6 +920,8 @@ again:
dmu_buf_rele(db, NULL);
mutex_exit(&zp->z_lock);
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+ if (vp != NULL)
+ VN_RELE(vp);
tsleep(zp, 0, "zcollide", 1);
goto again;
}
OpenPOWER on IntegriCloud