summaryrefslogtreecommitdiffstats
path: root/sys/cddl
diff options
context:
space:
mode:
authordelphij <delphij@FreeBSD.org>2014-01-01 00:45:28 +0000
committerdelphij <delphij@FreeBSD.org>2014-01-01 00:45:28 +0000
commit82c2441b7daa2585e943ff2e8af3530cdf06a70f (patch)
tree6f71e8255a03efe882fefc70c33cd6de2fb6ce79 /sys/cddl
parentffedbbec0256cfb43963d1d0e6175b9f87786eaa (diff)
downloadFreeBSD-src-82c2441b7daa2585e943ff2e8af3530cdf06a70f.zip
FreeBSD-src-82c2441b7daa2585e943ff2e8af3530cdf06a70f.tar.gz
MFV r259170:
4370 avoid transmitting holes during zfs send 4371 DMU code clean up illumos/illumos-gate@43466aae47bfcd2ad9bf501faec8e75c08095e4f NOTE: Make sure the boot code is updated if a zpool upgrade is done on boot zpool. MFC after: 2 weeks
Diffstat (limited to 'sys/cddl')
-rw-r--r--sys/cddl/boot/zfs/zfsimpl.h24
-rw-r--r--sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c48
-rw-r--r--sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h7
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c12
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c135
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c17
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c5
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c37
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c53
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c185
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c12
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c7
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c31
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c26
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h18
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h1
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h86
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h8
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c20
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c111
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c13
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c19
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c3
31 files changed, 561 insertions, 349 deletions
diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h
index 394a5e3..7bc4c43 100644
--- a/sys/cddl/boot/zfs/zfsimpl.h
+++ b/sys/cddl/boot/zfs/zfsimpl.h
@@ -222,9 +222,10 @@ typedef struct blkptr {
* Macros to get and set fields in a bp or DVA.
*/
#define DVA_GET_ASIZE(dva) \
- BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
+ BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
#define DVA_SET_ASIZE(dva, x) \
- BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
+ BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
+ SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
@@ -242,14 +243,14 @@ typedef struct blkptr {
#define BP_GET_LSIZE(bp) \
(BP_IS_HOLE(bp) ? 0 : \
- BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
+ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1))
#define BP_SET_LSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
+ BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_PSIZE(bp) \
- BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
+ BF64_GET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_PSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
+ BF64_SET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
@@ -266,7 +267,7 @@ typedef struct blkptr {
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
-#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
+#define BP_GET_BYTEORDER(bp) BF64_GET((bp)->blk_prop, 63, 1)
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_PHYSICAL_BIRTH(bp) \
@@ -285,11 +286,6 @@ typedef struct blkptr {
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
-#define BP_COUNT_GANG(bp) \
- (DVA_GET_GANG(&(bp)->blk_dva[0]) + \
- DVA_GET_GANG(&(bp)->blk_dva[1]) + \
- DVA_GET_GANG(&(bp)->blk_dva[2]))
-
#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
(dva1)->dva_word[0] == (dva2)->dva_word[0])
@@ -313,7 +309,9 @@ typedef struct blkptr {
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
-#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
+#define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \
+ (dva)->dva_word[1] == 0ULL)
+#define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp))
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
#define BP_ZERO(bp) \
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
index c8b0639..3f9a2e6 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
@@ -114,10 +114,21 @@ zfeature_lookup_name(const char *name, spa_feature_t *res)
return (ENOENT);
}
+boolean_t
+zfeature_depends_on(spa_feature_t fid, spa_feature_t check) {
+ zfeature_info_t *feature = &spa_feature_table[fid];
+
+ for (int i = 0; feature->fi_depends[i] != SPA_FEATURE_NONE; i++) {
+ if (feature->fi_depends[i] == check)
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
static void
zfeature_register(spa_feature_t fid, const char *guid, const char *name,
const char *desc, boolean_t readonly, boolean_t mos,
- const spa_feature_t *deps)
+ boolean_t activate_on_enable, const spa_feature_t *deps)
{
zfeature_info_t *feature = &spa_feature_table[fid];
static spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
@@ -137,6 +148,7 @@ zfeature_register(spa_feature_t fid, const char *guid, const char *name,
feature->fi_desc = desc;
feature->fi_can_readonly = readonly;
feature->fi_mos = mos;
+ feature->fi_activate_on_enable = activate_on_enable;
feature->fi_depends = deps;
}
@@ -145,21 +157,43 @@ zpool_feature_init(void)
{
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
"com.delphix:async_destroy", "async_destroy",
- "Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
+ "Destroy filesystems asynchronously.", B_TRUE, B_FALSE,
+ B_FALSE, NULL);
+
zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
"com.delphix:empty_bpobj", "empty_bpobj",
- "Snapshots use less space.", B_TRUE, B_FALSE, NULL);
+ "Snapshots use less space.", B_TRUE, B_FALSE,
+ B_FALSE, NULL);
+
zfeature_register(SPA_FEATURE_LZ4_COMPRESS,
"org.illumos:lz4_compress", "lz4_compress",
- "LZ4 compression algorithm support.", B_FALSE, B_FALSE, NULL);
+ "LZ4 compression algorithm support.", B_FALSE, B_FALSE,
+ B_FALSE, NULL);
+
zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
"com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump",
- "Crash dumps to multiple vdev pools.", B_FALSE, B_FALSE, NULL);
+ "Crash dumps to multiple vdev pools.", B_FALSE, B_FALSE,
+ B_FALSE, NULL);
+
zfeature_register(SPA_FEATURE_SPACEMAP_HISTOGRAM,
"com.delphix:spacemap_histogram", "spacemap_histogram",
- "Spacemaps maintain space histograms.", B_TRUE, B_FALSE, NULL);
+ "Spacemaps maintain space histograms.", B_TRUE, B_FALSE,
+ B_FALSE, NULL);
+
+ zfeature_register(SPA_FEATURE_ENABLED_TXG,
+ "com.delphix:enabled_txg", "enabled_txg",
+ "Record txg at which a feature is enabled", B_TRUE, B_FALSE,
+ B_FALSE, NULL);
+
+ static spa_feature_t hole_birth_deps[] = { SPA_FEATURE_ENABLED_TXG,
+ SPA_FEATURE_NONE };
+ zfeature_register(SPA_FEATURE_HOLE_BIRTH,
+ "com.delphix:hole_birth", "hole_birth",
+ "Retain hole birth txg for more precise zfs send",
+ B_FALSE, B_TRUE, B_TRUE, hole_birth_deps);
+
zfeature_register(SPA_FEATURE_EXTENSIBLE_DATASET,
"com.delphix:extensible_dataset", "extensible_dataset",
"Enhanced dataset functionality, used by other features.",
- B_FALSE, B_FALSE, NULL);
+ B_FALSE, B_FALSE, B_FALSE, NULL);
}
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
index 0babe55..3c3530b 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
@@ -44,10 +44,14 @@ typedef enum spa_feature {
SPA_FEATURE_LZ4_COMPRESS,
SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
SPA_FEATURE_SPACEMAP_HISTOGRAM,
+ SPA_FEATURE_ENABLED_TXG,
+ SPA_FEATURE_HOLE_BIRTH,
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURES
} spa_feature_t;
+#define SPA_FEATURE_DISABLED (-1ULL)
+
typedef struct zfeature_info {
spa_feature_t fi_feature;
const char *fi_uname; /* User-facing feature name */
@@ -55,6 +59,8 @@ typedef struct zfeature_info {
const char *fi_desc; /* Feature description */
boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
+ /* Activate this feature at the same time it is enabled */
+ boolean_t fi_activate_on_enable;
/* array of dependencies, terminated by SPA_FEATURE_NONE */
const spa_feature_t *fi_depends;
} zfeature_info_t;
@@ -69,6 +75,7 @@ extern boolean_t zfeature_is_valid_guid(const char *);
extern boolean_t zfeature_is_supported(const char *);
extern int zfeature_lookup_name(const char *name, spa_feature_t *res);
+extern boolean_t zfeature_depends_on(spa_feature_t fid, spa_feature_t check);
extern void zpool_feature_init(void);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index a4fbf04..6b6370d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -845,7 +845,7 @@ buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
#define BUF_EMPTY(buf) \
((buf)->b_dva.dva_word[0] == 0 && \
(buf)->b_dva.dva_word[1] == 0 && \
- (buf)->b_birth == 0)
+ (buf)->b_cksum0 == 0)
#define BUF_EQUAL(spa, dva, birth, buf) \
((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \
@@ -3767,9 +3767,13 @@ arc_write_done(zio_t *zio)
ASSERT(hdr->b_acb == NULL);
if (zio->io_error == 0) {
- hdr->b_dva = *BP_IDENTITY(zio->io_bp);
- hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
- hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
+ if (BP_IS_HOLE(zio->io_bp)) {
+ buf_discard_identity(hdr);
+ } else {
+ hdr->b_dva = *BP_IDENTITY(zio->io_bp);
+ hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
+ hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
+ }
} else {
ASSERT(BUF_EMPTY(hdr));
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
index a0c90cc..f70f23c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <sys/arc.h>
@@ -141,7 +141,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int err;
struct bptree_args *ba = arg;
- if (bp == NULL)
+ if (BP_IS_HOLE(bp))
return (0);
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 0a39159..27ed6e3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -455,10 +455,9 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
mutex_enter(&db->db_mtx);
if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
int blksz = db->db.db_size;
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
mutex_exit(&db->db_mtx);
- DB_GET_SPA(&spa, db);
abuf = arc_loan_buf(spa, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
@@ -519,7 +518,6 @@ static void
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
{
dnode_t *dn;
- spa_t *spa;
zbookmark_t zb;
uint32_t aflags = ARC_NOWAIT;
@@ -559,9 +557,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa,
- db->db.db_size, db, type));
DB_DNODE_EXIT(db);
+ dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
+ db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
db->db_state = DB_CACHED;
*flags |= DB_RF_CACHED;
@@ -569,7 +567,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
return;
}
- spa = dn->dn_objset->os_spa;
DB_DNODE_EXIT(db);
db->db_state = DB_READ;
@@ -586,7 +583,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
dbuf_add_ref(db, NULL);
- (void) arc_read(zio, spa, db->db_blkptr,
+ (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
@@ -598,8 +595,8 @@ int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
int err = 0;
- int havepzio = (zio != NULL);
- int prefetch;
+ boolean_t havepzio = (zio != NULL);
+ boolean_t prefetch;
dnode_t *dn;
/*
@@ -694,11 +691,10 @@ dbuf_noread(dmu_buf_impl_t *db)
cv_wait(&db->db_changed, &db->db_mtx);
if (db->db_state == DB_UNCACHED) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
- DB_GET_SPA(&spa, db);
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
@@ -753,9 +749,8 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
int size = db->db.db_size;
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
- DB_GET_SPA(&spa, db);
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
@@ -781,12 +776,9 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
ASSERT(db->db_data_pending != dr);
/* free this block */
- if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) {
- spa_t *spa;
+ if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
+ zio_free(db->db_objset->os_spa, txg, bp);
- DB_GET_SPA(&spa, db);
- zio_free(spa, txg, bp);
- }
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE;
@@ -804,9 +796,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
/*
* Evict (if its unreferenced) or clear (if its referenced) any level-0
* data blocks in the free range, so that any future readers will find
- * empty blocks. Also, if we happen across any level-1 dbufs in the
- * range that have not already been marked dirty, mark them dirty so
- * they stay in memory.
+ * empty blocks.
*
* This is a no-op if the dataset is in the middle of an incremental
* receive; see comment below for details.
@@ -816,14 +806,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- uint64_t first_l1 = start >> epbs;
- uint64_t last_l1 = end >> epbs;
- if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
+ if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID))
end = dn->dn_maxblkid;
- last_l1 = end >> epbs;
- }
dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
mutex_enter(&dn->dn_dbufs_mtx);
@@ -846,23 +831,8 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
db_next = list_next(&dn->dn_dbufs, db);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- if (db->db_level == 1 &&
- db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
- mutex_enter(&db->db_mtx);
- if (db->db_last_dirty &&
- db->db_last_dirty->dr_txg < txg) {
- dbuf_add_ref(db, FTAG);
- mutex_exit(&db->db_mtx);
- dbuf_will_dirty(db, tx);
- dbuf_rele(db, FTAG);
- } else {
- mutex_exit(&db->db_mtx);
- }
- }
-
if (db->db_level != 0)
continue;
- dprintf_dbuf(db, "found buf %s\n", "");
if (db->db_blkid < start || db->db_blkid > end)
continue;
@@ -939,24 +909,29 @@ dbuf_block_freeable(dmu_buf_impl_t *db)
* We don't need any locking to protect db_blkptr:
* If it's syncing, then db_last_dirty will be set
* so we'll ignore db_blkptr.
+ *
+ * This logic ensures that only block births for
+ * filled blocks are considered.
*/
ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_last_dirty)
+ if (db->db_last_dirty && (db->db_blkptr == NULL ||
+ !BP_IS_HOLE(db->db_blkptr))) {
birth_txg = db->db_last_dirty->dr_txg;
- else if (db->db_blkptr)
+ } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
birth_txg = db->db_blkptr->blk_birth;
+ }
/*
- * If we don't exist or are in a snapshot, we can't be freed.
+ * If this block don't exist or is in a snapshot, it can't be freed.
* Don't pass the bp to dsl_dataset_block_freeable() since we
* are holding the db_mtx lock and might deadlock if we are
* prefetching a dedup-ed block.
*/
- if (birth_txg)
+ if (birth_txg != 0)
return (ds == NULL ||
dsl_dataset_block_freeable(ds, NULL, birth_txg));
else
- return (FALSE);
+ return (B_FALSE);
}
void
@@ -976,7 +951,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
/*
- * This call to dbuf_will_dirty() with the dn_struct_rwlock held
+ * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
* is OK, because there can be no other references to the db
* when we are changing its size, so no concurrent DB_FILL can
* be happening.
@@ -985,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
* XXX we should be doing a dbuf_read, checking the return
* value and returning that up to our callers
*/
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
/* create the data buffer for the new block */
buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type);
@@ -1015,9 +990,8 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
void
dbuf_release_bp(dmu_buf_impl_t *db)
{
- objset_t *os;
+ objset_t *os = db->db_objset;
- DB_GET_OBJSET(&os, db);
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
ASSERT(arc_released(os->os_phys_buf) ||
list_link_active(&os->os_dsl_dataset->ds_synced_link));
@@ -1391,10 +1365,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (B_FALSE);
}
-#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
void
-dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
ASSERT(tx->tx_txg != 0);
@@ -1517,7 +1491,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
db->db_state = DB_FILL;
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
- dbuf_fill_done(db, tx);
+ dmu_buf_fill_done(&db->db, tx);
}
/*
@@ -2022,7 +1996,6 @@ dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
* Without that, the dbuf_rele() could lead to a dnode_rele() followed by the
* dnode's parent dbuf evicting its dnode handles.
*/
-#pragma weak dmu_buf_rele = dbuf_rele
void
dbuf_rele(dmu_buf_impl_t *db, void *tag)
{
@@ -2030,6 +2003,12 @@ dbuf_rele(dmu_buf_impl_t *db, void *tag)
dbuf_rele_and_unlock(db, tag);
}
+void
+dmu_buf_rele(dmu_buf_t *db, void *tag)
+{
+ dbuf_rele((dmu_buf_impl_t *)db, tag);
+}
+
/*
* dbuf_rele() for an already-locked dbuf. This is necessary to allow
* db_dirtycnt and db_holds to be updated atomically.
@@ -2480,18 +2459,14 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta;
- if (BP_IS_HOLE(bp)) {
- ASSERT(bp->blk_fill == 0);
- DB_DNODE_EXIT(db);
- return;
+ if (bp->blk_birth != 0) {
+ ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
+ BP_GET_TYPE(bp) == dn->dn_type) ||
+ (db->db_blkid == DMU_SPILL_BLKID &&
+ BP_GET_TYPE(bp) == dn->dn_bonustype));
+ ASSERT(BP_GET_LEVEL(bp) == db->db_level);
}
- ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
- BP_GET_TYPE(bp) == dn->dn_type) ||
- (db->db_blkid == DMU_SPILL_BLKID &&
- BP_GET_TYPE(bp) == dn->dn_bonustype));
- ASSERT(BP_GET_LEVEL(bp) == db->db_level);
-
mutex_enter(&db->db_mtx);
#ifdef ZFS_DEBUG
@@ -2517,7 +2492,11 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
fill++;
}
} else {
- fill = 1;
+ if (BP_IS_HOLE(bp)) {
+ fill = 0;
+ } else {
+ fill = 1;
+ }
}
} else {
blkptr_t *ibp = db->db.db_data;
@@ -2572,9 +2551,10 @@ static void
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
{
dmu_buf_impl_t *db = vdb;
- blkptr_t *bp = zio->io_bp;
blkptr_t *bp_orig = &zio->io_bp_orig;
- uint64_t txg = zio->io_txg;
+ blkptr_t *bp = db->db_blkptr;
+ objset_t *os = db->db_objset;
+ dmu_tx_t *tx = os->os_synctx;
dbuf_dirty_record_t **drp, *dr;
ASSERT0(zio->io_error);
@@ -2587,14 +2567,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
ASSERT(BP_EQUAL(bp, bp_orig));
} else {
- objset_t *os;
- dsl_dataset_t *ds;
- dmu_tx_t *tx;
-
- DB_GET_OBJSET(&os, db);
- ds = os->os_dsl_dataset;
- tx = os->os_synctx;
-
+ dsl_dataset_t *ds = os->os_dsl_dataset;
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
dsl_dataset_block_born(ds, bp, tx);
}
@@ -2607,7 +2580,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
while ((dr = *drp) != db->db_data_pending)
drp = &dr->dr_next;
ASSERT(!list_link_active(&dr->dr_dirty_node));
- ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
ASSERT(dr->dr_next == NULL);
*drp = dr->dr_next;
@@ -2641,14 +2613,14 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
+ ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
if (!BP_IS_HOLE(db->db_blkptr)) {
int epbs =
dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ ASSERT3U(db->db_blkid, <=,
+ dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
db->db.db_size);
- ASSERT3U(dn->dn_phys->dn_maxblkid
- >> (db->db_level * epbs), >=, db->db_blkid);
arc_set_callback(db->db_buf, dbuf_do_evict, db);
}
DB_DNODE_EXIT(db);
@@ -2661,8 +2633,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
-
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
}
static void
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
index f6ab094..b7fbe9a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
@@ -119,12 +119,12 @@ ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name,
sizeof (uint64_t), 1, &ddt->ddt_object[type][class]);
- if (error)
+ if (error != 0)
return (error);
- error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
+ VERIFY0(zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
- &ddt->ddt_histogram[type][class]);
+ &ddt->ddt_histogram[type][class]));
/*
* Seed the cached statistics.
@@ -139,8 +139,7 @@ ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
- ASSERT(error == 0);
- return (error);
+ return (0);
}
static void
@@ -595,7 +594,10 @@ ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
bcopy(src, dst, s_len);
}
- *version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc;
+ *version = cpfunc;
+ /* CONSTCOND */
+ if (ZFS_HOST_BYTEORDER)
+ *version |= DDT_COMPRESS_BYTEORDER_MASK;
return (c_len + 1);
}
@@ -612,7 +614,8 @@ ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
else
bcopy(src, dst, d_len);
- if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK)
+ if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) !=
+ (ZFS_HOST_BYTEORDER != 0))
byteswap_uint64_array(dst, d_len);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index ccc36270..7da25c7 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -687,7 +687,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
* will take the fast path, and (b) dnode_reallocate() can verify
* that the entire file has been freed.
*/
- if (offset == 0 && length == DMU_OBJECT_END)
+ if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
dn->dn_maxblkid = 0;
dnode_rele(dn, FTAG);
@@ -1253,10 +1253,8 @@ arc_buf_t *
dmu_request_arcbuf(dmu_buf_t *handle, int size)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
- spa_t *spa;
- DB_GET_SPA(&spa, db);
- return (arc_loan_buf(spa, size));
+ return (arc_loan_buf(db->db_objset->os_spa, size));
}
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
index 945641b..8ae452a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
@@ -141,7 +141,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (zb->zb_object != DMU_META_DNODE_OBJECT)
return (0);
- if (bp == NULL) {
+ if (BP_IS_HOLE(bp)) {
uint64_t span = DBP_SPAN(dnp, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
index c8b5f94..ab9f55b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
@@ -372,11 +372,12 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
- } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
+ } else if (BP_IS_HOLE(bp) &&
+ zb->zb_object == DMU_META_DNODE_OBJECT) {
uint64_t span = BP_SPAN(dnp, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
- } else if (bp == NULL) {
+ } else if (BP_IS_HOLE(bp)) {
uint64_t span = BP_SPAN(dnp, zb->zb_level);
err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index aa9a485..68bcac4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -36,6 +36,7 @@
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/callb.h>
+#include <sys/zfeature.h>
int zfs_pd_blks_max = 100;
@@ -72,7 +73,7 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
traverse_data_t *td = arg;
zbookmark_t zb;
- if (bp->blk_birth == 0)
+ if (BP_IS_HOLE(bp))
return (0);
if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
@@ -96,7 +97,7 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_t zb;
- if (bp->blk_birth == 0)
+ if (BP_IS_HOLE(bp))
return (0);
if (claim_txg == 0 || bp->blk_birth < claim_txg)
@@ -226,14 +227,35 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
ASSERT(0);
}
+ if (bp->blk_birth == 0) {
+ if (spa_feature_is_active(td->td_spa, SPA_FEATURE_HOLE_BIRTH)) {
+ /*
+ * Since this block has a birth time of 0 it must be a
+ * hole created before the SPA_FEATURE_HOLE_BIRTH
+ * feature was enabled. If SPA_FEATURE_HOLE_BIRTH
+ * was enabled before the min_txg for this traveral we
+ * know the hole must have been created before the
+ * min_txg for this traveral, so we can skip it. If
+ * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
+ * for this traveral we cannot tell if the hole was
+ * created before or after the min_txg for this
+ * traversal, so we cannot skip it.
+ */
+ uint64_t hole_birth_enabled_txg;
+ VERIFY(spa_feature_enabled_txg(td->td_spa,
+ SPA_FEATURE_HOLE_BIRTH, &hole_birth_enabled_txg));
+ if (hole_birth_enabled_txg < td->td_min_txg)
+ return (0);
+ }
+ } else if (bp->blk_birth <= td->td_min_txg) {
+ return (0);
+ }
+
if (BP_IS_HOLE(bp)) {
- err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
+ err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
return (err);
}
- if (bp->blk_birth <= td->td_min_txg)
- return (0);
-
if (pd && !pd->pd_exited &&
((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
@@ -436,7 +458,8 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (pfd->pd_cancel)
return (SET_ERROR(EINTR));
- if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
+ if (BP_IS_HOLE(bp) ||
+ !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
return (0);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
index 9c5e3cd..b0eabbf 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
@@ -1549,7 +1549,13 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
} else {
ASSERT(dn->dn_maxblkid == 0);
if (off == 0 && len >= blksz) {
- /* Freeing the whole block; fast-track this request */
+ /*
+ * Freeing the whole block; fast-track this request.
+ * Note that we won't dirty any indirect blocks,
+ * which is fine because we will be freeing the entire
+ * file and thus all indirect blocks will be freed
+ * by free_children().
+ */
blkid = 0;
nblks = 1;
goto done;
@@ -1576,7 +1582,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
if (db->db_last_dirty ||
(db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
rw_exit(&dn->dn_struct_rwlock);
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
data = db->db.db_data;
bzero(data + blkoff, head);
@@ -1612,7 +1618,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
if (db->db_last_dirty ||
(db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
rw_exit(&dn->dn_struct_rwlock);
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
bzero(db->db.db_data, tail);
}
@@ -1633,18 +1639,18 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
nblks += 1;
/*
- * Read in and mark all the level-1 indirects dirty,
- * so that they will stay in memory until syncing phase.
- * Always dirty the first and last indirect to make sure
- * we dirty all the partial indirects.
+ * Dirty the first and last indirect blocks, as they (and/or their
+ * parents) will need to be written out if they were only
+ * partially freed. Interior indirect blocks will be themselves freed,
+ * by free_children(), so they need not be dirtied. Note that these
+ * interior blocks have already been prefetched by dmu_tx_hold_free().
*/
if (dn->dn_nlevels > 1) {
- uint64_t i, first, last;
- int shift = epbs + dn->dn_datablkshift;
+ uint64_t first, last;
first = blkid >> epbs;
if (db = dbuf_hold_level(dn, 1, first, FTAG)) {
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
dbuf_rele(db, FTAG);
}
if (trunc)
@@ -1652,26 +1658,11 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
else
last = (blkid + nblks - 1) >> epbs;
if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) {
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
dbuf_rele(db, FTAG);
}
- for (i = first + 1; i < last; i++) {
- uint64_t ibyte = i << shift;
- int err;
-
- err = dnode_next_offset(dn,
- DNODE_FIND_HAVELOCK, &ibyte, 1, 1, 0);
- i = ibyte >> shift;
- if (err == ESRCH || i >= last)
- break;
- ASSERT(err == 0);
- db = dbuf_hold_level(dn, 1, i, FTAG);
- if (db) {
- dbuf_will_dirty(db, tx);
- dbuf_rele(db, FTAG);
- }
- }
}
+
done:
/*
* Add this range to the dnode range list.
@@ -1699,8 +1690,6 @@ done:
dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
dnode_setdirty(dn, tx);
out:
- if (trunc && dn->dn_maxblkid >= (off >> blkshift))
- dn->dn_maxblkid = (off >> blkshift ? (off >> blkshift) - 1 : 0);
rw_exit(&dn->dn_struct_rwlock);
}
@@ -1877,8 +1866,10 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
data = db->db.db_data;
}
- if (db && txg &&
- (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) {
+
+ if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
+ db->db_blkptr->blk_birth <= txg ||
+ BP_IS_HOLE(db->db_blkptr))) {
/*
* This can only happen when we are searching up the tree
* and these conditions mean that we need to keep climbing.
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
index 7fec170..d796404 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
@@ -32,6 +32,7 @@
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>
+#include <sys/zfeature.h>
static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
@@ -112,26 +113,44 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
rw_exit(&dn->dn_struct_rwlock);
}
-static int
+static void
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
{
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
uint64_t bytesfreed = 0;
- int i, blocks_freed = 0;
dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
- for (i = 0; i < num; i++, bp++) {
+ for (int i = 0; i < num; i++, bp++) {
if (BP_IS_HOLE(bp))
continue;
bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
+
+ /*
+ * Save some useful information on the holes being
+ * punched, including logical size, type, and indirection
+ * level. Retaining birth time enables detection of when
+ * holes are punched for reducing the number of free
+ * records transmitted during a zfs send.
+ */
+
+ uint64_t lsize = BP_GET_LSIZE(bp);
+ dmu_object_type_t type = BP_GET_TYPE(bp);
+ uint64_t lvl = BP_GET_LEVEL(bp);
+
bzero(bp, sizeof (blkptr_t));
- blocks_freed += 1;
+
+ if (spa_feature_is_active(dn->dn_objset->os_spa,
+ SPA_FEATURE_HOLE_BIRTH)) {
+ BP_SET_LSIZE(bp, lsize);
+ BP_SET_TYPE(bp, type);
+ BP_SET_LEVEL(bp, lvl);
+ BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0);
+ }
}
dnode_diduse_space(dn, -bytesfreed);
- return (blocks_freed);
}
#ifdef ZFS_DEBUG
@@ -215,30 +234,27 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
#define ALL -1
-static int
-free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
+static void
+free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
dmu_tx_t *tx)
{
dnode_t *dn;
blkptr_t *bp;
dmu_buf_impl_t *subdb;
uint64_t start, end, dbstart, dbend, i;
- int epbs, shift, err;
- int all = TRUE;
- int blocks_freed = 0;
+ int epbs, shift;
/*
* There is a small possibility that this block will not be cached:
* 1 - if level > 1 and there are no children with level <= 1
- * 2 - if we didn't get a dirty hold (because this block had just
- * finished being written -- and so had no holds), and then this
- * block got evicted before we got here.
+ * 2 - if this block was evicted since we read it from
+ * dmu_tx_hold_free().
*/
if (db->db_state != DB_CACHED)
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
dbuf_release_bp(db);
- bp = (blkptr_t *)db->db.db_data;
+ bp = db->db.db_data;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
@@ -248,7 +264,6 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
start = blkid >> shift;
if (dbstart < start) {
bp += start - dbstart;
- all = FALSE;
} else {
start = dbstart;
}
@@ -256,49 +271,46 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
end = (blkid + nblks - 1) >> shift;
if (dbend <= end)
end = dbend;
- else if (all)
- all = trunc;
+
ASSERT3U(start, <=, end);
if (db->db_level == 1) {
FREE_VERIFY(db, start, end, tx);
- blocks_freed = free_blocks(dn, bp, end-start+1, tx);
- arc_buf_freeze(db->db_buf);
- ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
- DB_DNODE_EXIT(db);
- return (all ? ALL : blocks_freed);
+ free_blocks(dn, bp, end-start+1, tx);
+ } else {
+ for (i = start; i <= end; i++, bp++) {
+ if (BP_IS_HOLE(bp))
+ continue;
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
+ i, B_TRUE, FTAG, &subdb));
+ rw_exit(&dn->dn_struct_rwlock);
+ ASSERT3P(bp, ==, subdb->db_blkptr);
+
+ free_children(subdb, blkid, nblks, tx);
+ dbuf_rele(subdb, FTAG);
+ }
}
- for (i = start; i <= end; i++, bp++) {
- if (BP_IS_HOLE(bp))
- continue;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
- ASSERT0(err);
- rw_exit(&dn->dn_struct_rwlock);
-
- if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
- ASSERT3P(subdb->db_blkptr, ==, bp);
- blocks_freed += free_blocks(dn, bp, 1, tx);
- } else {
- all = FALSE;
- }
- dbuf_rele(subdb, FTAG);
+ /* If this whole block is free, free ourself too. */
+ for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
+ if (!BP_IS_HOLE(bp))
+ break;
+ }
+ if (i == 1 << epbs) {
+ /* didn't find any non-holes */
+ bzero(db->db.db_data, db->db.db_size);
+ free_blocks(dn, db->db_blkptr, 1, tx);
+ } else {
+ /*
+ * Partial block free; must be marked dirty so that it
+ * will be written out.
+ */
+ ASSERT(db->db_dirtycnt > 0);
}
+
DB_DNODE_EXIT(db);
arc_buf_freeze(db->db_buf);
-#ifdef ZFS_DEBUG
- bp -= (end-start)+1;
- for (i = start; i <= end; i++, bp++) {
- if (i == start && blkid != 0)
- continue;
- else if (i == end && !trunc)
- continue;
- ASSERT0(bp->blk_birth);
- }
-#endif
- ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
- return (all ? ALL : blocks_freed);
}
/*
@@ -306,20 +318,21 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
* and "free" all the blocks contained there.
*/
static void
-dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
+dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks,
+ dmu_tx_t *tx)
{
blkptr_t *bp = dn->dn_phys->dn_blkptr;
- dmu_buf_impl_t *db;
- int trunc, start, end, shift, i, err;
int dnlevel = dn->dn_phys->dn_nlevels;
+ boolean_t trunc = B_FALSE;
if (blkid > dn->dn_phys->dn_maxblkid)
return;
ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
- trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
- if (trunc)
+ if (blkid + nblks > dn->dn_phys->dn_maxblkid) {
nblks = dn->dn_phys->dn_maxblkid - blkid + 1;
+ trunc = B_TRUE;
+ }
/* There are no indirect blocks in the object */
if (dnlevel == 1) {
@@ -328,41 +341,35 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
return;
}
ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
- (void) free_blocks(dn, bp + blkid, nblks, tx);
- if (trunc) {
- uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
- (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
- dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
- ASSERT(off < dn->dn_phys->dn_maxblkid ||
- dn->dn_phys->dn_maxblkid == 0 ||
- dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
- }
- return;
- }
-
- shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
- start = blkid >> shift;
- ASSERT(start < dn->dn_phys->dn_nblkptr);
- end = (blkid + nblks - 1) >> shift;
- bp += start;
- for (i = start; i <= end; i++, bp++) {
- if (BP_IS_HOLE(bp))
- continue;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
- ASSERT0(err);
- rw_exit(&dn->dn_struct_rwlock);
+ free_blocks(dn, bp + blkid, nblks, tx);
+ } else {
+ int shift = (dnlevel - 1) *
+ (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
+ int start = blkid >> shift;
+ int end = (blkid + nblks - 1) >> shift;
+ dmu_buf_impl_t *db;
+
+ ASSERT(start < dn->dn_phys->dn_nblkptr);
+ bp += start;
+ for (int i = start; i <= end; i++, bp++) {
+ if (BP_IS_HOLE(bp))
+ continue;
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
+ TRUE, FTAG, &db));
+ rw_exit(&dn->dn_struct_rwlock);
+
+ free_children(db, blkid, nblks, tx);
+ dbuf_rele(db, FTAG);
- if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
- ASSERT3P(db->db_blkptr, ==, bp);
- (void) free_blocks(dn, bp, 1, tx);
}
- dbuf_rele(db, FTAG);
}
+
if (trunc) {
+ dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
+
uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
(dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
- dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
ASSERT(off < dn->dn_phys->dn_maxblkid ||
dn->dn_phys->dn_maxblkid == 0 ||
dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
@@ -503,7 +510,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
ASSERT(dn->dn_free_txg > 0);
if (dn->dn_allocated_txg != dn->dn_free_txg)
- dbuf_will_dirty(dn->dn_dbuf, tx);
+ dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
bzero(dn->dn_phys, sizeof (dnode_phys_t));
mutex_enter(&dn->dn_mtx);
@@ -610,13 +617,15 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dn->dn_next_bonustype[txgoff] = 0;
}
+ boolean_t freeing_dnode = dn->dn_free_txg > 0 &&
+ dn->dn_free_txg <= tx->tx_txg;
+
/*
* We will either remove a spill block when a file is being removed
* or we have been asked to remove it.
*/
if (dn->dn_rm_spillblk[txgoff] ||
- ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
- dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
+ ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && freeing_dnode)) {
if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
kill_spill = B_TRUE;
dn->dn_rm_spillblk[txgoff] = 0;
@@ -639,7 +648,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx);
if (kill_spill) {
- (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
+ free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
mutex_enter(&dn->dn_mtx);
dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
mutex_exit(&dn->dn_mtx);
@@ -655,7 +664,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
kmem_free(rp, sizeof (free_range_t));
}
- if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
+ if (freeing_dnode) {
dnode_sync_free(dn, tx);
return;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
index 0c5760b..928d94a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
@@ -120,17 +120,16 @@ int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
boolean_t async)
{
+ int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
+ int compressed = BP_GET_PSIZE(bp);
+ int uncompressed = BP_GET_UCSIZE(bp);
+
if (BP_IS_HOLE(bp))
return (0);
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(bp->blk_birth <= tx->tx_txg);
- int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
- int compressed = BP_GET_PSIZE(bp);
- int uncompressed = BP_GET_UCSIZE(bp);
-
- ASSERT(used > 0);
if (ds == NULL) {
dsl_free(tx->tx_pool, tx->tx_txg, bp);
dsl_pool_mos_diduse_space(tx->tx_pool,
@@ -228,7 +227,8 @@ boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
uint64_t blk_birth)
{
- if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
+ if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
+ (bp != NULL && BP_IS_HOLE(bp)))
return (B_FALSE);
ddt_prefetch(dsl_dataset_get_spa(ds), bp);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
index 047fc63..6f75b0d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
@@ -144,6 +144,8 @@ process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
struct process_old_arg *poa = arg;
dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
+ ASSERT(!BP_IS_HOLE(bp));
+
if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
if (poa->ds_prev && !poa->after_branch_point &&
@@ -536,7 +538,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
struct killarg *ka = arg;
dmu_tx_t *tx = ka->tx;
- if (bp == NULL)
+ if (BP_IS_HOLE(bp))
return (0);
if (zb->zb_level == ZB_ZIL_LEVEL) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
index 8576a22..35e867d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -481,7 +481,7 @@ dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
zil_header_t *zh = zsa->zsa_zh;
zbookmark_t zb;
- if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
+ if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return (0);
/*
@@ -513,7 +513,8 @@ dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_t zb;
- if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
+ if (BP_IS_HOLE(bp) ||
+ bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return (0);
/*
@@ -765,7 +766,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
if (dsl_scan_check_resume(scn, dnp, zb))
return;
- if (bp->blk_birth == 0)
+ if (BP_IS_HOLE(bp))
return;
scn->scn_visited_this_txg++;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index 4819559..1737269 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -1877,7 +1877,7 @@ static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
- if (bp != NULL) {
+ if (!BP_IS_HOLE(bp)) {
zio_t *rio = arg;
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
@@ -2401,6 +2401,33 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
ENOTSUP));
}
+
+ /*
+ * Load refcounts for ZFS features from disk into an in-memory
+ * cache during SPA initialization.
+ */
+ for (spa_feature_t i = 0; i < SPA_FEATURES; i++) {
+ uint64_t refcount;
+
+ error = feature_get_refcount_from_disk(spa,
+ &spa_feature_table[i], &refcount);
+ if (error == 0) {
+ spa->spa_feat_refcount_cache[i] = refcount;
+ } else if (error == ENOTSUP) {
+ spa->spa_feat_refcount_cache[i] =
+ SPA_FEATURE_DISABLED;
+ } else {
+ return (spa_vdev_err(rvd,
+ VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+ }
+ }
+
+ if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
+ if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG,
+ &spa->spa_feat_enabled_txg_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
}
spa->spa_is_initializing = B_TRUE;
@@ -6094,7 +6121,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
/*
* Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
- * information. This avoids the dbuf_will_dirty() path and
+ * information. This avoids the dmu_buf_will_dirty() path and
* saves us a pre-read to get data we don't actually care about.
*/
bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 65ba71b..732ca26 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -634,6 +634,15 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
+ /*
+ * As a pool is being created, treat all features as disabled by
+ * setting SPA_FEATURE_DISABLED for all entries in the feature
+ * refcount cache.
+ */
+ for (int i = 0; i < SPA_FEATURES; i++) {
+ spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED;
+ }
+
return (spa);
}
@@ -1191,11 +1200,19 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
*/
void
-spa_activate_mos_feature(spa_t *spa, const char *feature)
+spa_activate_mos_feature(spa_t *spa, const char *feature, dmu_tx_t *tx)
{
if (!nvlist_exists(spa->spa_label_features, feature)) {
fnvlist_add_boolean(spa->spa_label_features, feature);
- vdev_config_dirty(spa->spa_root_vdev);
+ /*
+ * When we are creating the pool (tx_txg==TXG_INITIAL), we can't
+ * dirty the vdev config because lock SCL_CONFIG is not held.
+ * Thankfully, in this case we don't need to dirty the config
+ * because it will be written out anyway when we finish
+ * creating the pool.
+ */
+ if (tx->tx_txg != TXG_INITIAL)
+ vdev_config_dirty(spa->spa_root_vdev);
}
}
@@ -1354,7 +1371,7 @@ spa_generate_guid(spa_t *spa)
}
void
-sprintf_blkptr(char *buf, const blkptr_t *bp)
+snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
{
char type[256];
char *checksum = NULL;
@@ -1376,7 +1393,8 @@ sprintf_blkptr(char *buf, const blkptr_t *bp)
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
- SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress);
+ SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
+ compress);
}
void
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
index b8f5eeb..207834d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
@@ -268,8 +268,6 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
-void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
-void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
@@ -295,20 +293,6 @@ void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db)))
#define DB_DNODE_EXIT(_db) (zrl_remove(&DB_DNODE_LOCK(_db)))
#define DB_DNODE_HELD(_db) (!zrl_is_zero(&DB_DNODE_LOCK(_db)))
-#define DB_GET_SPA(_spa_p, _db) { \
- dnode_t *__dn; \
- DB_DNODE_ENTER(_db); \
- __dn = DB_DNODE(_db); \
- *(_spa_p) = __dn->dn_objset->os_spa; \
- DB_DNODE_EXIT(_db); \
-}
-#define DB_GET_OBJSET(_os_p, _db) { \
- dnode_t *__dn; \
- DB_DNODE_ENTER(_db); \
- __dn = DB_DNODE(_db); \
- *(_os_p) = __dn->dn_objset; \
- DB_DNODE_EXIT(_db); \
-}
void dbuf_init(void);
void dbuf_fini(void);
@@ -358,7 +342,7 @@ _NOTE(CONSTCOND) } while (0)
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
- sprintf_blkptr(__blkbuf, bp); \
+ snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
index a10faa3..35f94d2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
@@ -295,6 +295,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write"
#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions"
+#define DMU_POOL_FEATURE_ENABLED_TXG "feature_enabled_txg"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
index 0fb2794..75ab2af 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -65,20 +65,33 @@ struct dsl_dataset;
#define BF32_GET(x, low, len) BF32_DECODE(x, low, len)
#define BF64_GET(x, low, len) BF64_DECODE(x, low, len)
-#define BF32_SET(x, low, len, val) \
- ((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len))
-#define BF64_SET(x, low, len, val) \
- ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len))
+#define BF32_SET(x, low, len, val) do { \
+ ASSERT3U(val, <, 1U << (len)); \
+ ASSERT3U(low + len, <=, 32); \
+ (x) ^= BF32_ENCODE((x >> low) ^ (val), low, len); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BF64_SET(x, low, len, val) do { \
+ ASSERT3U(val, <, 1ULL << (len)); \
+ ASSERT3U(low + len, <=, 64); \
+ ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)); \
+_NOTE(CONSTCOND) } while (0)
#define BF32_GET_SB(x, low, len, shift, bias) \
((BF32_GET(x, low, len) + (bias)) << (shift))
#define BF64_GET_SB(x, low, len, shift, bias) \
((BF64_GET(x, low, len) + (bias)) << (shift))
-#define BF32_SET_SB(x, low, len, shift, bias, val) \
- BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
-#define BF64_SET_SB(x, low, len, shift, bias, val) \
- BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
+#define BF32_SET_SB(x, low, len, shift, bias, val) do { \
+ ASSERT(IS_P2ALIGNED(val, 1U << shift)); \
+ ASSERT3S((val) >> (shift), >=, bias); \
+ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)); \
+_NOTE(CONSTCOND) } while (0)
+#define BF64_SET_SB(x, low, len, shift, bias, val) do { \
+ ASSERT(IS_P2ALIGNED(val, 1ULL << shift)); \
+ ASSERT3S((val) >> (shift), >=, bias); \
+ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)); \
+_NOTE(CONSTCOND) } while (0)
/*
* We currently support nine block sizes, from 512 bytes to 128K.
@@ -197,6 +210,15 @@ typedef struct zio_cksum {
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
+/*
+ * A block is a hole when it has either 1) never been written to, or
+ * 2) is zero-filled. In both cases, ZFS can return all zeroes for all reads
+ * without physically allocating disk space. Holes are represented in the
+ * blkptr_t structure by zeroed blk_dva. Correct checking for holes is
+ * done through the BP_IS_HOLE macro. For holes, the logical size, level,
+ * DMU object type, and birth times are all also stored for holes that
+ * were written to at some point (i.e. were punched after having been filled).
+ */
typedef struct blkptr {
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */
@@ -211,9 +233,10 @@ typedef struct blkptr {
* Macros to get and set fields in a bp or DVA.
*/
#define DVA_GET_ASIZE(dva) \
- BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
+ BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
#define DVA_SET_ASIZE(dva, x) \
- BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
+ BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
+ SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
@@ -230,14 +253,14 @@ typedef struct blkptr {
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
- BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
+ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_LSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
+ BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_PSIZE(bp) \
- BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
+ BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_PSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
+ BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
@@ -257,7 +280,7 @@ typedef struct blkptr {
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
-#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
+#define BP_GET_BYTEORDER(bp) BF64_GET((bp)->blk_prop, 63, 1)
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_PHYSICAL_BIRTH(bp) \
@@ -315,7 +338,9 @@ typedef struct blkptr {
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
-#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
+#define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \
+ (dva)->dva_word[1] == 0ULL)
+#define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp))
/* BP_IS_RAIDZ(bp) assumes no block compression */
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
@@ -338,14 +363,10 @@ typedef struct blkptr {
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
-/*
- * Note: the byteorder is either 0 or -1, both of which are palindromes.
- * This simplifies the endianness handling a bit.
- */
#if BYTE_ORDER == _BIG_ENDIAN
#define ZFS_HOST_BYTEORDER (0ULL)
#else
-#define ZFS_HOST_BYTEORDER (-1ULL)
+#define ZFS_HOST_BYTEORDER (1ULL)
#endif
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
@@ -357,18 +378,22 @@ typedef struct blkptr {
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
-#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \
+#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
- int size = BP_SPRINTF_LEN; \
int len = 0; \
int copies = 0; \
\
if (bp == NULL) { \
- len = func(buf + len, size - len, "<NULL>"); \
+ len += func(buf + len, size - len, "<NULL>"); \
} else if (BP_IS_HOLE(bp)) { \
- len = func(buf + len, size - len, "<hole>"); \
+ len += func(buf + len, size - len, "<hole>"); \
+ if (bp->blk_birth > 0) { \
+ len += func(buf + len, size - len, \
+ " birth=%lluL", \
+ (u_longlong_t)bp->blk_birth); \
+ } \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \
@@ -614,7 +639,8 @@ extern objset_t *spa_meta_objset(spa_t *spa);
extern uint64_t spa_deadman_synctime(spa_t *spa);
/* Miscellaneous support routines */
-extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
+extern void spa_activate_mos_feature(spa_t *spa, const char *feature,
+ dmu_tx_t *tx);
extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
extern int spa_rename(const char *oldname, const char *newname);
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
@@ -623,7 +649,7 @@ extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
extern uint64_t spa_generate_guid(spa_t *spa);
-extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
+extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
extern int spa_change_guid(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
@@ -692,9 +718,9 @@ extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
+ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
- sprintf_blkptr(__blkbuf, (bp)); \
+ snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index fa11ccf..6fd44c8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -39,6 +39,7 @@
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
+#include <zfeature_common.h>
#ifdef __cplusplus
extern "C" {
@@ -237,6 +238,9 @@ struct spa {
uint64_t spa_feat_for_write_obj; /* required to write to pool */
uint64_t spa_feat_for_read_obj; /* required to read from pool */
uint64_t spa_feat_desc_obj; /* Feature descriptions */
+ uint64_t spa_feat_enabled_txg_obj; /* Feature enabled txg */
+ /* cache feature refcounts */
+ uint64_t spa_feat_refcount_cache[SPA_FEATURES];
#ifdef illumos
cyclic_id_t spa_deadman_cycid; /* cyclic id */
#else /* FreeBSD */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
index fb30ea9..2cbcf9c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
#ifndef _SYS_VDEV_H
@@ -112,7 +112,7 @@ extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern void vdev_cache_init(vdev_t *vd);
extern void vdev_cache_fini(vdev_t *vd);
-extern int vdev_cache_read(zio_t *zio);
+extern boolean_t vdev_cache_read(zio_t *zio);
extern void vdev_cache_write(zio_t *zio);
extern void vdev_cache_purge(vdev_t *vd);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
index c2ca63f..5abde14 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
@@ -34,6 +34,10 @@
extern "C" {
#endif
+#define VALID_FEATURE_FID(fid) ((fid) >= 0 && (fid) < SPA_FEATURES)
+#define VALID_FEATURE_OR_NONE(fid) ((fid) == SPA_FEATURE_NONE || \
+ VALID_FEATURE_FID(fid))
+
struct spa;
struct dmu_tx;
struct objset;
@@ -45,6 +49,8 @@ extern void spa_feature_incr(struct spa *, spa_feature_t, struct dmu_tx *);
extern void spa_feature_decr(struct spa *, spa_feature_t, struct dmu_tx *);
extern boolean_t spa_feature_is_enabled(struct spa *, spa_feature_t);
extern boolean_t spa_feature_is_active(struct spa *, spa_feature_t);
+extern boolean_t spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid,
+ uint64_t *txg);
extern uint64_t spa_feature_refcount(spa_t *, spa_feature_t, uint64_t);
extern boolean_t spa_features_check(spa_t *, boolean_t, nvlist_t *, nvlist_t *);
@@ -53,6 +59,8 @@ extern boolean_t spa_features_check(spa_t *, boolean_t, nvlist_t *, nvlist_t *);
* use the above interfaces.
*/
extern int feature_get_refcount(struct spa *, zfeature_info_t *, uint64_t *);
+extern int feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
+ uint64_t *res);
extern void feature_enable_sync(struct spa *, zfeature_info_t *,
struct dmu_tx *);
extern void feature_sync(struct spa *, zfeature_info_t *, uint64_t,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
index 9757340..2ad15fe 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
@@ -260,9 +260,9 @@ vdev_cache_fill(zio_t *fio)
}
/*
- * Read data from the cache. Returns 0 on cache hit, errno on a miss.
+ * Read data from the cache. Returns B_TRUE cache hit, B_FALSE on miss.
*/
-int
+boolean_t
vdev_cache_read(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
@@ -274,16 +274,16 @@ vdev_cache_read(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_READ);
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
- return (SET_ERROR(EINVAL));
+ return (B_FALSE);
if (zio->io_size > zfs_vdev_cache_max)
- return (SET_ERROR(EOVERFLOW));
+ return (B_FALSE);
/*
* If the I/O straddles two or more cache blocks, don't cache it.
*/
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
- return (SET_ERROR(EXDEV));
+ return (B_FALSE);
ASSERT(cache_phase + zio->io_size <= VCBS);
@@ -295,7 +295,7 @@ vdev_cache_read(zio_t *zio)
if (ve != NULL) {
if (ve->ve_missed_update) {
mutex_exit(&vc->vc_lock);
- return (SET_ERROR(ESTALE));
+ return (B_FALSE);
}
if ((fio = ve->ve_fill_io) != NULL) {
@@ -303,7 +303,7 @@ vdev_cache_read(zio_t *zio)
zio_add_child(zio, fio);
mutex_exit(&vc->vc_lock);
VDCSTAT_BUMP(vdc_stat_delegations);
- return (0);
+ return (B_TRUE);
}
vdev_cache_hit(vc, ve, zio);
@@ -311,14 +311,14 @@ vdev_cache_read(zio_t *zio)
mutex_exit(&vc->vc_lock);
VDCSTAT_BUMP(vdc_stat_hits);
- return (0);
+ return (B_TRUE);
}
ve = vdev_cache_allocate(zio);
if (ve == NULL) {
mutex_exit(&vc->vc_lock);
- return (SET_ERROR(ENOMEM));
+ return (B_FALSE);
}
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
@@ -333,7 +333,7 @@ vdev_cache_read(zio_t *zio)
zio_nowait(fio);
VDCSTAT_BUMP(vdc_stat_misses);
- return (0);
+ return (B_TRUE);
}
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
index 3e5b3e0..9ee8545 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
@@ -262,8 +262,6 @@ vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz)
FKIOCTL, kcred, NULL) == 0) {
uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
- zfs_dbgmsg("vdev %s, capacity %llu, altern lba %llu",
- vd->vdev_path, capacity, efi_altern_lba);
if (capacity > efi_altern_lba)
avail_space = (capacity - efi_altern_lba) * blksz;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
index 62a8871..e69fcd3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
@@ -216,6 +216,8 @@ spa_features_check(spa_t *spa, boolean_t for_write,
}
/*
+ * Use an in-memory cache of feature refcounts for quick retrieval.
+ *
* Note: well-designed features will not need to use this; they should
* use spa_feature_is_enabled() and spa_feature_is_active() instead.
* However, this is non-static for zdb and zhack.
@@ -223,6 +225,24 @@ spa_features_check(spa_t *spa, boolean_t for_write,
int
feature_get_refcount(spa_t *spa, zfeature_info_t *feature, uint64_t *res)
{
+ ASSERT(VALID_FEATURE_FID(feature->fi_feature));
+ if (spa->spa_feat_refcount_cache[feature->fi_feature] ==
+ SPA_FEATURE_DISABLED) {
+ return (SET_ERROR(ENOTSUP));
+ }
+ *res = spa->spa_feat_refcount_cache[feature->fi_feature];
+ return (0);
+}
+
+/*
+ * Note: well-designed features will not need to use this; they should
+ * use spa_feature_is_enabled() and spa_feature_is_active() instead.
+ * However, this is non-static for zdb and zhack.
+ */
+int
+feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
+ uint64_t *res)
+{
int err;
uint64_t refcount;
uint64_t zapobj = feature->fi_can_readonly ?
@@ -247,6 +267,26 @@ feature_get_refcount(spa_t *spa, zfeature_info_t *feature, uint64_t *res)
return (0);
}
+
+static int
+feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res) {
+ uint64_t enabled_txg_obj = spa->spa_feat_enabled_txg_obj;
+
+ ASSERT(zfeature_depends_on(feature->fi_feature,
+ SPA_FEATURE_ENABLED_TXG));
+
+ if (!spa_feature_is_enabled(spa, feature->fi_feature)) {
+ return (SET_ERROR(ENOTSUP));
+ }
+
+ ASSERT(enabled_txg_obj != 0);
+
+ VERIFY0(zap_lookup(spa->spa_meta_objset, spa->spa_feat_enabled_txg_obj,
+ feature->fi_guid, sizeof (uint64_t), 1, res));
+
+ return (0);
+}
+
/*
* This function is non-static for zhack; it should otherwise not be used
* outside this file.
@@ -255,16 +295,31 @@ void
feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
dmu_tx_t *tx)
{
+ ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature));
uint64_t zapobj = feature->fi_can_readonly ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid,
sizeof (uint64_t), 1, &refcount, tx));
+ /*
+ * feature_sync is called directly from zhack, allowing the
+ * creation of arbitrary features whose fi_feature field may
+ * be greater than SPA_FEATURES. When called from zhack, the
+ * zfeature_info_t object's fi_feature field will be set to
+ * SPA_FEATURE_NONE.
+ */
+ if (feature->fi_feature != SPA_FEATURE_NONE) {
+ uint64_t *refcount_cache =
+ &spa->spa_feat_refcount_cache[feature->fi_feature];
+ VERIFY3U(*refcount_cache, ==,
+ atomic_swap_64(refcount_cache, refcount));
+ }
+
if (refcount == 0)
spa_deactivate_mos_feature(spa, feature->fi_guid);
else if (feature->fi_mos)
- spa_activate_mos_feature(spa, feature->fi_guid);
+ spa_activate_mos_feature(spa, feature->fi_guid, tx);
}
/*
@@ -274,6 +329,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
void
feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
+ uint64_t initial_refcount = feature->fi_activate_on_enable ? 1 : 0;
uint64_t zapobj = feature->fi_can_readonly ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
@@ -293,7 +349,24 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
VERIFY0(zap_update(spa->spa_meta_objset, spa->spa_feat_desc_obj,
feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
feature->fi_desc, tx));
- feature_sync(spa, feature, 0, tx);
+
+ feature_sync(spa, feature, initial_refcount, tx);
+
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) {
+ uint64_t enabling_txg = dmu_tx_get_txg(tx);
+
+ if (spa->spa_feat_enabled_txg_obj == 0ULL) {
+ spa->spa_feat_enabled_txg_obj =
+ zap_create_link(spa->spa_meta_objset,
+ DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_FEATURE_ENABLED_TXG, tx);
+ }
+ spa_feature_incr(spa, SPA_FEATURE_ENABLED_TXG, tx);
+
+ VERIFY0(zap_add(spa->spa_meta_objset,
+ spa->spa_feat_enabled_txg_obj, feature->fi_guid,
+ sizeof (uint64_t), 1, &enabling_txg, tx));
+ }
}
static void
@@ -305,15 +378,14 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
uint64_t zapobj = feature->fi_can_readonly ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
- ASSERT3U(fid, <, SPA_FEATURES);
+ ASSERT(VALID_FEATURE_FID(fid));
ASSERT(0 != zapobj);
ASSERT(zfeature_is_valid_guid(feature->fi_guid));
ASSERT(dmu_tx_is_syncing(tx));
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
- VERIFY0(zap_lookup(spa->spa_meta_objset, zapobj, feature->fi_guid,
- sizeof (uint64_t), 1, &refcount));
+ VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP);
switch (action) {
case FEATURE_ACTION_INCR:
@@ -360,7 +432,7 @@ void
spa_feature_enable(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx)
{
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
- ASSERT3U(fid, <, SPA_FEATURES);
+ ASSERT(VALID_FEATURE_FID(fid));
feature_enable_sync(spa, &spa_feature_table[fid], tx);
}
@@ -382,7 +454,7 @@ spa_feature_is_enabled(spa_t *spa, spa_feature_t fid)
int err;
uint64_t refcount;
- ASSERT3U(fid, <, SPA_FEATURES);
+ ASSERT(VALID_FEATURE_FID(fid));
if (spa_version(spa) < SPA_VERSION_FEATURES)
return (B_FALSE);
@@ -397,7 +469,7 @@ spa_feature_is_active(spa_t *spa, spa_feature_t fid)
int err;
uint64_t refcount;
- ASSERT3U(fid, <, SPA_FEATURES);
+ ASSERT(VALID_FEATURE_FID(fid));
if (spa_version(spa) < SPA_VERSION_FEATURES)
return (B_FALSE);
@@ -405,3 +477,26 @@ spa_feature_is_active(spa_t *spa, spa_feature_t fid)
ASSERT(err == 0 || err == ENOTSUP);
return (err == 0 && refcount > 0);
}
+
+/*
+ * For the feature specified by fid (which must depend on
+ * SPA_FEATURE_ENABLED_TXG), return the TXG at which it was enabled in the
+ * OUT txg argument.
+ *
+ * Returns B_TRUE if the feature is enabled, in which case txg will be filled
+ * with the transaction group in which the specified feature was enabled.
+ * Returns B_FALSE otherwise (i.e. if the feature is not enabled).
+ */
+boolean_t
+spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg) {
+ int err;
+
+ ASSERT(VALID_FEATURE_FID(fid));
+ if (spa_version(spa) < SPA_VERSION_FEATURES)
+ return (B_FALSE);
+
+ err = feature_get_enabled_txg(spa, &spa_feature_table[fid], txg);
+ ASSERT(err == 0 || err == ENOTSUP);
+
+ return (err == 0);
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
index a3a1338..5c18671 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
@@ -810,10 +810,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
*/
if (vap->va_type == VDIR) {
if (zfsvfs->z_replay) {
- err = zap_create_claim_norm(zfsvfs->z_os, obj,
+ VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj,
zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
- obj_type, bonuslen, tx);
- ASSERT0(err);
+ obj_type, bonuslen, tx));
} else {
obj = zap_create_norm(zfsvfs->z_os,
zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
@@ -821,10 +820,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
}
} else {
if (zfsvfs->z_replay) {
- err = dmu_object_claim(zfsvfs->z_os, obj,
+ VERIFY0(dmu_object_claim(zfsvfs->z_os, obj,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
- obj_type, bonuslen, tx);
- ASSERT0(err);
+ obj_type, bonuslen, tx));
} else {
obj = dmu_object_alloc(zfsvfs->z_os,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
@@ -1006,8 +1004,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
if (obj_type == DMU_OT_ZNODE ||
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
- err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
- ASSERT0(err);
+ VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
}
if (!(flag & IS_ROOT_NODE)) {
vnode_t *vp;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
index b26ca3d..ea5bb13a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
@@ -383,7 +383,8 @@ zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg)
* Claim log block if not already committed and not already claimed.
* If tx == NULL, just verify that the block is claimable.
*/
- if (bp->blk_birth < first_txg || zil_bp_tree_add(zilog, bp) != 0)
+ if (BP_IS_HOLE(bp) || bp->blk_birth < first_txg ||
+ zil_bp_tree_add(zilog, bp) != 0)
return (0);
return (zio_wait(zio_claim(NULL, zilog->zl_spa,
@@ -433,7 +434,8 @@ zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
* If we previously claimed it, we need to free it.
*/
if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE &&
- bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0)
+ bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 &&
+ !BP_IS_HOLE(bp))
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
return (0);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index fe440af..a7f1206 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -37,6 +37,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/trim_map.h>
+#include <sys/zfeature.h>
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
@@ -1105,7 +1106,7 @@ zio_write_bp_init(zio_t *zio)
BP_ZERO(bp);
}
- if (bp->blk_birth == zio->io_txg) {
+ if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
/*
* We're rewriting an existing block, which means we're
* working on behalf of spa_sync(). For spa_sync() to
@@ -1151,7 +1152,8 @@ zio_write_bp_init(zio_t *zio)
* spa_sync() to allocate new blocks, but force rewrites after that.
* There should only be a handful of blocks after pass 1 in any case.
*/
- if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize &&
+ if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg &&
+ BP_GET_PSIZE(bp) == psize &&
pass >= zfs_sync_pass_rewrite) {
ASSERT(psize != 0);
enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
@@ -1163,15 +1165,22 @@ zio_write_bp_init(zio_t *zio)
}
if (psize == 0) {
+ if (zio->io_bp_orig.blk_birth != 0 &&
+ spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
+ BP_SET_LSIZE(bp, lsize);
+ BP_SET_TYPE(bp, zp->zp_type);
+ BP_SET_LEVEL(bp, zp->zp_level);
+ BP_SET_BIRTH(bp, zio->io_txg, 0);
+ }
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
} else {
ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
BP_SET_LSIZE(bp, lsize);
+ BP_SET_TYPE(bp, zp->zp_type);
+ BP_SET_LEVEL(bp, zp->zp_level);
BP_SET_PSIZE(bp, psize);
BP_SET_COMPRESS(bp, compress);
BP_SET_CHECKSUM(bp, zp->zp_checksum);
- BP_SET_TYPE(bp, zp->zp_type);
- BP_SET_LEVEL(bp, zp->zp_level);
BP_SET_DEDUP(bp, zp->zp_dedup);
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
if (zp->zp_dedup) {
@@ -2585,7 +2594,7 @@ zio_vdev_io_start(zio_t *zio)
if (vd->vdev_ops->vdev_op_leaf &&
(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
- if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
+ if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
return (ZIO_PIPELINE_CONTINUE);
if ((zio = vdev_queue_io(zio)) == NULL)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
index 2154fdf..64c97c9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
@@ -282,7 +282,8 @@ zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zvol_extent_t *ze;
int bs = ma->ma_zv->zv_volblocksize;
- if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
+ if (BP_IS_HOLE(bp) ||
+ zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
return (0);
VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
OpenPOWER on IntegriCloud