summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
diff options
context:
space:
mode:
authordelphij <delphij@FreeBSD.org>2014-03-19 23:55:03 +0000
committerdelphij <delphij@FreeBSD.org>2014-03-19 23:55:03 +0000
commitede49d4106ad9e6d34a2592322e48604f72605bd (patch)
tree0012bcb04d95eaf4e65ed2f983e9d51d47236888 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
parent8e41bb96f82aa123d5f4c625c68f95b253640b28 (diff)
downloadFreeBSD-src-ede49d4106ad9e6d34a2592322e48604f72605bd.zip
FreeBSD-src-ede49d4106ad9e6d34a2592322e48604f72605bd.tar.gz
MFC r260150: MFV r259170:
4370 avoid transmitting holes during zfs send 4371 DMU code clean up illumos/illumos-gate@43466aae47bfcd2ad9bf501faec8e75c08095e4f NOTE: Make sure the boot code is updated if a zpool upgrade is done on boot zpool.
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c135
1 files changed, 53 insertions, 82 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 0a39159..27ed6e3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -455,10 +455,9 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
mutex_enter(&db->db_mtx);
if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
int blksz = db->db.db_size;
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
mutex_exit(&db->db_mtx);
- DB_GET_SPA(&spa, db);
abuf = arc_loan_buf(spa, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
@@ -519,7 +518,6 @@ static void
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
{
dnode_t *dn;
- spa_t *spa;
zbookmark_t zb;
uint32_t aflags = ARC_NOWAIT;
@@ -559,9 +557,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa,
- db->db.db_size, db, type));
DB_DNODE_EXIT(db);
+ dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
+ db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
db->db_state = DB_CACHED;
*flags |= DB_RF_CACHED;
@@ -569,7 +567,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
return;
}
- spa = dn->dn_objset->os_spa;
DB_DNODE_EXIT(db);
db->db_state = DB_READ;
@@ -586,7 +583,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
dbuf_add_ref(db, NULL);
- (void) arc_read(zio, spa, db->db_blkptr,
+ (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
@@ -598,8 +595,8 @@ int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
int err = 0;
- int havepzio = (zio != NULL);
- int prefetch;
+ boolean_t havepzio = (zio != NULL);
+ boolean_t prefetch;
dnode_t *dn;
/*
@@ -694,11 +691,10 @@ dbuf_noread(dmu_buf_impl_t *db)
cv_wait(&db->db_changed, &db->db_mtx);
if (db->db_state == DB_UNCACHED) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
- DB_GET_SPA(&spa, db);
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
@@ -753,9 +749,8 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
int size = db->db.db_size;
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa;
+ spa_t *spa = db->db_objset->os_spa;
- DB_GET_SPA(&spa, db);
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
@@ -781,12 +776,9 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
ASSERT(db->db_data_pending != dr);
/* free this block */
- if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) {
- spa_t *spa;
+ if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
+ zio_free(db->db_objset->os_spa, txg, bp);
- DB_GET_SPA(&spa, db);
- zio_free(spa, txg, bp);
- }
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE;
@@ -804,9 +796,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
/*
* Evict (if its unreferenced) or clear (if its referenced) any level-0
* data blocks in the free range, so that any future readers will find
- * empty blocks. Also, if we happen across any level-1 dbufs in the
- * range that have not already been marked dirty, mark them dirty so
- * they stay in memory.
+ * empty blocks.
*
* This is a no-op if the dataset is in the middle of an incremental
* receive; see comment below for details.
@@ -816,14 +806,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- uint64_t first_l1 = start >> epbs;
- uint64_t last_l1 = end >> epbs;
- if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
+ if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID))
end = dn->dn_maxblkid;
- last_l1 = end >> epbs;
- }
dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
mutex_enter(&dn->dn_dbufs_mtx);
@@ -846,23 +831,8 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
db_next = list_next(&dn->dn_dbufs, db);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- if (db->db_level == 1 &&
- db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
- mutex_enter(&db->db_mtx);
- if (db->db_last_dirty &&
- db->db_last_dirty->dr_txg < txg) {
- dbuf_add_ref(db, FTAG);
- mutex_exit(&db->db_mtx);
- dbuf_will_dirty(db, tx);
- dbuf_rele(db, FTAG);
- } else {
- mutex_exit(&db->db_mtx);
- }
- }
-
if (db->db_level != 0)
continue;
- dprintf_dbuf(db, "found buf %s\n", "");
if (db->db_blkid < start || db->db_blkid > end)
continue;
@@ -939,24 +909,29 @@ dbuf_block_freeable(dmu_buf_impl_t *db)
* We don't need any locking to protect db_blkptr:
* If it's syncing, then db_last_dirty will be set
* so we'll ignore db_blkptr.
+ *
+ * This logic ensures that only block births for
+ * filled blocks are considered.
*/
ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_last_dirty)
+ if (db->db_last_dirty && (db->db_blkptr == NULL ||
+ !BP_IS_HOLE(db->db_blkptr))) {
birth_txg = db->db_last_dirty->dr_txg;
- else if (db->db_blkptr)
+ } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
birth_txg = db->db_blkptr->blk_birth;
+ }
/*
- * If we don't exist or are in a snapshot, we can't be freed.
+ * If this block don't exist or is in a snapshot, it can't be freed.
* Don't pass the bp to dsl_dataset_block_freeable() since we
* are holding the db_mtx lock and might deadlock if we are
* prefetching a dedup-ed block.
*/
- if (birth_txg)
+ if (birth_txg != 0)
return (ds == NULL ||
dsl_dataset_block_freeable(ds, NULL, birth_txg));
else
- return (FALSE);
+ return (B_FALSE);
}
void
@@ -976,7 +951,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
/*
- * This call to dbuf_will_dirty() with the dn_struct_rwlock held
+ * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
* is OK, because there can be no other references to the db
* when we are changing its size, so no concurrent DB_FILL can
* be happening.
@@ -985,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
* XXX we should be doing a dbuf_read, checking the return
* value and returning that up to our callers
*/
- dbuf_will_dirty(db, tx);
+ dmu_buf_will_dirty(&db->db, tx);
/* create the data buffer for the new block */
buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type);
@@ -1015,9 +990,8 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
void
dbuf_release_bp(dmu_buf_impl_t *db)
{
- objset_t *os;
+ objset_t *os = db->db_objset;
- DB_GET_OBJSET(&os, db);
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
ASSERT(arc_released(os->os_phys_buf) ||
list_link_active(&os->os_dsl_dataset->ds_synced_link));
@@ -1391,10 +1365,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (B_FALSE);
}
-#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
void
-dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
ASSERT(tx->tx_txg != 0);
@@ -1517,7 +1491,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
db->db_state = DB_FILL;
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
- dbuf_fill_done(db, tx);
+ dmu_buf_fill_done(&db->db, tx);
}
/*
@@ -2022,7 +1996,6 @@ dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
* Without that, the dbuf_rele() could lead to a dnode_rele() followed by the
* dnode's parent dbuf evicting its dnode handles.
*/
-#pragma weak dmu_buf_rele = dbuf_rele
void
dbuf_rele(dmu_buf_impl_t *db, void *tag)
{
@@ -2030,6 +2003,12 @@ dbuf_rele(dmu_buf_impl_t *db, void *tag)
dbuf_rele_and_unlock(db, tag);
}
+void
+dmu_buf_rele(dmu_buf_t *db, void *tag)
+{
+ dbuf_rele((dmu_buf_impl_t *)db, tag);
+}
+
/*
* dbuf_rele() for an already-locked dbuf. This is necessary to allow
* db_dirtycnt and db_holds to be updated atomically.
@@ -2480,18 +2459,14 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta;
- if (BP_IS_HOLE(bp)) {
- ASSERT(bp->blk_fill == 0);
- DB_DNODE_EXIT(db);
- return;
+ if (bp->blk_birth != 0) {
+ ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
+ BP_GET_TYPE(bp) == dn->dn_type) ||
+ (db->db_blkid == DMU_SPILL_BLKID &&
+ BP_GET_TYPE(bp) == dn->dn_bonustype));
+ ASSERT(BP_GET_LEVEL(bp) == db->db_level);
}
- ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
- BP_GET_TYPE(bp) == dn->dn_type) ||
- (db->db_blkid == DMU_SPILL_BLKID &&
- BP_GET_TYPE(bp) == dn->dn_bonustype));
- ASSERT(BP_GET_LEVEL(bp) == db->db_level);
-
mutex_enter(&db->db_mtx);
#ifdef ZFS_DEBUG
@@ -2517,7 +2492,11 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
fill++;
}
} else {
- fill = 1;
+ if (BP_IS_HOLE(bp)) {
+ fill = 0;
+ } else {
+ fill = 1;
+ }
}
} else {
blkptr_t *ibp = db->db.db_data;
@@ -2572,9 +2551,10 @@ static void
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
{
dmu_buf_impl_t *db = vdb;
- blkptr_t *bp = zio->io_bp;
blkptr_t *bp_orig = &zio->io_bp_orig;
- uint64_t txg = zio->io_txg;
+ blkptr_t *bp = db->db_blkptr;
+ objset_t *os = db->db_objset;
+ dmu_tx_t *tx = os->os_synctx;
dbuf_dirty_record_t **drp, *dr;
ASSERT0(zio->io_error);
@@ -2587,14 +2567,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
ASSERT(BP_EQUAL(bp, bp_orig));
} else {
- objset_t *os;
- dsl_dataset_t *ds;
- dmu_tx_t *tx;
-
- DB_GET_OBJSET(&os, db);
- ds = os->os_dsl_dataset;
- tx = os->os_synctx;
-
+ dsl_dataset_t *ds = os->os_dsl_dataset;
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
dsl_dataset_block_born(ds, bp, tx);
}
@@ -2607,7 +2580,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
while ((dr = *drp) != db->db_data_pending)
drp = &dr->dr_next;
ASSERT(!list_link_active(&dr->dr_dirty_node));
- ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
ASSERT(dr->dr_next == NULL);
*drp = dr->dr_next;
@@ -2641,14 +2613,14 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
+ ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
if (!BP_IS_HOLE(db->db_blkptr)) {
int epbs =
dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ ASSERT3U(db->db_blkid, <=,
+ dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
db->db.db_size);
- ASSERT3U(dn->dn_phys->dn_maxblkid
- >> (db->db_level * epbs), >=, db->db_blkid);
arc_set_callback(db->db_buf, dbuf_do_evict, db);
}
DB_DNODE_EXIT(db);
@@ -2661,8 +2633,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
-
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
}
static void
OpenPOWER on IntegriCloud