author    mm <mm@FreeBSD.org>  2010-07-12 23:49:04 +0000
committer mm <mm@FreeBSD.org>  2010-07-12 23:49:04 +0000
commit    b2946e89348042300795fce8f0b12a01250541df (patch)
tree      528115d6014d608781cfcb91d5b3a5ba0cfcc892 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
parent    1b4c9c446a5b4f260f236b1053bc42f683ef18cb (diff)
Merge ZFS version 15 and almost all OpenSolaris bugfixes referenced
in Solaris 10 updates 141445-09 and 142901-14.

Detailed information (OpenSolaris revisions and Bug IDs, Solaris 10
patch numbers):

7844:effed23820ae
  6755435 zfs_open() and zfs_close() needs to use ZFS_ENTER/ZFS_VERIFY_ZP (141445-01)
7897:e520d8258820
  6748436 inconsistent zpool.cache in boot_archive could panic a zfs root filesystem upon boot-up (141445-01)
7965:b795da521357
  6740164 zpool attach can create an illegal root pool (141909-02)
8084:b811cc60d650
  6769612 zpool_import() will continue to write to cachefile even if altroot is set (N/A)
8121:7fd09d4ebd9c
  6757430 want an option for zdb to disable space map loading and leak tracking (141445-01)
8129:e4f45a0bfbb0
  6542860 ASSERT: reason != VDEV_LABEL_REMOVE||vdev_inuse(vd, crtxg, reason, 0) (141445-01)
8188:fd00c0a81e80
  6761100 want zdb option to select older uberblocks (141445-01)
8190:6eeea43ced42
  6774886 zfs_setattr() won't allow ndmp to restore SUNWattr_rw (141445-01)
8225:59a9961c2aeb
  6737463 panic while trying to write out config file if root pool import fails (141445-01)
8227:f7d7be9b1f56
  6765294 Refactor replay (141445-01)
8228:51e9ca9ee3a5
  6572357 libzfs should do more to avoid mnttab lookups (141909-01)
  6572376 zfs_iter_filesystems and zfs_iter_snapshots get objset stats twice (141909-01)
8241:5a60f16123ba
  6328632 zpool offline is a bit too conservative (141445-01)
  6739487 ASSERT: txg <= spa_final_txg due to scrub/export race (141445-01)
  6767129 ASSERT: cvd->vdev_isspare, in spa_vdev_detach() (141445-01)
  6747698 checksum failures after offline -t / export / import / scrub (141445-01)
  6745863 ZFS writes to disk after it has been offlined (141445-01)
  6722540 50% slowdown on scrub/resilver with certain vdev configurations (141445-01)
  6759999 resilver logic rewrites ditto blocks on both source and destination (141445-01)
  6758107 I/O should never suspend during spa_load() (141445-01)
  6776548 codereview(1) runs off the page when faced with multi-line comments (N/A)
  6761406 AMD errata 91 workaround doesn't work on 64-bit systems (141445-01)
8242:e46e4b2f0a03
  6770866 GRUB/ZFS should require physical path or devid, but not both (141445-01)
8269:03a7e9050cfd
  6674216 "zfs share" doesn't work, but "zfs set sharenfs=on" does (141445-01)
  6621164 $SRC/cmd/zfs/zfs_main.c seems to have a syntax error in the translation note (141445-01)
  6635482 i18n problems in libzfs_dataset.c and zfs_main.c (141445-01)
  6595194 "zfs get" VALUE column is as wide as NAME (141445-01)
  6722991 vdev_disk.c: error checking for ddi_pathname_to_dev_t() must test for NODEV (141445-01)
  6396518 ASSERT strings shouldn't be pre-processed (141445-01)
8274:846b39508aff
  6713916 scrub/resilver needlessly decompress data (141445-01)
8343:655db2375fed
  6739553 libzfs_status msgid table is out of sync (141445-01)
  6784104 libzfs unfairly rejects numerical values greater than 2^63 (141445-01)
  6784108 zfs_realloc() should not free original memory on failure (141445-01)
8525:e0e0e525d0f8
  6788830 set large value to reservation cause core dump (141445-01)
  6791064 want sysevents for ZFS scrub (141445-01)
  6791066 need to be able to set cachefile on faulted pools (141445-01)
  6791071 zpool_do_import() should not enable datasets on faulted pools (141445-01)
  6792134 getting multiple properties on a faulted pool leads to confusion (141445-01)
8547:bcc7b46e5ff7
  6792884 Vista clients cannot access .zfs (141445-01)
8632:36ef517870a3
  6798384 It can take a village to raise a zio (141445-01)
8636:7e4ce9158df3
  6551866 deadlock between zfs_write(), zfs_freesp(), and zfs_putapage() (141909-01)
  6504953 zfs_getpage() misunderstands VOP_GETPAGE() interface (141909-01)
  6702206 ZFS read/writer lock contention throttles sendfile() benchmark (141445-01)
  6780491 Zone on a ZFS filesystem has poor fork/exec performance (141445-01)
  6747596 assertion failed: DVA_EQUAL(BP_IDENTITY(&zio->io_bp_orig), BP_IDENTITY(zio->io_bp))); (141445-01)
8692:692d4668b40d
  6801507 ZFS read aggregation should not mind the gap (141445-01)
8697:e62d2612c14d
  6633095 creating a filesystem with many properties set is slow (141445-01)
8768:dfecfdbb27ed
  6775697 oracle crashes when overwriting after hitting quota on zfs (141909-01)
8811:f8deccf701cf
  6790687 libzfs mnttab caching ignores external changes (141445-01)
  6791101 memory leak from libzfs_mnttab_init (141445-01)
8845:91af0d9c0790
  6800942 smb_session_create() incorrectly stores IP addresses (N/A)
  6582163 Access Control List (ACL) for shares (141445-01)
  6804954 smb_search - shortname field should be space padded following the NULL terminator (N/A)
  6800184 Panic at smb_oplock_conflict+0x35() (N/A)
8876:59d2e67b4b65
  6803822 Reboot after replacement of system disk in a ZFS mirror drops to grub> prompt (141445-01)
8924:5af812f84759
  6789318 coredump when issue zdb -uuuu poolname/ (141445-01)
  6790345 zdb -dddd -e poolname coredump (141445-01)
  6797109 zdb: 'zdb -dddddd pool_name/fs_name inode' coredump if the file with inode was deleted (141445-01)
  6797118 zdb: 'zdb -dddddd poolname inum' coredump if I miss the fs name (141445-01)
  6803343 shareiscsi=on failed, iscsitgtd failed request to share (141445-01)
9030:243fd360d81f
  6815893 hang mounting a dataset after booting into a new boot environment (141445-01)
9056:826e1858a846
  6809691 'zpool create -f' no longer overwrites ufs infomation (141445-01)
9179:d8fbd96b79b3
  6790064 zfs needs to determine uid and gid earlier in create process (141445-01)
9214:8d350e5d04aa
  6604992 forced unmount + being in .zfs/snapshot/<snap1> = not happy (141909-01)
  6810367 assertion failed: dvp->v_flag & VROOT, file: ../../common/fs/gfs.c, line: 426 (141909-01)
9229:e3f8b41e5db4
  6807765 ztest_dsl_dataset_promote_busy needs to clean up after ENOSPC (141445-01)
9230:e4561e3eb1ef
  6821169 offlining a device results in checksum errors (141445-01)
  6821170 ZFS should not increment error stats for unavailable devices (141445-01)
  6824006 need to increase issue and interrupt taskqs threads in zfs (141445-01)
9234:bffdc4fc05c4
  6792139 recovering from a suspended pool needs some work (141445-01)
  6794830 reboot command hangs on a failed zfs pool (141445-01)
9246:67c03c93c071
  6824062 System panicked in zfs_mount due to NULL pointer dereference when running btts and svvs tests (141909-01)
9276:a8a7fc849933
  6816124 System crash running zpool destroy on broken zpool (141445-03)
9355:09928982c591
  6818183 zfs snapshot -r is slow due to set_snap_props() doing txg_wait_synced() for each new snapshot (141445-03)
9391:413d0661ef33
  6710376 log device can show incorrect status when other parts of pool are degraded (141445-03)
9396:f41cf682d0d3 (part already merged)
  6501037 want user/group quotas on ZFS (141445-03)
  6827260 assertion failed in arc_read(): hdr == pbuf->b_hdr (141445-03)
  6815592 panic: No such hold X on refcount Y from zfs_znode_move (141445-03)
  6759986 zfs list shows temporary %clone when doing online zfs recv (141445-03)
9404:319573cd93f8
  6774713 zfs ignores canmount=noauto when sharenfs property != off (141445-03)
9412:4aefd8704ce0
  6717022 ZFS DMU needs zero-copy support (141445-03)
9425:e7ffacaec3a8
  6799895 spa_add_spares() needs to be protected by config lock (141445-03)
  6826466 want to post sysevents on hot spare activation (141445-03)
  6826468 spa 'allowfaulted' needs some work (141445-03)
  6826469 kernel support for storing vdev FRU information (141445-03)
  6826470 skip posting checksum errors from DTL regions of leaf vdevs (141445-03)
  6826471 I/O errors after device remove probe can confuse FMA (141445-03)
  6826472 spares should enjoy some of the benefits of cache devices (141445-03)
9443:2a96d8478e95
  6833711 gang leaders shouldn't have to be logical (141445-03)
9463:d0bd231c7518
  6764124 want zdb to be able to checksum metadata blocks only (141445-03)
9465:8372081b8019
  6830237 zfs panic in zfs_groupmember() (141445-03)
9466:1fdfd1fed9c4
  6833162 phantom log device in zpool status (141445-03)
9469:4f68f041ddcd
  6824968 add ZFS userquota support to rquotad (141445-03)
9470:6d827468d7b5
  6834217 godfather I/O should reexecute (141445-03)
9480:fcff33da767f
  6596237 Stop looking and start ganging (141909-02)
9493:9933d599bc93
  6623978 lwb->lwb_buf != NULL, file ../../../uts/common/fs/zfs/zil.c, line 787, function zil_lwb_commit (141445-06)
9512:64cafcbcc337
  6801810 Commit of aligned streaming rewrites to ZIL device causes unwanted disk reads (N/A)
9515:d3b739d9d043
  6586537 async zio taskqs can block out userland commands (142901-09)
9554:787363635b6a
  6836768 zfs_userspace() callback has no way to indicate failure (N/A)
9574:1eb6a6ab2c57
  6838062 zfs panics when an error is encountered in space_map_load() (141909-02)
9583:b0696cd037cc
  6794136 Panic BAD TRAP: type=e when importing degraded zraid pool. (141909-03)
9630:e25a03f552e0
  6776104 "zfs import" deadlock between spa_unload() and spa_async_thread() (141445-06)
9653:a70048a304d1
  6664765 Unable to remove files when using fat-zap and quota exceeded on ZFS filesystem (141445-06)
9688:127be1845343
  6841321 zfs userspace / zfs get userused@ doesn't work on mounted snapshot (N/A)
  6843069 zfs get userused@S-1-... doesn't work (N/A)
9873:8ddc892eca6e
  6847229 assertion failed: refcount_count(&tx->tx_space_written) + delta <= tx->tx_space_towrite in dmu_tx.c (141445-06)
9904:d260bd3fd47c
  6838344 kernel heap corruption detected on zil while stress testing (141445-06)
9951:a4895b3dd543
  6844900 zfs_ioc_userspace_upgrade leaks (N/A)
10040:38b25aeeaf7a
  6857012 zfs panics on zpool import (141445-06)
10000:241a51d8720c
  6848242 zdb -e no longer works as expected (N/A)
10100:4a6965f6bef8
  6856634 snv_117 not booting: zfs_parse_bootfs: error2 (141445-07)
10160:a45b03783d44
  6861983 zfs should use new name <-> SID interfaces (N/A)
  6862984 userquota commands can hang (141445-06)
10299:80845694147f
  6696858 zfs receive of incremental replication stream can dereference NULL pointer and crash (N/A)
10302:a9e3d1987706
  6696858 zfs receive of incremental replication stream can dereference NULL pointer and crash (fix lint) (N/A)
10575:2a8816c5173b (partial merge)
  6882227 spa_async_remove() shouldn't do a full clear (142901-14)
10800:469478b180d9
  6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached (142901-09)
  6793430 zdb -ivvvv assertion failure: bp->blk_cksum.zc_word[2] == dmu_objset_id(zilog->zl_os) (N/A)
10801:e0bf032e8673 (partial merge)
  6822816 assertion failed: zap_remove_int(ds_next_clones_obj) returns ENOENT (142901-09)
10810:b6b161a6ae4a
  6892298 buf->b_hdr->b_state != arc_anon, file: ../../common/fs/zfs/arc.c, line: 2849 (142901-09)
10890:499786962772
  6807339 spurious checksum errors when replacing a vdev (142901-13)
11249:6c30f7dfc97b
  6906110 bad trap panic in zil_replay_log_record (142901-13)
  6906946 zfs replay isn't handling uid/gid correctly (142901-13)
11454:6e69bacc1a5a
  6898245 suspended zpool should not cause rest of the zfs/zpool commands to hang (142901-10)
11546:42ea6be8961b (partial merge)
  6833999 3-way deadlock in dsl_dataset_hold_ref() and dsl_sync_task_group_sync() (142901-09)

Discussed with: pjd
Approved by:    delphij (mentor)
Obtained from:  OpenSolaris (multiple Bug IDs)
MFC after:      2 months
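The snapshot-related churn in this file comes from 9355 (6818183) above:
dmu_objset_snapshot() now takes an nvlist_t of user properties and applies
them in the same sync task as the snapshot itself, instead of doing one
txg_wait_synced() per snapshot. A minimal sketch of a hypothetical in-kernel
caller of the new signature (the function name and property name/value are
made up for illustration; nvlist_alloc() and nvlist_add_string() are the
stock Solaris nvlist routines):

	/*
	 * Hypothetical caller: snapshot fsname@snapname recursively and
	 * apply one user property to every new snapshot in the same txg.
	 */
	static int
	example_snapshot_with_props(char *fsname, char *snapname)
	{
		nvlist_t *props;
		int error;

		VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_string(props, "com.example:note",
		    "nightly") == 0);

		error = dmu_objset_snapshot(fsname, snapname, props, B_TRUE);

		nvlist_free(props);
		return (error);
	}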
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c')
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c | 308
1 file changed, 261 insertions, 47 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
index c9e00d5..2678b83 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -164,10 +164,15 @@ dmu_objset_byteswap(void *buf, size_t size)
{
objset_phys_t *osp = buf;
- ASSERT(size == sizeof (objset_phys_t));
+ ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
dnode_byteswap(&osp->os_meta_dnode);
byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
osp->os_type = BSWAP_64(osp->os_type);
+ osp->os_flags = BSWAP_64(osp->os_flags);
+ if (size == sizeof (objset_phys_t)) {
+ dnode_byteswap(&osp->os_userused_dnode);
+ dnode_byteswap(&osp->os_groupused_dnode);
+ }
}
int
@@ -210,12 +215,30 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
err = EIO;
return (err);
}
+
+ /* Increase the blocksize if we are permitted. */
+ if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
+ arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
+ arc_buf_t *buf = arc_buf_alloc(spa,
+ sizeof (objset_phys_t), &osi->os_phys_buf,
+ ARC_BUFC_METADATA);
+ bzero(buf->b_data, sizeof (objset_phys_t));
+ bcopy(osi->os_phys_buf->b_data, buf->b_data,
+ arc_buf_size(osi->os_phys_buf));
+ (void) arc_buf_remove_ref(osi->os_phys_buf,
+ &osi->os_phys_buf);
+ osi->os_phys_buf = buf;
+ }
+
osi->os_phys = osi->os_phys_buf->b_data;
+ osi->os_flags = osi->os_phys->os_flags;
} else {
- osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
+ int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
+ sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
+ osi->os_phys_buf = arc_buf_alloc(spa, size,
&osi->os_phys_buf, ARC_BUFC_METADATA);
osi->os_phys = osi->os_phys_buf->b_data;
- bzero(osi->os_phys, sizeof (objset_phys_t));
+ bzero(osi->os_phys, size);
}
/*
@@ -276,6 +299,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
osi->os_meta_dnode = dnode_special_open(osi,
&osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
+ if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
+ osi->os_userused_dnode = dnode_special_open(osi,
+ &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
+ osi->os_groupused_dnode = dnode_special_open(osi,
+ &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
+ }
/*
* We should be the only thread trying to do this because we
@@ -456,13 +485,15 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg)
os.os = osi;
(void) dmu_objset_evict_dbufs(&os);
- ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode);
- ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode);
- ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL);
-
dnode_special_close(osi->os_meta_dnode);
+ if (osi->os_userused_dnode) {
+ dnode_special_close(osi->os_userused_dnode);
+ dnode_special_close(osi->os_groupused_dnode);
+ }
zil_free(osi->os_zil);
+ ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);
+
VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
mutex_destroy(&osi->os_lock);
mutex_destroy(&osi->os_obj_lock);
@@ -520,6 +551,10 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
ASSERT(type != DMU_OST_ANY);
ASSERT(type < DMU_OST_NUMTYPES);
osi->os_phys->os_type = type;
+ if (dmu_objset_userused_enabled(osi)) {
+ osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+ osi->os_flags = osi->os_phys->os_flags;
+ }
dsl_dataset_dirty(ds, tx);
@@ -704,13 +739,33 @@ struct snaparg {
char *snapname;
char failed[MAXPATHLEN];
boolean_t checkperms;
- list_t objsets;
+ nvlist_t *props;
};
-struct osnode {
- list_node_t node;
- objset_t *os;
-};
+static int
+snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ objset_t *os = arg1;
+ struct snaparg *sn = arg2;
+
+ /* The props have already been checked by zfs_check_userprops(). */
+
+ return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset,
+ sn->snapname, tx));
+}
+
+static void
+snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+ objset_t *os = arg1;
+ dsl_dataset_t *ds = os->os->os_dsl_dataset;
+ struct snaparg *sn = arg2;
+
+ dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);
+
+ if (sn->props)
+ dsl_props_set_sync(ds->ds_prev, sn->props, cr, tx);
+}
static int
dmu_objset_snapshot_one(char *name, void *arg)
@@ -747,13 +802,8 @@ dmu_objset_snapshot_one(char *name, void *arg)
*/
err = zil_suspend(dmu_objset_zil(os));
if (err == 0) {
- struct osnode *osn;
- dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check,
- dsl_dataset_snapshot_sync, os->os->os_dsl_dataset,
- sn->snapname, 3);
- osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP);
- osn->os = os;
- list_insert_tail(&sn->objsets, osn);
+ dsl_sync_task_create(sn->dstg, snapshot_check,
+ snapshot_sync, os, sn, 3);
} else {
dmu_objset_close(os);
}
@@ -762,11 +812,11 @@ dmu_objset_snapshot_one(char *name, void *arg)
}
int
-dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
+dmu_objset_snapshot(char *fsname, char *snapname,
+ nvlist_t *props, boolean_t recursive)
{
dsl_sync_task_t *dst;
- struct osnode *osn;
- struct snaparg sn = { 0 };
+ struct snaparg sn;
spa_t *spa;
int err;
@@ -778,8 +828,7 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
sn.snapname = snapname;
- list_create(&sn.objsets, sizeof (struct osnode),
- offsetof(struct osnode, node));
+ sn.props = props;
if (recursive) {
sn.checkperms = B_TRUE;
@@ -790,27 +839,19 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
err = dmu_objset_snapshot_one(fsname, &sn);
}
- if (err)
- goto out;
-
- err = dsl_sync_task_group_wait(sn.dstg);
+ if (err == 0)
+ err = dsl_sync_task_group_wait(sn.dstg);
for (dst = list_head(&sn.dstg->dstg_tasks); dst;
dst = list_next(&sn.dstg->dstg_tasks, dst)) {
- dsl_dataset_t *ds = dst->dst_arg1;
+ objset_t *os = dst->dst_arg1;
+ dsl_dataset_t *ds = os->os->os_dsl_dataset;
if (dst->dst_err)
dsl_dataset_name(ds, sn.failed);
+ zil_resume(dmu_objset_zil(os));
+ dmu_objset_close(os);
}
-out:
- while (osn = list_head(&sn.objsets)) {
- list_remove(&sn.objsets, osn);
- zil_resume(dmu_objset_zil(osn->os));
- dmu_objset_close(osn->os);
- kmem_free(osn, sizeof (struct osnode));
- }
- list_destroy(&sn.objsets);
-
if (err)
(void) strcpy(fsname, sn.failed);
dsl_sync_task_group_destroy(sn.dstg);
@@ -819,7 +860,7 @@ out:
}
static void
-dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
+dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
dnode_t *dn;
@@ -827,14 +868,20 @@ dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
ASSERT(dn->dn_dbuf->db_data_pending);
/*
- * Initialize dn_zio outside dnode_sync()
- * to accomodate meta-dnode
+ * Initialize dn_zio outside dnode_sync() because the
+ * meta-dnode needs to set it outside dnode_sync().
*/
dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
ASSERT(dn->dn_zio);
ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
list_remove(list, dn);
+
+ if (newlist) {
+ (void) dnode_add_ref(dn, newlist);
+ list_insert_tail(newlist, dn);
+ }
+
dnode_sync(dn, tx);
}
}
@@ -853,9 +900,12 @@ ready(zio_t *zio, arc_buf_t *abuf, void *arg)
ASSERT(BP_GET_LEVEL(bp) == 0);
/*
- * Update rootbp fill count.
+ * Update rootbp fill count: it should be the number of objects
+ * allocated in the object set (not counting the "special"
+ * objects that are stored in the objset_phys_t -- the meta
+ * dnode and user/group accounting objects).
*/
- bp->blk_fill = 1; /* count the meta-dnode */
+ bp->blk_fill = 0;
for (int i = 0; i < dnp->dn_nblkptr; i++)
bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
@@ -878,6 +928,7 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
writeprops_t wp = { 0 };
zio_t *zio;
list_t *list;
+ list_t *newlist = NULL;
dbuf_dirty_record_t *dr;
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
@@ -915,20 +966,41 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
}
arc_release(os->os_phys_buf, &os->os_phys_buf);
+
zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
/*
- * Sync meta-dnode - the parent IO for the sync is the root block
+ * Sync special dnodes - the parent IO for the sync is the root block
*/
os->os_meta_dnode->dn_zio = zio;
dnode_sync(os->os_meta_dnode, tx);
+ os->os_phys->os_flags = os->os_flags;
+
+ if (os->os_userused_dnode &&
+ os->os_userused_dnode->dn_type != DMU_OT_NONE) {
+ os->os_userused_dnode->dn_zio = zio;
+ dnode_sync(os->os_userused_dnode, tx);
+ os->os_groupused_dnode->dn_zio = zio;
+ dnode_sync(os->os_groupused_dnode, tx);
+ }
+
txgoff = tx->tx_txg & TXG_MASK;
- dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
- dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);
+ if (dmu_objset_userused_enabled(os)) {
+ newlist = &os->os_synced_dnodes;
+ /*
+ * We must create the list here because it uses the
+ * dn_dirty_link[] of this txg.
+ */
+ list_create(newlist, sizeof (dnode_t),
+ offsetof(dnode_t, dn_dirty_link[txgoff]));
+ }
+
+ dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
+ dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
list = &os->os_meta_dnode->dn_dirty_records[txgoff];
while (dr = list_head(list)) {
@@ -945,6 +1017,146 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
zio_nowait(zio);
}
+static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+
+void
+dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+{
+ used_cbs[ost] = cb;
+}
+
+boolean_t
+dmu_objset_userused_enabled(objset_impl_t *os)
+{
+ return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
+ used_cbs[os->os_phys->os_type] &&
+ os->os_userused_dnode);
+}
+
+void
+dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ list_t *list = &os->os_synced_dnodes;
+ static const char zerobuf[DN_MAX_BONUSLEN] = {0};
+
+ ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+
+ while (dn = list_head(list)) {
+ dmu_object_type_t bonustype;
+
+ ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
+ ASSERT(dn->dn_oldphys);
+ ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
+ dn->dn_phys->dn_flags &
+ DNODE_FLAG_USERUSED_ACCOUNTED);
+
+ /* Allocate the user/groupused objects if necessary. */
+ if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
+ VERIFY(0 == zap_create_claim(&os->os,
+ DMU_USERUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ VERIFY(0 == zap_create_claim(&os->os,
+ DMU_GROUPUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ }
+
+ /*
+ * If the object was not previously
+ * accounted, pretend that it was free.
+ */
+ if (!(dn->dn_oldphys->dn_flags &
+ DNODE_FLAG_USERUSED_ACCOUNTED)) {
+ bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
+ }
+
+ /*
+ * If the object was freed, use the previous bonustype.
+ */
+ bonustype = dn->dn_phys->dn_bonustype ?
+ dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
+ ASSERT(dn->dn_phys->dn_type != 0 ||
+ (bcmp(DN_BONUS(dn->dn_phys), zerobuf,
+ DN_MAX_BONUSLEN) == 0 &&
+ DN_USED_BYTES(dn->dn_phys) == 0));
+ ASSERT(dn->dn_oldphys->dn_type != 0 ||
+ (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
+ DN_MAX_BONUSLEN) == 0 &&
+ DN_USED_BYTES(dn->dn_oldphys) == 0));
+ used_cbs[os->os_phys->os_type](&os->os, bonustype,
+ DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
+ DN_USED_BYTES(dn->dn_oldphys),
+ DN_USED_BYTES(dn->dn_phys), tx);
+
+ /*
+ * The mutex is needed here for interlock with dnode_allocate.
+ */
+ mutex_enter(&dn->dn_mtx);
+ zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t));
+ dn->dn_oldphys = NULL;
+ mutex_exit(&dn->dn_mtx);
+
+ list_remove(list, dn);
+ dnode_rele(dn, list);
+ }
+}
+
+boolean_t
+dmu_objset_userspace_present(objset_t *os)
+{
+ return (os->os->os_phys->os_flags &
+ OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+}
+
+int
+dmu_objset_userspace_upgrade(objset_t *os)
+{
+ uint64_t obj;
+ int err = 0;
+
+ if (dmu_objset_userspace_present(os))
+ return (0);
+ if (!dmu_objset_userused_enabled(os->os))
+ return (ENOTSUP);
+ if (dmu_objset_is_snapshot(os))
+ return (EINVAL);
+
+ /*
+ * We simply need to mark every object dirty, so that it will be
+ * synced out and now accounted. If this is called
+ * concurrently, or if we already did some work before crashing,
+ * that's fine, since we track each object's accounted state
+ * independently.
+ */
+
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ int objerr;
+
+ if (issig(JUSTLOOKING) && issig(FORREAL))
+ return (EINTR);
+
+ objerr = dmu_bonus_hold(os, obj, FTAG, &db);
+ if (objerr)
+ continue;
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, obj);
+ objerr = dmu_tx_assign(tx, TXG_WAIT);
+ if (objerr) {
+ dmu_tx_abort(tx);
+ continue;
+ }
+ dmu_buf_will_dirty(db, tx);
+ dmu_buf_rele(db, FTAG);
+ dmu_tx_commit(tx);
+ }
+
+ os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ return (0);
+}
+
void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
@@ -978,6 +1190,8 @@ dmu_objset_stats(objset_t *os, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
os->os->os_phys->os_type);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
+ dmu_objset_userspace_present(os));
}
int
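For orientation, the user/group accounting machinery added above hangs off a
per-objset-type callback table: a consumer registers an objset_used_cb_t via
dmu_objset_register_type(), and dmu_objset_do_userquota_callbacks() invokes
it with the old and new bonus buffers and used-byte counts for every dnode
synced in the txg. A hedged sketch of the registration pattern, with the
callback's argument list read off the call site in this diff (the example_*
names are illustrative, not part of this commit):

	/*
	 * Illustrative consumer of the new DMU hooks. The argument list
	 * mirrors the invocation in dmu_objset_do_userquota_callbacks():
	 * (objset, bonus type, old/new bonus, old/new used bytes, tx).
	 */
	static void
	example_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
	    void *oldbonus, void *newbonus,
	    uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
	{
		/*
		 * Decode the owner from the bonus buffers, then credit
		 * oldused back to the old owner and charge newused to the
		 * new one (e.g. in the DMU_USERUSED_OBJECT /
		 * DMU_GROUPUSED_OBJECT ZAPs) in the context of tx.
		 */
	}

	static void
	example_init(void)
	{
		/* Register once, before objsets of this type start syncing. */
		dmu_objset_register_type(DMU_OST_ZFS, example_space_delta_cb);
	}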