summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
committerpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
commit1b03c5bf41222b723415638f03e00ed12cac076a (patch)
treeef515cadc08bf427e4d3f1360199ec9827b1596b /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
parentc67d387baf03726323703774b1b320235fb1f24b (diff)
downloadFreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.zip
FreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.tar.gz
Finally... Import the latest open-source ZFS version - (SPA) 28.
A few new things are available from now on: - Data deduplication. - Triple parity RAIDZ (RAIDZ3). - zfs diff. - zpool split. - Snapshot holds. - zpool import -F, which allows rewinding a corrupted pool to an earlier transaction group. - The possibility to import a pool in read-only mode. MFC after: 1 month
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c178
1 files changed, 61 insertions, 117 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
index 3105088..29378d8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
@@ -44,14 +43,6 @@
#include <sys/zfs_fuid.h>
#include <sys/dsl_dataset.h>
-#define ZFS_HANDLE_REPLAY(zilog, tx) \
- if (zilog->zl_replay) { \
- dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); \
- zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = \
- zilog->zl_replaying_seq; \
- return; \
- }
-
/*
* These zfs_log_* functions must be called within a dmu tx, in one
* of 2 contexts depending on zilog->z_replay:
@@ -180,6 +171,15 @@ zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
+ if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
+ *attrs |= (xoap->xoa_reparse == 0) ? 0 :
+ XAT0_REPARSE;
+ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
+ *attrs |= (xoap->xoa_offline == 0) ? 0 :
+ XAT0_OFFLINE;
+ if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
+ *attrs |= (xoap->xoa_sparse == 0) ? 0 :
+ XAT0_SPARSE;
}
static void *
@@ -241,7 +241,6 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
zfs_fuid_info_t *fuidp, vattr_t *vap)
{
itx_t *itx;
- uint64_t seq;
lr_create_t *lr;
lr_acl_create_t *lracl;
size_t aclsize;
@@ -253,11 +252,9 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
size_t namesize = strlen(name) + 1;
size_t fuidsz = 0;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
/*
* If we have FUIDs present then add in space for
* domains and ACE fuid's if any.
@@ -288,21 +285,25 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
- lr->lr_mode = zp->z_phys->zp_mode;
- if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) {
- lr->lr_uid = (uint64_t)zp->z_phys->zp_uid;
+ lr->lr_mode = zp->z_mode;
+ if (!IS_EPHEMERAL(zp->z_uid)) {
+ lr->lr_uid = (uint64_t)zp->z_uid;
} else {
lr->lr_uid = fuidp->z_fuid_owner;
}
- if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) {
- lr->lr_gid = (uint64_t)zp->z_phys->zp_gid;
+ if (!IS_EPHEMERAL(zp->z_gid)) {
+ lr->lr_gid = (uint64_t)zp->z_gid;
} else {
lr->lr_gid = fuidp->z_fuid_group;
}
- lr->lr_gen = zp->z_phys->zp_gen;
- lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
- lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
- lr->lr_rdev = zp->z_phys->zp_rdev;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
+ sizeof (uint64_t));
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+ lr->lr_crtime, sizeof (uint64_t) * 2);
+
+ if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs), &lr->lr_rdev,
+ sizeof (lr->lr_rdev)) != 0)
+ lr->lr_rdev = 0;
/*
* Fill in xvattr info if any
@@ -341,9 +342,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
bcopy(name, end, namesize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -351,25 +350,23 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, char *name)
+ znode_t *dzp, char *name, uint64_t foid)
{
itx_t *itx;
- uint64_t seq;
lr_remove_t *lr;
size_t namesize = strlen(name) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr = (lr_remove_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
bcopy(name, (char *)(lr + 1), namesize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
+ itx->itx_oid = foid;
+
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -380,24 +377,19 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name)
{
itx_t *itx;
- uint64_t seq;
lr_link_t *lr;
size_t namesize = strlen(name) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr = (lr_link_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_link_obj = zp->z_id;
bcopy(name, (char *)(lr + 1), namesize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -408,32 +400,28 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, char *link)
{
itx_t *itx;
- uint64_t seq;
lr_create_t *lr;
size_t namesize = strlen(name) + 1;
size_t linksize = strlen(link) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
- lr->lr_mode = zp->z_phys->zp_mode;
- lr->lr_uid = zp->z_phys->zp_uid;
- lr->lr_gid = zp->z_phys->zp_gid;
- lr->lr_gen = zp->z_phys->zp_gen;
- lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
- lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
+ lr->lr_uid = zp->z_uid;
+ lr->lr_gid = zp->z_gid;
+ lr->lr_mode = zp->z_mode;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
+ sizeof (uint64_t));
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+ lr->lr_crtime, sizeof (uint64_t) * 2);
bcopy(name, (char *)(lr + 1), namesize);
bcopy(link, (char *)(lr + 1) + namesize, linksize);
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -444,27 +432,22 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
{
itx_t *itx;
- uint64_t seq;
lr_rename_t *lr;
size_t snamesize = strlen(sname) + 1;
size_t dnamesize = strlen(dname) + 1;
- if (zilog == NULL)
+ if (zil_replaying(zilog, tx))
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
lr = (lr_rename_t *)&itx->itx_lr;
lr->lr_sdoid = sdzp->z_id;
lr->lr_tdoid = tdzp->z_id;
bcopy(sname, (char *)(lr + 1), snamesize);
bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
+ itx->itx_oid = szp->z_id;
- seq = zil_itx_assign(zilog, itx, tx);
- sdzp->z_last_itx = seq;
- tdzp->z_last_itx = seq;
- szp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -472,9 +455,6 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
ssize_t zfs_immediate_write_sz = 32768;
-#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
- sizeof (lr_write_t))
-
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag)
@@ -482,37 +462,17 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_wr_state_t write_state;
boolean_t slogging;
uintptr_t fsync_cnt;
+ ssize_t immediate_write_sz;
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ ? 0 : zfs_immediate_write_sz;
- /*
- * Writes are handled in three different ways:
- *
- * WR_INDIRECT:
- * In this mode, if we need to commit the write later, then the block
- * is immediately written into the file system (using dmu_sync),
- * and a pointer to the block is put into the log record.
- * When the txg commits the block is linked in.
- * This saves additionally writing the data into the log record.
- * There are a few requirements for this to occur:
- * - write is greater than zfs_immediate_write_sz
- * - not using slogs (as slogs are assumed to always be faster
- * than writing into the main pool)
- * - the write occupies only one block
- * WR_COPIED:
- * If we know we'll immediately be committing the
- * transaction (FSYNC or FDSYNC), the we allocate a larger
- * log record here for the data and copy the data in.
- * WR_NEED_COPY:
- * Otherwise we don't allocate a buffer, and *if* we need to
- * flush the write later then a buffer is allocated and
- * we retrieve the data using the dmu.
- */
- slogging = spa_has_slogs(zilog->zl_spa);
- if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ slogging = spa_has_slogs(zilog->zl_spa) &&
+ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+ if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
@@ -541,8 +501,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
lr = (lr_write_t *)&itx->itx_lr;
if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
- kmem_free(itx, offsetof(itx_t, itx_lr) +
- itx->itx_lr.lrc_reclen);
+ zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
write_state = WR_NEED_COPY;
@@ -559,13 +518,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx->itx_private = zp->z_zfsvfs;
- if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) ||
- (ioflag & (FSYNC | FDSYNC)))
- itx->itx_sync = B_TRUE;
- else
+ if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
+ (fsync_cnt == 0))
itx->itx_sync = B_FALSE;
- zp->z_last_itx = zil_itx_assign(zilog, itx, tx);
+ zil_itx_assign(zilog, itx, tx);
off += len;
resid -= len;
@@ -580,14 +537,11 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, uint64_t off, uint64_t len)
{
itx_t *itx;
- uint64_t seq;
lr_truncate_t *lr;
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_truncate_t *)&itx->itx_lr;
lr->lr_foid = zp->z_id;
@@ -595,8 +549,7 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
lr->lr_length = len;
itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -607,18 +560,14 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
{
itx_t *itx;
- uint64_t seq;
lr_setattr_t *lr;
xvattr_t *xvap = (xvattr_t *)vap;
size_t recsize = sizeof (lr_setattr_t);
void *start;
-
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
/*
* If XVATTR set, then log record size needs to allow
* for lr_attr_t + xvattr mask, mapsize and create time
@@ -662,8 +611,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
(void) zfs_log_fuid_domains(fuidp, start);
itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
/*
@@ -674,7 +622,6 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
{
itx_t *itx;
- uint64_t seq;
lr_acl_v0_t *lrv0;
lr_acl_t *lr;
int txtype;
@@ -682,11 +629,9 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
size_t txsize;
size_t aclbytes = vsecp->vsa_aclentsz;
- if (zilog == NULL || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
-
txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
TX_ACL_V0 : TX_ACL;
@@ -732,6 +677,5 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
}
itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
+ zil_itx_assign(zilog, itx, tx);
}
OpenPOWER on IntegriCloud