summaryrefslogtreecommitdiffstats
path: root/sys/cddl/boot/zfs/zfsimpl.h
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
committerpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
commit1b03c5bf41222b723415638f03e00ed12cac076a (patch)
treeef515cadc08bf427e4d3f1360199ec9827b1596b /sys/cddl/boot/zfs/zfsimpl.h
parentc67d387baf03726323703774b1b320235fb1f24b (diff)
downloadFreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.zip
FreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.tar.gz
Finally... Import the latest open-source ZFS version - (SPA) 28.
Few new things available from now on: - Data deduplication. - Triple parity RAIDZ (RAIDZ3). - zfs diff. - zpool split. - Snapshot holds. - zpool import -F. Allows to rewind corrupted pool to earlier transaction group. - Possibility to import pool in read-only mode. MFC after: 1 month
Diffstat (limited to 'sys/cddl/boot/zfs/zfsimpl.h')
-rw-r--r--sys/cddl/boot/zfs/zfsimpl.h246
1 files changed, 187 insertions, 59 deletions
diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h
index 34b9a63..9f24cc0 100644
--- a/sys/cddl/boot/zfs/zfsimpl.h
+++ b/sys/cddl/boot/zfs/zfsimpl.h
@@ -95,6 +95,14 @@
BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
/*
+ * Macros to reverse byte order
+ */
+#define BSWAP_8(x) ((x) & 0xff)
+#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
+#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
+#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
+
+/*
* We currently support nine block sizes, from 512 bytes to 128K.
* We could go higher, but the benefits are near-zero and the cost
* of COWing a giant block to modify one byte would become excessive.
@@ -150,15 +158,15 @@ typedef struct zio_cksum {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
+ * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 9 | padding |
+ * 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * a | birth txg |
+ * a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -182,25 +190,29 @@ typedef struct zio_cksum {
* cksum checksum function
* comp compression function
* G gang block indicator
- * E endianness
- * type DMU object type
+ * B byteorder (endianness)
+ * D dedup
+ * X unused
* lvl level of indirection
- * birth txg transaction group in which the block was born
+ * type DMU object type
+ * phys birth txg of block allocation; zero if same as logical birth txg
+ * log. birth transaction group in which the block was logically born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
-typedef struct blkptr {
- dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
- uint64_t blk_prop; /* size, compression, type, etc */
- uint64_t blk_pad[3]; /* Extra space for the future */
- uint64_t blk_birth; /* transaction group at birth */
- uint64_t blk_fill; /* fill count */
- zio_cksum_t blk_cksum; /* 256-bit checksum */
-} blkptr_t;
-
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
+typedef struct blkptr {
+ dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
+ uint64_t blk_prop; /* size, compression, type, etc */
+ uint64_t blk_pad[2]; /* Extra space for the future */
+ uint64_t blk_phys_birth; /* txg when block was allocated */
+ uint64_t blk_birth; /* transaction group at birth */
+ uint64_t blk_fill; /* fill count */
+ zio_cksum_t blk_cksum; /* 256-bit checksum */
+} blkptr_t;
+
/*
* Macros to get and set fields in a bp or DVA.
*/
@@ -246,9 +258,15 @@ typedef struct blkptr {
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
+#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
+#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
+
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
+#define BP_PHYSICAL_BIRTH(bp) \
+ ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
+
#define BP_GET_ASIZE(bp) \
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
@@ -304,18 +322,41 @@ typedef struct blkptr {
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
- (bp)->blk_pad[2] = 0; \
+ (bp)->blk_phys_birth = 0; \
(bp)->blk_birth = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
-#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
+/*
+ * Embedded checksum
+ */
+#define ZEC_MAGIC 0x210da7ab10c7a11ULL
-typedef struct zio_block_tail {
- uint64_t zbt_magic; /* for validation, endianness */
- zio_cksum_t zbt_cksum; /* 256-bit checksum */
-} zio_block_tail_t;
+typedef struct zio_eck {
+ uint64_t zec_magic; /* for validation, endianness */
+ zio_cksum_t zec_cksum; /* 256-bit checksum */
+} zio_eck_t;
+
+/*
+ * Gang block headers are self-checksumming and contain an array
+ * of block pointers.
+ */
+#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
+#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
+ sizeof (zio_eck_t)) / sizeof (blkptr_t))
+#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
+ sizeof (zio_eck_t) - \
+ (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
+ sizeof (uint64_t))
+
+typedef struct zio_gbh {
+ blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
+ uint64_t zg_filler[SPA_GBH_FILLER];
+ zio_eck_t zg_tail;
+} zio_gbh_phys_t;
+
+#define VDEV_RAIDZ_MAXPARITY 3
#define VDEV_PAD_SIZE (8 << 10)
/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
@@ -324,7 +365,7 @@ typedef struct zio_block_tail {
#define VDEV_UBERBLOCK_RING (128 << 10)
#define VDEV_UBERBLOCK_SHIFT(vd) \
- MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT)
+ MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT)
#define VDEV_UBERBLOCK_COUNT(vd) \
(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
@@ -332,8 +373,8 @@ typedef struct zio_block_tail {
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
typedef struct vdev_phys {
- char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
- zio_block_tail_t vp_zbt;
+ char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)];
+ zio_eck_t vp_zbt;
} vdev_phys_t;
typedef struct vdev_label {
@@ -363,24 +404,6 @@ typedef struct vdev_label {
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
#define VDEV_LABELS 4
-/*
- * Gang block headers are self-checksumming and contain an array
- * of block pointers.
- */
-#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
-#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
- sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
-#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
- sizeof (zio_block_tail_t) - \
- (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
- sizeof (uint64_t))
-
-typedef struct zio_gbh {
- blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
- uint64_t zg_filler[SPA_GBH_FILLER];
- zio_block_tail_t zg_tail;
-} zio_gbh_phys_t;
-
enum zio_checksum {
ZIO_CHECKSUM_INHERIT = 0,
ZIO_CHECKSUM_ON,
@@ -391,10 +414,11 @@ enum zio_checksum {
ZIO_CHECKSUM_FLETCHER_2,
ZIO_CHECKSUM_FLETCHER_4,
ZIO_CHECKSUM_SHA256,
+ ZIO_CHECKSUM_ZILOG2,
ZIO_CHECKSUM_FUNCTIONS
};
-#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
+#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
enum zio_compress {
@@ -412,6 +436,7 @@ enum zio_compress {
ZIO_COMPRESS_GZIP_7,
ZIO_COMPRESS_GZIP_8,
ZIO_COMPRESS_GZIP_9,
+ ZIO_COMPRESS_ZLE,
ZIO_COMPRESS_FUNCTIONS
};
@@ -470,13 +495,28 @@ typedef enum {
#define SPA_VERSION_13 13ULL
#define SPA_VERSION_14 14ULL
#define SPA_VERSION_15 15ULL
+#define SPA_VERSION_16 16ULL
+#define SPA_VERSION_17 17ULL
+#define SPA_VERSION_18 18ULL
+#define SPA_VERSION_19 19ULL
+#define SPA_VERSION_20 20ULL
+#define SPA_VERSION_21 21ULL
+#define SPA_VERSION_22 22ULL
+#define SPA_VERSION_23 23ULL
+#define SPA_VERSION_24 24ULL
+#define SPA_VERSION_25 25ULL
+#define SPA_VERSION_26 26ULL
+#define SPA_VERSION_27 27ULL
+#define SPA_VERSION_28 28ULL
+
/*
- * When bumping up SPA_VERSION, make sure GRUB ZFS understand the on-disk
- * format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
- * and do the appropriate changes.
+ * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
+ * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
+ * and do the appropriate changes. Also bump the version number in
+ * usr/src/grub/capability.
*/
-#define SPA_VERSION SPA_VERSION_15
-#define SPA_VERSION_STRING "15"
+#define SPA_VERSION SPA_VERSION_28
+#define SPA_VERSION_STRING "28"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -513,6 +553,20 @@ typedef enum {
#define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13
#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14
#define SPA_VERSION_USERSPACE SPA_VERSION_15
+#define SPA_VERSION_STMF_PROP SPA_VERSION_16
+#define SPA_VERSION_RAIDZ3 SPA_VERSION_17
+#define SPA_VERSION_USERREFS SPA_VERSION_18
+#define SPA_VERSION_HOLES SPA_VERSION_19
+#define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20
+#define SPA_VERSION_DEDUP SPA_VERSION_21
+#define SPA_VERSION_RECVD_PROPS SPA_VERSION_22
+#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23
+#define SPA_VERSION_SA SPA_VERSION_24
+#define SPA_VERSION_SCAN SPA_VERSION_25
+#define SPA_VERSION_DIR_CLONES SPA_VERSION_26
+#define SPA_VERSION_DEADLISTS SPA_VERSION_26
+#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
+#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
/*
* The following are configuration names used in the nvlist describing a pool's
@@ -558,6 +612,8 @@ typedef enum {
#define ZPOOL_CONFIG_FAULTED "faulted"
#define ZPOOL_CONFIG_DEGRADED "degraded"
#define ZPOOL_CONFIG_REMOVED "removed"
+#define ZPOOL_CONFIG_FRU "fru"
+#define ZPOOL_CONFIG_AUX_STATE "aux_state"
#define VDEV_TYPE_ROOT "root"
#define VDEV_TYPE_MIRROR "mirror"
@@ -566,7 +622,10 @@ typedef enum {
#define VDEV_TYPE_DISK "disk"
#define VDEV_TYPE_FILE "file"
#define VDEV_TYPE_MISSING "missing"
+#define VDEV_TYPE_HOLE "hole"
#define VDEV_TYPE_SPARE "spare"
+#define VDEV_TYPE_LOG "log"
+#define VDEV_TYPE_L2CACHE "l2cache"
/*
* This is needed in userland to report the minimum necessary device size.
@@ -577,11 +636,7 @@ typedef enum {
* The location of the pool configuration repository, shared between kernel and
* userland.
*/
-#define ZPOOL_CACHE_DIR "/boot/zfs"
-#define ZPOOL_CACHE_FILE "zpool.cache"
-#define ZPOOL_CACHE_TMP ".zpool.cache"
-
-#define ZPOOL_CACHE ZPOOL_CACHE_DIR "/" ZPOOL_CACHE_FILE
+#define ZPOOL_CACHE "/boot/zfs/zpool.cache"
/*
* vdev states are ordered from least to most healthy.
@@ -694,7 +749,11 @@ struct uberblock {
#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift))
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
-#define DNODE_FLAG_USED_BYTES (1<<0)
+#define DNODE_FLAG_USED_BYTES (1<<0)
+#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
+
+/* Does dnode have a SA spill blkptr in bonus? */
+#define DNODE_FLAG_SPILL_BLKPTR (1<<2)
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
@@ -716,7 +775,8 @@ typedef struct dnode_phys {
uint64_t dn_pad3[4];
blkptr_t dn_blkptr[1];
- uint8_t dn_bonus[DN_MAX_BONUSLEN];
+ uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)];
+ blkptr_t dn_spill;
} dnode_phys_t;
typedef enum dmu_object_type {
@@ -744,7 +804,7 @@ typedef enum dmu_object_type {
DMU_OT_DSL_DATASET, /* UINT64 */
/* zpl: */
DMU_OT_ZNODE, /* ZNODE */
- DMU_OT_ACL, /* ACL */
+ DMU_OT_OLDACL, /* Old ACL */
DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */
DMU_OT_DIRECTORY_CONTENTS, /* ZAP */
DMU_OT_MASTER_NODE, /* ZAP */
@@ -761,7 +821,24 @@ typedef enum dmu_object_type {
DMU_OT_SPA_HISTORY, /* UINT8 */
DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */
DMU_OT_POOL_PROPS, /* ZAP */
-
+ DMU_OT_DSL_PERMS, /* ZAP */
+ DMU_OT_ACL, /* ACL */
+ DMU_OT_SYSACL, /* SYSACL */
+ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
+ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
+ DMU_OT_NEXT_CLONES, /* ZAP */
+ DMU_OT_SCAN_QUEUE, /* ZAP */
+ DMU_OT_USERGROUP_USED, /* ZAP */
+ DMU_OT_USERGROUP_QUOTA, /* ZAP */
+ DMU_OT_USERREFS, /* ZAP */
+ DMU_OT_DDT_ZAP, /* ZAP */
+ DMU_OT_DDT_STATS, /* ZAP */
+ DMU_OT_SA, /* System attr */
+ DMU_OT_SA_MASTER_NODE, /* ZAP */
+ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */
+ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */
+ DMU_OT_SCAN_XLATE, /* ZAP */
+ DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */
DMU_OT_NUMTYPES
} dmu_object_type_t;
@@ -776,6 +853,54 @@ typedef enum dmu_objset_type {
} dmu_objset_type_t;
/*
+ * header for all bonus and spill buffers.
+ * The header has a fixed portion with a variable number
+ * of "lengths" depending on the number of variable sized
+ * attribues which are determined by the "layout number"
+ */
+
+#define SA_MAGIC 0x2F505A /* ZFS SA */
+typedef struct sa_hdr_phys {
+ uint32_t sa_magic;
+ uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */
+ uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */
+ /* ... Data follows the lengths. */
+} sa_hdr_phys_t;
+
+/*
+ * sa_hdr_phys -> sa_layout_info
+ *
+ * 16 10 0
+ * +--------+-------+
+ * | hdrsz |layout |
+ * +--------+-------+
+ *
+ * Bits 0-10 are the layout number
+ * Bits 11-16 are the size of the header.
+ * The hdrsize is the number * 8
+ *
+ * For example.
+ * hdrsz of 1 ==> 8 byte header
+ * 2 ==> 16 byte header
+ *
+ */
+
+#define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10)
+#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0)
+#define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
+{ \
+ BF32_SET_SB(x, 10, 6, 3, 0, size); \
+ BF32_SET(x, 0, 10, num); \
+}
+
+#define SA_MODE_OFFSET 0
+#define SA_SIZE_OFFSET 8
+#define SA_GEN_OFFSET 16
+#define SA_UID_OFFSET 24
+#define SA_GID_OFFSET 32
+#define SA_PARENT_OFFSET 40
+
+/*
* Intent log header - this on disk structure holds fields to manage
* the log. All fields are 64 bit to easily handle cross architectures.
*/
@@ -787,12 +912,14 @@ typedef struct zil_header {
uint64_t zh_pad[5];
} zil_header_t;
+#define OBJSET_PHYS_SIZE 2048
+
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
uint64_t os_flags;
- char os_pad[2048 - sizeof (dnode_phys_t)*3 -
+ char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
sizeof (zil_header_t) - sizeof (uint64_t)*2];
dnode_phys_t os_userused_dnode;
dnode_phys_t os_groupused_dnode;
@@ -1174,11 +1301,12 @@ typedef struct vdev {
STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */
STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */
vdev_list_t v_children; /* children of this vdev */
- char *v_name; /* vdev name */
+ const char *v_name; /* vdev name */
uint64_t v_guid; /* vdev guid */
int v_id; /* index in parent */
int v_ashift; /* offset to block shift */
int v_nparity; /* # parity for raidz */
+ struct vdev *v_top; /* parent vdev */
int v_nchildren; /* # children */
vdev_state_t v_state; /* current state */
vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */
OpenPOWER on IntegriCloud