summaryrefslogtreecommitdiffstats
path: root/sys/cddl/boot
diff options
context:
space:
mode:
authordelphij <delphij@FreeBSD.org>2014-07-15 04:53:34 +0000
committerdelphij <delphij@FreeBSD.org>2014-07-15 04:53:34 +0000
commit9d1dc5bcc93f630c5fc480593d45fbcfebef15f2 (patch)
tree8bb48019ff1fadeaecb28e32888ab6093a430843 /sys/cddl/boot
parent5ed76404fecd3da81f3708db0259176767603aa5 (diff)
downloadFreeBSD-src-9d1dc5bcc93f630c5fc480593d45fbcfebef15f2.zip
FreeBSD-src-9d1dc5bcc93f630c5fc480593d45fbcfebef15f2.tar.gz
MFC r268075: MFV r267565:
4757 ZFS embedded-data block pointers ("zero block compression") 4913 zfs release should not be subject to space checks
Diffstat (limited to 'sys/cddl/boot')
-rw-r--r--sys/cddl/boot/zfs/README7
-rw-r--r--sys/cddl/boot/zfs/blkptr.c73
-rw-r--r--sys/cddl/boot/zfs/zfsimpl.h127
-rw-r--r--sys/cddl/boot/zfs/zfssubr.c10
4 files changed, 204 insertions, 13 deletions
diff --git a/sys/cddl/boot/zfs/README b/sys/cddl/boot/zfs/README
index f7c045d..5f16d5c 100644
--- a/sys/cddl/boot/zfs/README
+++ b/sys/cddl/boot/zfs/README
@@ -7,9 +7,10 @@ are used by the ZFS bootstrap:
sha256.c checksum support
lz4.c compression support
lzjb.c compression support
+ blkptr.c ZFS embedded-data block pointers support
zfssubr.c checksum, compression and raidz support
zfsimpl.h mostly describing the physical layout
-The files fletcher.c, lzjb.c and sha256.c are largely identical to the
-ZFS base code (with write support removed) and could be shared but
-that might complicate future imports from OpenSolaris.
+The files fletcher.c, lzjb.c, lz4.c, sha256.c and blkptr.c are largely identical
+to the ZFS base code (with write support removed) and could be shared but that
+might complicate future imports from Illumos.
diff --git a/sys/cddl/boot/zfs/blkptr.c b/sys/cddl/boot/zfs/blkptr.c
new file mode 100644
index 0000000..c36c59b
--- /dev/null
+++ b/sys/cddl/boot/zfs/blkptr.c
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
+
+/*
+ * Embedded-data Block Pointers
+ *
+ * Normally, block pointers point (via their DVAs) to a block which holds data.
+ * If the data that we need to store is very small, this is an inefficient
+ * use of space, because a block must be at minimum 1 sector (typically 512
+ * bytes or 4KB). Additionally, reading these small blocks tends to generate
+ * more random reads.
+ *
+ * Embedded-data Block Pointers allow small pieces of data (the "payload",
+ * up to 112 bytes) to be stored in the block pointer itself, instead of
+ * being pointed to. The "Pointer" part of this name is a bit of a
+ * misnomer, as nothing is pointed to.
+ *
+ * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to
+ * be embedded in the block pointer. The logic for this is handled in
+ * the SPA, by the zio pipeline. Therefore most code outside the zio
+ * pipeline doesn't need special-cases to handle these block pointers.
+ *
+ * See spa.h for details on the exact layout of embedded block pointers.
+ */
+
+/*
+ * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
+ * more than BPE_PAYLOAD_SIZE bytes).
+ */
+void
+decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
+{
+ int psize;
+ uint8_t *buf8 = buf;
+ uint64_t w = 0;
+ const uint64_t *bp64 = (const uint64_t *)bp;
+
+ ASSERT(BP_IS_EMBEDDED(bp));
+
+ psize = BPE_GET_PSIZE(bp);
+
+ /*
+ * Decode the words of the block pointer into the byte array.
+ * Low bits of first word are the first byte (little endian).
+ */
+ for (int i = 0; i < psize; i++) {
+ if (i % sizeof (w) == 0) {
+ /* beginning of a word */
+ ASSERT3P(bp64, <, bp + 1);
+ w = *bp64;
+ bp64++;
+ if (!BPE_IS_PAYLOADWORD(bp, bp64))
+ bp64++;
+ }
+ buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
+ }
+}
diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h
index 7bc4c43..98f5479 100644
--- a/sys/cddl/boot/zfs/zfsimpl.h
+++ b/sys/cddl/boot/zfs/zfsimpl.h
@@ -55,9 +55,14 @@
/*
* Copyright 2013 by Saso Kiselkov. All rights reserved.
*/
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
#define MAXNAMELEN 256
+#define _NOTE(s)
+
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
@@ -163,7 +168,7 @@ typedef struct zio_cksum {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -197,7 +202,8 @@ typedef struct zio_cksum {
* G gang block indicator
* B byteorder (endianness)
* D dedup
- * X unused
+ * X encryption (on version 30, which is not supported)
+ * E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
@@ -205,6 +211,100 @@ typedef struct zio_cksum {
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
+
+/*
+ * "Embedded" blkptr_t's don't actually point to a block, instead they
+ * have a data payload embedded in the blkptr_t itself. See the comment
+ * in blkptr.c for more details.
+ *
+ * The blkptr_t is laid out as follows:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 0 | payload |
+ * 1 | payload |
+ * 2 | payload |
+ * 3 | payload |
+ * 4 | payload |
+ * 5 | payload |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 7 | payload |
+ * 8 | payload |
+ * 9 | payload |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * a | logical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * b | payload |
+ * c | payload |
+ * d | payload |
+ * e | payload |
+ * f | payload |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Legend:
+ *
+ * payload contains the embedded data
+ * B (byteorder) byteorder (endianness)
+ * D (dedup) padding (set to zero)
+ * X encryption (set to zero; see above)
+ * E (embedded) set to one
+ * lvl indirection level
+ * type DMU object type
+ * etype how to interpret embedded data (BP_EMBEDDED_TYPE_*)
+ * comp compression function of payload
+ * PSIZE size of payload after compression, in bytes
+ * LSIZE logical size of payload, in bytes
+ * note that 25 bits is enough to store the largest
+ * "normal" BP's LSIZE (2^16 * 2^9) in bytes
+ * log. birth transaction group in which the block was logically born
+ *
+ * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded
+ * bp's they are stored in units of SPA_MINBLOCKSHIFT.
+ * Generally, the generic BP_GET_*() macros can be used on embedded BP's.
+ * The B, D, X, lvl, type, and comp fields are stored the same as with normal
+ * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
+ * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
+ * other macros, as they assert that they are only used on BP's of the correct
+ * "embedded-ness".
+ */
+
+#define BPE_GET_ETYPE(bp) \
+ (ASSERT(BP_IS_EMBEDDED(bp)), \
+ BF64_GET((bp)->blk_prop, 40, 8))
+#define BPE_SET_ETYPE(bp, t) do { \
+ ASSERT(BP_IS_EMBEDDED(bp)); \
+ BF64_SET((bp)->blk_prop, 40, 8, t); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BPE_GET_LSIZE(bp) \
+ (ASSERT(BP_IS_EMBEDDED(bp)), \
+ BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1))
+#define BPE_SET_LSIZE(bp, x) do { \
+ ASSERT(BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BPE_GET_PSIZE(bp) \
+ (ASSERT(BP_IS_EMBEDDED(bp)), \
+ BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1))
+#define BPE_SET_PSIZE(bp, x) do { \
+ ASSERT(BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \
+_NOTE(CONSTCOND) } while (0)
+
+typedef enum bp_embedded_type {
+ BP_EMBEDDED_TYPE_DATA,
+ BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */
+ NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED
+} bp_embedded_type_t;
+
+#define BPE_NUM_WORDS 14
+#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
+#define BPE_IS_PAYLOADWORD(bp, wp) \
+ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
+
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
@@ -242,18 +342,22 @@ typedef struct blkptr {
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
- (BP_IS_HOLE(bp) ? 0 : \
+ (BP_IS_EMBEDDED(bp) ? \
+ (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \
BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1))
-#define BP_SET_LSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
+#define BP_SET_LSIZE(bp, x) do { \
+ ASSERT(!BP_IS_EMBEDDED(bp)); \
+ BF64_SET_SB((bp)->blk_prop, \
+ 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
+_NOTE(CONSTCOND) } while (0)
#define BP_GET_PSIZE(bp) \
BF64_GET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_PSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
-#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
-#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
+#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7)
+#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x)
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
@@ -264,6 +368,8 @@ typedef struct blkptr {
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
+#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1)
+
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
@@ -331,6 +437,11 @@ typedef struct blkptr {
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
+#define BPE_NUM_WORDS 14
+#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
+#define BPE_IS_PAYLOADWORD(bp, wp) \
+ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
+
/*
* Embedded checksum
*/
@@ -1341,3 +1452,5 @@ typedef struct spa {
objset_phys_t spa_mos; /* MOS for this pool */
int spa_inited; /* initialized */
} spa_t;
+
+static void decode_embedded_bp_compressed(const blkptr_t *, void *);
diff --git a/sys/cddl/boot/zfs/zfssubr.c b/sys/cddl/boot/zfs/zfssubr.c
index 0f01848..a64f065 100644
--- a/sys/cddl/boot/zfs/zfssubr.c
+++ b/sys/cddl/boot/zfs/zfssubr.c
@@ -30,9 +30,11 @@ static uint64_t zfs_crc64_table[256];
#define ECKSUM 666
-#define ASSERT(...) do { } while (0)
-#define ASSERT3U(...) do { } while (0)
-#define ASSERT3S(...) do { } while (0)
+#define ASSERT3S(x, y, z) ((void)0)
+#define ASSERT3U(x, y, z) ((void)0)
+#define ASSERT3P(x, y, z) ((void)0)
+#define ASSERT0(x) ((void)0)
+#define ASSERT(x) ((void)0)
#define panic(...) do { \
printf(__VA_ARGS__); \
@@ -82,6 +84,8 @@ typedef struct zio_checksum_info {
const char *ci_name; /* descriptive name */
} zio_checksum_info_t;
+#include "blkptr.c"
+
#include "fletcher.c"
#include "sha256.c"
OpenPOWER on IntegriCloud