diff options
author | delphij <delphij@FreeBSD.org> | 2014-07-15 04:53:34 +0000 |
---|---|---|
committer | delphij <delphij@FreeBSD.org> | 2014-07-15 04:53:34 +0000 |
commit | 9d1dc5bcc93f630c5fc480593d45fbcfebef15f2 (patch) | |
tree | 8bb48019ff1fadeaecb28e32888ab6093a430843 /sys/cddl/boot | |
parent | 5ed76404fecd3da81f3708db0259176767603aa5 (diff) | |
download | FreeBSD-src-9d1dc5bcc93f630c5fc480593d45fbcfebef15f2.zip FreeBSD-src-9d1dc5bcc93f630c5fc480593d45fbcfebef15f2.tar.gz |
MFC r268075: MFV r267565:
4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks
Diffstat (limited to 'sys/cddl/boot')
-rw-r--r-- | sys/cddl/boot/zfs/README | 7 | ||||
-rw-r--r-- | sys/cddl/boot/zfs/blkptr.c | 73 | ||||
-rw-r--r-- | sys/cddl/boot/zfs/zfsimpl.h | 127 | ||||
-rw-r--r-- | sys/cddl/boot/zfs/zfssubr.c | 10 |
4 files changed, 204 insertions, 13 deletions
diff --git a/sys/cddl/boot/zfs/README b/sys/cddl/boot/zfs/README index f7c045d..5f16d5c 100644 --- a/sys/cddl/boot/zfs/README +++ b/sys/cddl/boot/zfs/README @@ -7,9 +7,10 @@ are used by the ZFS bootstrap: sha256.c checksum support lz4.c compression support lzjb.c compression support + blkptr.c ZFS embedded-data block pointers support zfssubr.c checksum, compression and raidz support zfsimpl.h mostly describing the physical layout -The files fletcher.c, lzjb.c and sha256.c are largely identical to the -ZFS base code (with write support removed) and could be shared but -that might complicate future imports from OpenSolaris. +The files fletcher.c, lzjb.c, lz4.c, sha256.c and blkptr.c are largely identical +to the ZFS base code (with write support removed) and could be shared but that +might complicate future imports from Illumos. diff --git a/sys/cddl/boot/zfs/blkptr.c b/sys/cddl/boot/zfs/blkptr.c new file mode 100644 index 0000000..c36c59b --- /dev/null +++ b/sys/cddl/boot/zfs/blkptr.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +/* + * Embedded-data Block Pointers + * + * Normally, block pointers point (via their DVAs) to a block which holds data. + * If the data that we need to store is very small, this is an inefficient + * use of space, because a block must be at minimum 1 sector (typically 512 + * bytes or 4KB). Additionally, reading these small blocks tends to generate + * more random reads. + * + * Embedded-data Block Pointers allow small pieces of data (the "payload", + * up to 112 bytes) to be stored in the block pointer itself, instead of + * being pointed to. The "Pointer" part of this name is a bit of a + * misnomer, as nothing is pointed to. + * + * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to + * be embedded in the block pointer. The logic for this is handled in + * the SPA, by the zio pipeline. Therefore most code outside the zio + * pipeline doesn't need special-cases to handle these block pointers. + * + * See spa.h for details on the exact layout of embedded block pointers. + */ + +/* + * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be + * more than BPE_PAYLOAD_SIZE bytes). + */ +void +decode_embedded_bp_compressed(const blkptr_t *bp, void *buf) +{ + int psize; + uint8_t *buf8 = buf; + uint64_t w = 0; + const uint64_t *bp64 = (const uint64_t *)bp; + + ASSERT(BP_IS_EMBEDDED(bp)); + + psize = BPE_GET_PSIZE(bp); + + /* + * Decode the words of the block pointer into the byte array. + * Low bits of first word are the first byte (little endian). + */ + for (int i = 0; i < psize; i++) { + if (i % sizeof (w) == 0) { + /* beginning of a word */ + ASSERT3P(bp64, <, bp + 1); + w = *bp64; + bp64++; + if (!BPE_IS_PAYLOADWORD(bp, bp64)) + bp64++; + } + buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); + } +} diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index 7bc4c43..98f5479 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -55,9 +55,14 @@ /* * Copyright 2013 by Saso Kiselkov. All rights reserved. */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ #define MAXNAMELEN 256 +#define _NOTE(s) + /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ @@ -163,7 +168,7 @@ typedef struct zio_cksum { * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE | + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -197,7 +202,8 @@ typedef struct zio_cksum { * G gang block indicator * B byteorder (endianness) * D dedup - * X unused + * X encryption (on version 30, which is not supported) + * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type * phys birth txg of block allocation; zero if same as logical birth txg @@ -205,6 +211,100 @@ typedef struct zio_cksum { * fill count number of non-zero blocks under this bp * checksum[4] 256-bit checksum of the data this bp describes */ + +/* + * "Embedded" blkptr_t's don't actually point to a block, instead they + * have a data payload embedded in the blkptr_t itself. See the comment + * in blkptr.c for more details. + * + * The blkptr_t is laid out as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | payload | + * 1 | payload | + * 2 | payload | + * 3 | payload | + * 4 | payload | + * 5 | payload | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | payload | + * 8 | payload | + * 9 | payload | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | logical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | payload | + * c | payload | + * d | payload | + * e | payload | + * f | payload | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * payload contains the embedded data + * B (byteorder) byteorder (endianness) + * D (dedup) padding (set to zero) + * X encryption (set to zero; see above) + * E (embedded) set to one + * lvl indirection level + * type DMU object type + * etype how to interpret embedded data (BP_EMBEDDED_TYPE_*) + * comp compression function of payload + * PSIZE size of payload after compression, in bytes + * LSIZE logical size of payload, in bytes + * note that 25 bits is enough to store the largest + * "normal" BP's LSIZE (2^16 * 2^9) in bytes + * log. birth transaction group in which the block was logically born + * + * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded + * bp's they are stored in units of SPA_MINBLOCKSHIFT. + * Generally, the generic BP_GET_*() macros can be used on embedded BP's. + * The B, D, X, lvl, type, and comp fields are stored the same as with normal + * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must + * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before + * other macros, as they assert that they are only used on BP's of the correct + * "embedded-ness". + */ + +#define BPE_GET_ETYPE(bp) \ + (ASSERT(BP_IS_EMBEDDED(bp)), \ + BF64_GET((bp)->blk_prop, 40, 8)) +#define BPE_SET_ETYPE(bp, t) do { \ + ASSERT(BP_IS_EMBEDDED(bp)); \ + BF64_SET((bp)->blk_prop, 40, 8, t); \ +_NOTE(CONSTCOND) } while (0) + +#define BPE_GET_LSIZE(bp) \ + (ASSERT(BP_IS_EMBEDDED(bp)), \ + BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1)) +#define BPE_SET_LSIZE(bp, x) do { \ + ASSERT(BP_IS_EMBEDDED(bp)); \ + BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \ +_NOTE(CONSTCOND) } while (0) + +#define BPE_GET_PSIZE(bp) \ + (ASSERT(BP_IS_EMBEDDED(bp)), \ + BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)) +#define BPE_SET_PSIZE(bp, x) do { \ + ASSERT(BP_IS_EMBEDDED(bp)); \ + BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ +_NOTE(CONSTCOND) } while (0) + +typedef enum bp_embedded_type { + BP_EMBEDDED_TYPE_DATA, + BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ + NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED +} bp_embedded_type_t; + +#define BPE_NUM_WORDS 14 +#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) +#define BPE_IS_PAYLOADWORD(bp, wp) \ + ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) + #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ @@ -242,18 +342,22 @@ typedef struct blkptr { #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) #define BP_GET_LSIZE(bp) \ - (BP_IS_HOLE(bp) ? 0 : \ + (BP_IS_EMBEDDED(bp) ? \ + (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)) -#define BP_SET_LSIZE(bp, x) \ - BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) +#define BP_SET_LSIZE(bp, x) do { \ + ASSERT(!BP_IS_EMBEDDED(bp)); \ + BF64_SET_SB((bp)->blk_prop, \ + 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \ +_NOTE(CONSTCOND) } while (0) #define BP_GET_PSIZE(bp) \ BF64_GET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) -#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) -#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x) #define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) @@ -264,6 +368,8 @@ typedef struct blkptr { #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1) + #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) @@ -331,6 +437,11 @@ typedef struct blkptr { ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } +#define BPE_NUM_WORDS 14 +#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) +#define BPE_IS_PAYLOADWORD(bp, wp) \ + ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) + /* * Embedded checksum */ @@ -1341,3 +1452,5 @@ typedef struct spa { objset_phys_t spa_mos; /* MOS for this pool */ int spa_inited; /* initialized */ } spa_t; + +static void decode_embedded_bp_compressed(const blkptr_t *, void *); diff --git a/sys/cddl/boot/zfs/zfssubr.c b/sys/cddl/boot/zfs/zfssubr.c index 0f01848..a64f065 100644 --- a/sys/cddl/boot/zfs/zfssubr.c +++ b/sys/cddl/boot/zfs/zfssubr.c @@ -30,9 +30,11 @@ static uint64_t zfs_crc64_table[256]; #define ECKSUM 666 -#define ASSERT(...) do { } while (0) -#define ASSERT3U(...) do { } while (0) -#define ASSERT3S(...) do { } while (0) +#define ASSERT3S(x, y, z) ((void)0) +#define ASSERT3U(x, y, z) ((void)0) +#define ASSERT3P(x, y, z) ((void)0) +#define ASSERT0(x) ((void)0) +#define ASSERT(x) ((void)0) #define panic(...) do { \ printf(__VA_ARGS__); \ @@ -82,6 +84,8 @@ typedef struct zio_checksum_info { const char *ci_name; /* descriptive name */ } zio_checksum_info_t; +#include "blkptr.c" + #include "fletcher.c" #include "sha256.c" |