path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
author     pjd <pjd@FreeBSD.org>    2008-11-17 20:49:29 +0000
committer  pjd <pjd@FreeBSD.org>    2008-11-17 20:49:29 +0000
commit     bbe899b96e388a8b82439f81ed3707e0d9c6070d (patch)
tree       81b89fa4ac6467771d5aa291a97f4665981a6108 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
parent     d2f579595c362ce27b4d87e2c40e1c4e09b929e3 (diff)
download   FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.zip
           FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.tar.gz
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This brings a huge amount of changes; I'll enumerate only the user-visible ones:

- Delegated Administration
  Allows regular users to perform ZFS operations, like file system
  creation, snapshot creation, etc.

- L2ARC
  Level 2 cache for ZFS - allows additional disks to be used as cache.
  Huge performance improvements, mostly for random reads of mostly
  static content.

- slog
  Allows additional disks to be used for the ZFS Intent Log to speed up
  operations like fsync(2).

- vfs.zfs.super_owner
  Allows regular users to perform privileged operations on files stored
  on ZFS file systems that they own. Be very careful with this one.

- chflags(2)
  Not all the flags are supported. This still needs work.

- ZFSBoot
  Support for booting off of a ZFS pool. Not finished, AFAIK.
  Submitted by: dfr

- Snapshot properties

- New failure modes
  Before, if a write request failed, the system panicked. Now one can
  select from one of three failure modes:
  - panic - panic on write error
  - wait - wait for the disk to reappear
  - continue - serve read requests if possible, block write requests

- Refquota, refreservation properties
  Like the quota and reservation properties, but they don't count space
  consumed by child file systems, clones, and snapshots.

- Sparse volumes
  ZVOLs that don't reserve space in the pool.

- External attributes
  Compatible with extattr(2).

- NFSv4-ACLs
  Not sure about the status; might not be complete yet.
  Submitted by: trasz

- Creation-time properties

- Regression tests for the zpool(8) command.

Obtained from: OpenSolaris
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c')
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c  |  178
1 file changed, 145 insertions, 33 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
index 5dff514..da498b6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,6 +38,8 @@
#include <sys/spa.h>
#include <sys/dmu.h>
+static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
+
#define CHAIN_END 0xffff /* end of the chunk chain */
/* half the (current) minimum block size */
@@ -150,7 +152,7 @@ zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
}
void
-zap_leaf_init(zap_leaf_t *l)
+zap_leaf_init(zap_leaf_t *l, boolean_t sort)
{
int i;
@@ -165,6 +167,8 @@ zap_leaf_init(zap_leaf_t *l)
l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
+ if (sort)
+ l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
}
/*
@@ -327,19 +331,30 @@ zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
/*
* Only to be used on 8-bit arrays.
* array_len is actual len in bytes (not encoded le_value_length).
- * buf is null-terminated.
+ * namenorm is null-terminated.
*/
-static int
-zap_leaf_array_equal(zap_leaf_t *l, int chunk,
- int array_len, const char *buf)
+static boolean_t
+zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len)
{
int bseen = 0;
+ if (zn->zn_matchtype == MT_FIRST) {
+ char *thisname = kmem_alloc(array_len, KM_SLEEP);
+ boolean_t match;
+
+ zap_leaf_array_read(l, chunk, 1, array_len, 1,
+ array_len, thisname);
+ match = zap_match(zn, thisname);
+ kmem_free(thisname, array_len);
+ return (match);
+ }
+
+ /* Fast path for exact matching */
while (bseen < array_len) {
struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- if (bcmp(la->la_array, buf + bseen, toread))
+ if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread))
break;
chunk = la->la_next;
bseen += toread;
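The hunk above replaces the plain byte comparison with zap_leaf_array_match(), which keeps a bcmp-style fast path for exact lookups but first gathers the scattered chunks into a temporary buffer when a normalized (MT_FIRST) comparison is needed. The sketch below is a stand-alone, userspace illustration of that two-path strategy; the chunk struct, the tiny ARRAY_BYTES size, and the use of tolower() in place of zap_match()'s real normalization are all simplifications, not code from this commit.

/*
 * Userspace sketch of the matching strategy in zap_leaf_array_match():
 * names are stored scattered across small fixed-size chunks linked by
 * index, so an exact match is done in place, chunk by chunk, while a
 * normalized match first copies the chunks into a contiguous buffer.
 */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARRAY_BYTES 4           /* payload bytes per chunk (tiny, for illustration) */
#define CHAIN_END   0xffff      /* end of the chunk chain */

struct chunk {
	unsigned char data[ARRAY_BYTES];
	unsigned short next;     /* index of next chunk, or CHAIN_END */
};

/* Exact fast path: compare in place, chunk by chunk. */
static bool
chain_match_exact(const struct chunk *tbl, unsigned short c,
    const char *key, size_t len)
{
	size_t seen = 0;

	while (seen < len && c != CHAIN_END) {
		size_t n = len - seen < ARRAY_BYTES ? len - seen : ARRAY_BYTES;
		if (memcmp(tbl[c].data, key + seen, n) != 0)
			return (false);
		seen += n;
		c = tbl[c].next;
	}
	return (seen == len);
}

/* Normalized slow path: gather into a buffer, then compare case-insensitively. */
static bool
chain_match_norm(const struct chunk *tbl, unsigned short c,
    const char *key, size_t len)
{
	char *buf = malloc(len);
	size_t seen = 0;
	bool match = false;

	if (buf == NULL)
		return (false);
	for (; seen < len && c != CHAIN_END; c = tbl[c].next) {
		size_t n = len - seen < ARRAY_BYTES ? len - seen : ARRAY_BYTES;
		memcpy(buf + seen, tbl[c].data, n);
		seen += n;
	}
	if (seen == len) {
		match = true;
		for (size_t i = 0; i < len && match; i++)
			match = tolower((unsigned char)buf[i]) ==
			    tolower((unsigned char)key[i]);
	}
	free(buf);
	return (match);
}

int
main(void)
{
	/* "Hello" plus NUL split across two 4-byte chunks: 0 -> 1 -> CHAIN_END. */
	struct chunk tbl[2] = {
		{ { 'H', 'e', 'l', 'l' }, 1 },
		{ { 'o', '\0', 0, 0 }, CHAIN_END },
	};

	printf("exact \"Hello\": %d\n", chain_match_exact(tbl, 0, "Hello", 6));
	printf("exact \"hello\": %d\n", chain_match_exact(tbl, 0, "hello", 6));
	printf("norm  \"hello\": %d\n", chain_match_norm(tbl, 0, "hello", 6));
	return (0);
}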
@@ -352,15 +367,15 @@ zap_leaf_array_equal(zap_leaf_t *l, int chunk,
*/
int
-zap_leaf_lookup(zap_leaf_t *l,
- const char *name, uint64_t h, zap_entry_handle_t *zeh)
+zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
{
uint16_t *chunkp;
struct zap_leaf_entry *le;
ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
- for (chunkp = LEAF_HASH_ENTPTR(l, h);
+again:
+ for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
*chunkp != CHAIN_END; chunkp = &le->le_next) {
uint16_t chunk = *chunkp;
le = ZAP_LEAF_ENTRY(l, chunk);
@@ -368,11 +383,18 @@ zap_leaf_lookup(zap_leaf_t *l,
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
- if (le->le_hash != h)
+ if (le->le_hash != zn->zn_hash)
continue;
- if (zap_leaf_array_equal(l, le->le_name_chunk,
- le->le_name_length, name)) {
+ /*
+ * NB: the entry chain is always sorted by cd on
+ * normalized zap objects, so this will find the
+ * lowest-cd match for MT_FIRST.
+ */
+ ASSERT(zn->zn_matchtype == MT_EXACT ||
+ (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
+ if (zap_leaf_array_match(l, zn, le->le_name_chunk,
+ le->le_name_length)) {
zeh->zeh_num_integers = le->le_value_length;
zeh->zeh_integer_size = le->le_int_size;
zeh->zeh_cd = le->le_cd;
@@ -383,6 +405,15 @@ zap_leaf_lookup(zap_leaf_t *l,
}
}
+ /*
+ * NB: we could of course do this in one pass, but that would be
+ * a pain. We'll see if MT_BEST is even used much.
+ */
+ if (zn->zn_matchtype == MT_BEST) {
+ zn->zn_matchtype = MT_FIRST;
+ goto again;
+ }
+
return (ENOENT);
}
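The reworked zap_leaf_lookup() above implements MT_BEST as "do an exact pass first, and only if that finds nothing, retry the same hash chain as MT_FIRST". The following self-contained sketch shows the same retry pattern over a plain string table; the entry names, the matchtype enum, and the use of strcasecmp() as the "normalized" comparison are illustrative assumptions, not the kernel code.

/*
 * Toy lookup with the MT_BEST fallback: an exact pass first, then a
 * single retry as MT_FIRST (first normalized match) if nothing matched.
 */
#include <stdio.h>
#include <string.h>
#include <strings.h>

typedef enum { MT_EXACT, MT_FIRST, MT_BEST } matchtype_t;

static const char *entries[] = { "Makefile", "README", "makefile.inc" };
#define NENTRIES (sizeof (entries) / sizeof (entries[0]))

static int
lookup(const char *name, matchtype_t mt)
{
again:
	for (size_t i = 0; i < NENTRIES; i++) {
		int match = (mt == MT_FIRST) ?
		    strcasecmp(entries[i], name) == 0 :   /* normalized pass */
		    strcmp(entries[i], name) == 0;        /* exact pass */
		if (match)
			return ((int)i);
	}
	/* MT_BEST: the exact pass found nothing, so retry as MT_FIRST. */
	if (mt == MT_BEST) {
		mt = MT_FIRST;
		goto again;
	}
	return (-1);
}

int
main(void)
{
	printf("MT_EXACT \"readme\": %d\n", lookup("readme", MT_EXACT)); /* -1 */
	printf("MT_BEST  \"readme\": %d\n", lookup("readme", MT_BEST));  /*  1 */
	printf("MT_BEST  \"README\": %d\n", lookup("README", MT_BEST));  /*  1 */
	return (0);
}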
@@ -539,22 +570,41 @@ zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
return (E2BIG);
if (cd == ZAP_MAXCD) {
- for (cd = 0; cd < ZAP_MAXCD; cd++) {
+ /* find the lowest unused cd */
+ if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
+ cd = 0;
+
for (chunk = *LEAF_HASH_ENTPTR(l, h);
chunk != CHAIN_END; chunk = le->le_next) {
le = ZAP_LEAF_ENTRY(l, chunk);
- if (le->le_hash == h &&
- le->le_cd == cd) {
+ if (le->le_cd > cd)
break;
+ if (le->le_hash == h) {
+ ASSERT3U(cd, ==, le->le_cd);
+ cd++;
}
}
- /* If this cd is not in use, we are good. */
- if (chunk == CHAIN_END)
- break;
+ } else {
+ /* old unsorted format; do it the O(n^2) way */
+ for (cd = 0; cd < ZAP_MAXCD; cd++) {
+ for (chunk = *LEAF_HASH_ENTPTR(l, h);
+ chunk != CHAIN_END; chunk = le->le_next) {
+ le = ZAP_LEAF_ENTRY(l, chunk);
+ if (le->le_hash == h &&
+ le->le_cd == cd) {
+ break;
+ }
+ }
+ /* If this cd is not in use, we are good. */
+ if (chunk == CHAIN_END)
+ break;
+ }
}
- /* If we tried all the cd's, we lose. */
- if (cd == ZAP_MAXCD)
- return (ENOSPC);
+ /*
+ * we would run out of space in a block before we could
+ * have ZAP_MAXCD entries
+ */
+ ASSERT3U(cd, <, ZAP_MAXCD);
}
if (l->l_phys->l_hdr.lh_nfree < numchunks)
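The key point of the hunk above is that a cd-sorted hash chain lets zap_entry_create() find the lowest unused collision differentiator in a single pass, instead of the O(n^2) probe loop kept for old unsorted leaves. The small program below demonstrates that single-pass scan on a plain sorted array standing in for the chain; it leaves out the le_hash check the real code needs, since several hashes can share one bucket.

/*
 * Single-pass "lowest unused cd" scan over a cd-sorted set: bump the
 * candidate for every cd already taken, stop at the first gap.
 */
#include <stdio.h>

/* cds already in use for this hash, sorted ascending (ZLF_ENTRIES_CDSORTED). */
static const unsigned used_cds[] = { 0, 1, 2, 4, 5 };
#define NUSED (sizeof (used_cds) / sizeof (used_cds[0]))

static unsigned
lowest_unused_cd(const unsigned *cds, size_t n)
{
	unsigned cd = 0;

	for (size_t i = 0; i < n; i++) {
		if (cds[i] > cd)
			break;          /* gap found: cd is free */
		if (cds[i] == cd)
			cd++;           /* taken, try the next one */
	}
	return (cd);
}

int
main(void)
{
	/* Prints 3: cds 0, 1, 2 are taken and 3 is the first gap. */
	printf("lowest unused cd: %u\n", lowest_unused_cd(used_cds, NUSED));
	return (0);
}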
@@ -574,9 +624,8 @@ zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
le->le_cd = cd;
/* link it into the hash chain */
- chunkp = LEAF_HASH_ENTPTR(l, h);
- le->le_next = *chunkp;
- *chunkp = chunk;
+ /* XXX if we did the search above, we could just use that */
+ chunkp = zap_leaf_rehash_entry(l, chunk);
l->l_phys->l_hdr.lh_nentries++;
@@ -591,16 +640,76 @@ zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
}
/*
+ * Determine if there is another entry with the same normalized form.
+ * For performance purposes, either zn or name must be provided (the
+ * other can be NULL). Note, there usually won't be any hash
+ * conflicts, in which case we don't need the concatenated/normalized
+ * form of the name. But all callers have one of these on hand anyway,
+ * so might as well take advantage. A cleaner but slower interface
+ * would accept neither argument, and compute the normalized name as
+ * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
+ */
+boolean_t
+zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
+ const char *name, zap_t *zap)
+{
+ uint64_t chunk;
+ struct zap_leaf_entry *le;
+ boolean_t allocdzn = B_FALSE;
+
+ if (zap->zap_normflags == 0)
+ return (B_FALSE);
+
+ for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
+ chunk != CHAIN_END; chunk = le->le_next) {
+ le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
+ if (le->le_hash != zeh->zeh_hash)
+ continue;
+ if (le->le_cd == zeh->zeh_cd)
+ continue;
+
+ if (zn == NULL) {
+ zn = zap_name_alloc(zap, name, MT_FIRST);
+ allocdzn = B_TRUE;
+ }
+ if (zap_leaf_array_match(zeh->zeh_leaf, zn,
+ le->le_name_chunk, le->le_name_length)) {
+ if (allocdzn)
+ zap_name_free(zn);
+ return (B_TRUE);
+ }
+ }
+ if (allocdzn)
+ zap_name_free(zn);
+ return (B_FALSE);
+}
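zap_entry_normalization_conflict() above asks whether any other entry in the same hash bucket (with a different cd) shares the normalized form of the given name, which is exactly what makes a later case-insensitive lookup ambiguous. Here is a toy, userspace model of that check; the bucket layout and the use of strcasecmp() instead of the real normalization are assumptions for illustration only.

/*
 * Toy normalization-conflict check: scan the other entries in the same
 * bucket and report whether one of them normalizes to the same name.
 */
#include <stdbool.h>
#include <stdio.h>
#include <strings.h>

struct bucket_entry {
	unsigned	cd;        /* collision differentiator */
	const char	*name;     /* entry name (exact form) */
};

static bool
normalization_conflict(const struct bucket_entry *bucket, size_t n,
    unsigned my_cd, const char *name)
{
	for (size_t i = 0; i < n; i++) {
		if (bucket[i].cd == my_cd)
			continue;       /* skip the entry itself */
		if (strcasecmp(bucket[i].name, name) == 0)
			return (true);  /* another entry normalizes the same */
	}
	return (false);
}

int
main(void)
{
	/* Two entries whose names differ only by case share a bucket. */
	struct bucket_entry bucket[] = {
		{ 0, "readme" },
		{ 1, "README" },
	};

	/* Prints 1: "readme" (cd 0) conflicts with "README" (cd 1). */
	printf("conflict for cd 1: %d\n",
	    normalization_conflict(bucket, 2, 1, "README"));
	return (0);
}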
+
+/*
* Routines for transferring entries between leafs.
*/
-static void
+static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
- uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash);
- le->le_next = *ptr;
- *ptr = entry;
+ struct zap_leaf_entry *le2;
+ uint16_t *chunkp;
+
+ /*
+ * keep the entry chain sorted by cd
+ * NB: this will not cause problems for unsorted leafs, though
+ * it is unnecessary there.
+ */
+ for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
+ *chunkp != CHAIN_END; chunkp = &le2->le_next) {
+ le2 = ZAP_LEAF_ENTRY(l, *chunkp);
+ if (le2->le_cd > le->le_cd)
+ break;
+ }
+
+ le->le_next = *chunkp;
+ *chunkp = entry;
+ return (chunkp);
}
static uint16_t
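zap_leaf_rehash_entry() now walks the chain with a pointer to the previous link (chunkp) so it can splice the entry in front of the first element with a larger cd, keeping each hash chain sorted by cd. Below is a minimal userspace rendering of that pointer-to-link insertion idiom, using ordinary pointers instead of chunk indices; the node type is a simplified stand-in for zap_leaf_entry, not the kernel structure.

/*
 * Sorted insertion via a pointer to the link field: walk until the next
 * node's cd is larger, then splice the new node in at that link.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned	cd;      /* collision differentiator */
	struct node	*next;
};

/* Insert 'n' into the list headed by '*headp', keeping it sorted by cd. */
static struct node **
insert_sorted(struct node **headp, struct node *n)
{
	struct node **linkp;

	for (linkp = headp; *linkp != NULL; linkp = &(*linkp)->next) {
		if ((*linkp)->cd > n->cd)
			break;
	}
	n->next = *linkp;
	*linkp = n;
	return (linkp);
}

int
main(void)
{
	struct node *head = NULL;
	unsigned cds[] = { 2, 0, 3, 1 };

	for (size_t i = 0; i < 4; i++) {
		struct node *n = malloc(sizeof (*n));
		n->cd = cds[i];
		(void) insert_sorted(&head, n);
	}
	for (struct node *n = head; n != NULL; n = n->next)
		printf("%u ", n->cd);       /* prints: 0 1 2 3 */
	printf("\n");
	return (0);
}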
@@ -644,7 +753,7 @@ zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
nle = ZAP_LEAF_ENTRY(nl, chunk);
*nle = *le; /* structure assignment */
- zap_leaf_rehash_entry(nl, chunk);
+ (void) zap_leaf_rehash_entry(nl, chunk);
nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
nle->le_value_chunk =
@@ -660,7 +769,7 @@ zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
* Transfer the entries whose hash prefix ends in 1 to the new leaf.
*/
void
-zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl)
+zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{
int i;
int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;
@@ -674,6 +783,9 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl)
/* break existing hash chains */
zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
+ if (sort)
+ l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
+
/*
* Transfer entries whose hash bit 'bit' is set to nl; rehash
* the remaining entries
@@ -691,7 +803,7 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl)
if (le->le_hash & (1ULL << bit))
zap_leaf_transfer_entry(l, i, nl);
else
- zap_leaf_rehash_entry(l, i);
+ (void) zap_leaf_rehash_entry(l, i);
}
}
@@ -726,7 +838,7 @@ zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) +
ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length *
- le->le_int_size);
+ le->le_int_size);
n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
zs->zs_entries_using_n_chunks[n]++;