diff options
author | pjd <pjd@FreeBSD.org> | 2008-11-17 20:49:29 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2008-11-17 20:49:29 +0000 |
commit | bbe899b96e388a8b82439f81ed3707e0d9c6070d (patch) | |
tree | 81b89fa4ac6467771d5aa291a97f4665981a6108 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c | |
parent | d2f579595c362ce27b4d87e2c40e1c4e09b929e3 (diff) | |
download | FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.zip FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.tar.gz |
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This bring huge amount of changes, I'll enumerate only user-visible changes:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows to use additional disks for cache.
Huge performance improvements mostly for random read of mostly
static content.
- slog
Allow to use additional disks for ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows regular users to perform privileged operations on files stored
on ZFS file systems owned by him. Very careful with this one.
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support to boot off of ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Before if write requested failed, system paniced. Now one
can select from one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just quota and reservation properties, but don't count space consumed
by children file systems, clones and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- External attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c | 178 |
1 files changed, 145 insertions, 33 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c index 5dff514..da498b6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +38,8 @@ #include <sys/spa.h> #include <sys/dmu.h> +static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); + #define CHAIN_END 0xffff /* end of the chunk chain */ /* half the (current) minimum block size */ @@ -150,7 +152,7 @@ zap_leaf_byteswap(zap_leaf_phys_t *buf, int size) } void -zap_leaf_init(zap_leaf_t *l) +zap_leaf_init(zap_leaf_t *l, boolean_t sort) { int i; @@ -165,6 +167,8 @@ zap_leaf_init(zap_leaf_t *l) l->l_phys->l_hdr.lh_block_type = ZBT_LEAF; l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC; l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); + if (sort) + l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; } /* @@ -327,19 +331,30 @@ zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, /* * Only to be used on 8-bit arrays. * array_len is actual len in bytes (not encoded le_value_length). - * buf is null-terminated. + * namenorm is null-terminated. */ -static int -zap_leaf_array_equal(zap_leaf_t *l, int chunk, - int array_len, const char *buf) +static boolean_t +zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len) { int bseen = 0; + if (zn->zn_matchtype == MT_FIRST) { + char *thisname = kmem_alloc(array_len, KM_SLEEP); + boolean_t match; + + zap_leaf_array_read(l, chunk, 1, array_len, 1, + array_len, thisname); + match = zap_match(zn, thisname); + kmem_free(thisname, array_len); + return (match); + } + + /* Fast path for exact matching */ while (bseen < array_len) { struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); - if (bcmp(la->la_array, buf + bseen, toread)) + if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread)) break; chunk = la->la_next; bseen += toread; @@ -352,15 +367,15 @@ zap_leaf_array_equal(zap_leaf_t *l, int chunk, */ int -zap_leaf_lookup(zap_leaf_t *l, - const char *name, uint64_t h, zap_entry_handle_t *zeh) +zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) { uint16_t *chunkp; struct zap_leaf_entry *le; ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); - for (chunkp = LEAF_HASH_ENTPTR(l, h); +again: + for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); *chunkp != CHAIN_END; chunkp = &le->le_next) { uint16_t chunk = *chunkp; le = ZAP_LEAF_ENTRY(l, chunk); @@ -368,11 +383,18 @@ zap_leaf_lookup(zap_leaf_t *l, ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); - if (le->le_hash != h) + if (le->le_hash != zn->zn_hash) continue; - if (zap_leaf_array_equal(l, le->le_name_chunk, - le->le_name_length, name)) { + /* + * NB: the entry chain is always sorted by cd on + * normalized zap objects, so this will find the + * lowest-cd match for MT_FIRST. + */ + ASSERT(zn->zn_matchtype == MT_EXACT || + (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); + if (zap_leaf_array_match(l, zn, le->le_name_chunk, + le->le_name_length)) { zeh->zeh_num_integers = le->le_value_length; zeh->zeh_integer_size = le->le_int_size; zeh->zeh_cd = le->le_cd; @@ -383,6 +405,15 @@ zap_leaf_lookup(zap_leaf_t *l, } } + /* + * NB: we could of course do this in one pass, but that would be + * a pain. We'll see if MT_BEST is even used much. + */ + if (zn->zn_matchtype == MT_BEST) { + zn->zn_matchtype = MT_FIRST; + goto again; + } + return (ENOENT); } @@ -539,22 +570,41 @@ zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd, return (E2BIG); if (cd == ZAP_MAXCD) { - for (cd = 0; cd < ZAP_MAXCD; cd++) { + /* find the lowest unused cd */ + if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { + cd = 0; + for (chunk = *LEAF_HASH_ENTPTR(l, h); chunk != CHAIN_END; chunk = le->le_next) { le = ZAP_LEAF_ENTRY(l, chunk); - if (le->le_hash == h && - le->le_cd == cd) { + if (le->le_cd > cd) break; + if (le->le_hash == h) { + ASSERT3U(cd, ==, le->le_cd); + cd++; } } - /* If this cd is not in use, we are good. */ - if (chunk == CHAIN_END) - break; + } else { + /* old unsorted format; do it the O(n^2) way */ + for (cd = 0; cd < ZAP_MAXCD; cd++) { + for (chunk = *LEAF_HASH_ENTPTR(l, h); + chunk != CHAIN_END; chunk = le->le_next) { + le = ZAP_LEAF_ENTRY(l, chunk); + if (le->le_hash == h && + le->le_cd == cd) { + break; + } + } + /* If this cd is not in use, we are good. */ + if (chunk == CHAIN_END) + break; + } } - /* If we tried all the cd's, we lose. */ - if (cd == ZAP_MAXCD) - return (ENOSPC); + /* + * we would run out of space in a block before we could + * have ZAP_MAXCD entries + */ + ASSERT3U(cd, <, ZAP_MAXCD); } if (l->l_phys->l_hdr.lh_nfree < numchunks) @@ -574,9 +624,8 @@ zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd, le->le_cd = cd; /* link it into the hash chain */ - chunkp = LEAF_HASH_ENTPTR(l, h); - le->le_next = *chunkp; - *chunkp = chunk; + /* XXX if we did the search above, we could just use that */ + chunkp = zap_leaf_rehash_entry(l, chunk); l->l_phys->l_hdr.lh_nentries++; @@ -591,16 +640,76 @@ zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd, } /* + * Determine if there is another entry with the same normalized form. + * For performance purposes, either zn or name must be provided (the + * other can be NULL). Note, there usually won't be any hash + * conflicts, in which case we don't need the concatenated/normalized + * form of the name. But all callers have one of these on hand anyway, + * so might as well take advantage. A cleaner but slower interface + * would accept neither argument, and compute the normalized name as + * needed (using zap_name_alloc(zap_entry_read_name(zeh))). + */ +boolean_t +zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, + const char *name, zap_t *zap) +{ + uint64_t chunk; + struct zap_leaf_entry *le; + boolean_t allocdzn = B_FALSE; + + if (zap->zap_normflags == 0) + return (B_FALSE); + + for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); + chunk != CHAIN_END; chunk = le->le_next) { + le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); + if (le->le_hash != zeh->zeh_hash) + continue; + if (le->le_cd == zeh->zeh_cd) + continue; + + if (zn == NULL) { + zn = zap_name_alloc(zap, name, MT_FIRST); + allocdzn = B_TRUE; + } + if (zap_leaf_array_match(zeh->zeh_leaf, zn, + le->le_name_chunk, le->le_name_length)) { + if (allocdzn) + zap_name_free(zn); + return (B_TRUE); + } + } + if (allocdzn) + zap_name_free(zn); + return (B_FALSE); +} + +/* * Routines for transferring entries between leafs. */ -static void +static uint16_t * zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) { struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); - uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash); - le->le_next = *ptr; - *ptr = entry; + struct zap_leaf_entry *le2; + uint16_t *chunkp; + + /* + * keep the entry chain sorted by cd + * NB: this will not cause problems for unsorted leafs, though + * it is unnecessary there. + */ + for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); + *chunkp != CHAIN_END; chunkp = &le2->le_next) { + le2 = ZAP_LEAF_ENTRY(l, *chunkp); + if (le2->le_cd > le->le_cd) + break; + } + + le->le_next = *chunkp; + *chunkp = entry; + return (chunkp); } static uint16_t @@ -644,7 +753,7 @@ zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) nle = ZAP_LEAF_ENTRY(nl, chunk); *nle = *le; /* structure assignment */ - zap_leaf_rehash_entry(nl, chunk); + (void) zap_leaf_rehash_entry(nl, chunk); nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); nle->le_value_chunk = @@ -660,7 +769,7 @@ zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) * Transfer the entries whose hash prefix ends in 1 to the new leaf. */ void -zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl) +zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) { int i; int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; @@ -674,6 +783,9 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl) /* break existing hash chains */ zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); + if (sort) + l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; + /* * Transfer entries whose hash bit 'bit' is set to nl; rehash * the remaining entries @@ -691,7 +803,7 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl) if (le->le_hash & (1ULL << bit)) zap_leaf_transfer_entry(l, i, nl); else - zap_leaf_rehash_entry(l, i); + (void) zap_leaf_rehash_entry(l, i); } } @@ -726,7 +838,7 @@ zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) + ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length * - le->le_int_size); + le->le_int_size); n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_entries_using_n_chunks[n]++; |