summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpfg <pfg@FreeBSD.org>2016-01-21 14:50:28 +0000
committerpfg <pfg@FreeBSD.org>2016-01-21 14:50:28 +0000
commitd16eeed462db8fd9eb9e8bd7c2afe86e32c74f15 (patch)
tree9d88075af5bca4278c36143901c2bc345d2aecde
parenta4d9665ca89d09a03ad6aa2a31c0a00de4fea157 (diff)
downloadFreeBSD-src-d16eeed462db8fd9eb9e8bd7c2afe86e32c74f15.zip
FreeBSD-src-d16eeed462db8fd9eb9e8bd7c2afe86e32c74f15.tar.gz
ext2fs: Bring back the htree dir_index implementation.
The htree dir_index is perhaps one of the most characteristic features of the linux ext3 implementation. It was removed in r281670, due to repeated bug reports. Damjan Jovanic detected and fixed three bugs and did some stress testing by building Apache OpenOffice on top of it so it is now in good shape to bring back. Differential Revision: https://reviews.freebsd.org/D5007 Submitted by: Damjan Jovanovic Reviewed by: pfg Tested by: pho Relnotes: Yes MFC after: 2 months (only 10.x)
-rw-r--r--sys/conf/files2
-rw-r--r--sys/fs/ext2fs/ext2_dir.h15
-rw-r--r--sys/fs/ext2fs/ext2_extern.h15
-rw-r--r--sys/fs/ext2fs/ext2_hash.c316
-rw-r--r--sys/fs/ext2fs/ext2_htree.c899
-rw-r--r--sys/fs/ext2fs/ext2_lookup.c336
-rw-r--r--sys/fs/ext2fs/ext2_vfsops.c18
-rw-r--r--sys/fs/ext2fs/ext2fs.h8
-rw-r--r--sys/modules/ext2fs/Makefile4
9 files changed, 1496 insertions, 117 deletions
diff --git a/sys/conf/files b/sys/conf/files
index ad2ea5e..1b763c2 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3114,6 +3114,8 @@ fs/ext2fs/ext2_bmap.c optional ext2fs
fs/ext2fs/ext2_extents.c optional ext2fs
fs/ext2fs/ext2_inode.c optional ext2fs
fs/ext2fs/ext2_inode_cnv.c optional ext2fs
+fs/ext2fs/ext2_hash.c optional ext2fs
+fs/ext2fs/ext2_htree.c optional ext2fs
fs/ext2fs/ext2_lookup.c optional ext2fs
fs/ext2fs/ext2_subr.c optional ext2fs
fs/ext2fs/ext2_vfsops.c optional ext2fs
diff --git a/sys/fs/ext2fs/ext2_dir.h b/sys/fs/ext2fs/ext2_dir.h
index 1138b86..8d92882 100644
--- a/sys/fs/ext2fs/ext2_dir.h
+++ b/sys/fs/ext2fs/ext2_dir.h
@@ -40,6 +40,21 @@ struct ext2fs_direct {
uint16_t e2d_namlen; /* length of string in e2d_name */
char e2d_name[EXT2FS_MAXNAMLEN];/* name with length<=EXT2FS_MAXNAMLEN */
};
+
+enum slotstatus {
+ NONE,
+ COMPACT,
+ FOUND
+};
+
+struct ext2fs_searchslot {
+ enum slotstatus slotstatus;
+ doff_t slotoffset; /* offset of area with free space */
+ int slotsize; /* size of area at slotoffset */
+ int slotfreespace; /* amount of space free in slot */
+ int slotneeded; /* sizeof the entry we are seeking */
+};
+
/*
* The new version of the directory entry. Since EXT2 structures are
* stored in intel byte order, and the name_len field could never be
diff --git a/sys/fs/ext2fs/ext2_extern.h b/sys/fs/ext2fs/ext2_extern.h
index f7b7657..93bd3f7 100644
--- a/sys/fs/ext2fs/ext2_extern.h
+++ b/sys/fs/ext2fs/ext2_extern.h
@@ -40,12 +40,15 @@
#define _FS_EXT2FS_EXT2_EXTERN_H_
struct ext2fs_dinode;
+struct ext2fs_direct_2;
+struct ext2fs_searchslot;
struct indir;
struct inode;
struct mount;
struct vfsconf;
struct vnode;
+int ext2_add_entry(struct vnode *, struct ext2fs_direct_2 *);
int ext2_alloc(struct inode *, daddr_t, e4fs_daddr_t, int,
struct ucred *, e4fs_daddr_t *);
int ext2_balloc(struct inode *,
@@ -83,6 +86,18 @@ int ext2_dirempty(struct inode *, ino_t, struct ucred *);
int ext2_checkpath(struct inode *, struct inode *, struct ucred *);
int cg_has_sb(int i);
int ext2_inactive(struct vop_inactive_args *);
+int ext2_htree_add_entry(struct vnode *, struct ext2fs_direct_2 *,
+ struct componentname *);
+int ext2_htree_create_index(struct vnode *, struct componentname *,
+ struct ext2fs_direct_2 *);
+int ext2_htree_has_idx(struct inode *);
+int ext2_htree_hash(const char *, int, uint32_t *, int, uint32_t *,
+ uint32_t *);
+int ext2_htree_lookup(struct inode *, const char *, int, struct buf **,
+ int *, doff_t *, doff_t *, doff_t *, struct ext2fs_searchslot *);
+int ext2_search_dirblock(struct inode *, void *, int *, const char *, int,
+ int *, doff_t *, doff_t *, doff_t *, struct ext2fs_searchslot *);
+
/* Flags to low-level allocation routines.
* The low 16-bits are reserved for IO_ flags from vnode.h.
diff --git a/sys/fs/ext2fs/ext2_hash.c b/sys/fs/ext2fs/ext2_hash.c
new file mode 100644
index 0000000..663a2df
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_hash.c
@@ -0,0 +1,316 @@
+/*-
+ * Copyright (c) 2010, 2013 Zheng Liu <lz@freebsd.org>
+ * Copyright (c) 2012, Vyacheslav Matyushin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The following notice applies to the code in ext2_half_md4():
+ *
+ * Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD4 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD4 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include <fs/ext2fs/htree.h>
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2_extern.h>
+
+/* F, G, and H are MD4 functions */
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+/* ROTATE_LEFT rotates x left n bits */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+/*
+ * FF, GG, and HH are transformations for rounds 1, 2, and 3.
+ * Rotation is separated from addition to prevent recomputation.
+ */
+#define FF(a, b, c, d, x, s) { \
+ (a) += F ((b), (c), (d)) + (x); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+}
+
+#define GG(a, b, c, d, x, s) { \
+ (a) += G ((b), (c), (d)) + (x) + (uint32_t)0x5A827999; \
+ (a) = ROTATE_LEFT ((a), (s)); \
+}
+
+#define HH(a, b, c, d, x, s) { \
+ (a) += H ((b), (c), (d)) + (x) + (uint32_t)0x6ED9EBA1; \
+ (a) = ROTATE_LEFT ((a), (s)); \
+}
+
+/*
+ * MD4 basic transformation. It transforms state based on block.
+ *
+ * This is a half md4 algorithm since Linux uses this algorithm for dir
+ * index. This function is derived from the RSA Data Security, Inc. MD4
+ * Message-Digest Algorithm and was modified as necessary.
+ *
+ * The return value of this function is uint32_t in Linux, but actually we don't
+ * need to check this value, so in our version this function doesn't return any
+ * value.
+ */
+static void
+ext2_half_md4(uint32_t hash[4], uint32_t data[8])
+{
+ uint32_t a = hash[0], b = hash[1], c = hash[2], d = hash[3];
+
+ /* Round 1 */
+ FF(a, b, c, d, data[0], 3);
+ FF(d, a, b, c, data[1], 7);
+ FF(c, d, a, b, data[2], 11);
+ FF(b, c, d, a, data[3], 19);
+ FF(a, b, c, d, data[4], 3);
+ FF(d, a, b, c, data[5], 7);
+ FF(c, d, a, b, data[6], 11);
+ FF(b, c, d, a, data[7], 19);
+
+ /* Round 2 */
+ GG(a, b, c, d, data[1], 3);
+ GG(d, a, b, c, data[3], 5);
+ GG(c, d, a, b, data[5], 9);
+ GG(b, c, d, a, data[7], 13);
+ GG(a, b, c, d, data[0], 3);
+ GG(d, a, b, c, data[2], 5);
+ GG(c, d, a, b, data[4], 9);
+ GG(b, c, d, a, data[6], 13);
+
+ /* Round 3 */
+ HH(a, b, c, d, data[3], 3);
+ HH(d, a, b, c, data[7], 9);
+ HH(c, d, a, b, data[2], 11);
+ HH(b, c, d, a, data[6], 15);
+ HH(a, b, c, d, data[1], 3);
+ HH(d, a, b, c, data[5], 9);
+ HH(c, d, a, b, data[0], 11);
+ HH(b, c, d, a, data[4], 15);
+
+ hash[0] += a;
+ hash[1] += b;
+ hash[2] += c;
+ hash[3] += d;
+}
+
+/*
+ * Tiny Encryption Algorithm.
+ */
+static void
+ext2_tea(uint32_t hash[4], uint32_t data[8])
+{
+ uint32_t tea_delta = 0x9E3779B9;
+ uint32_t sum;
+ uint32_t x = hash[0], y = hash[1];
+ int n = 16;
+ int i = 1;
+
+ while (n-- > 0) {
+ sum = i * tea_delta;
+ x += ((y << 4) + data[0]) ^ (y + sum) ^ ((y >> 5) + data[1]);
+ y += ((x << 4) + data[2]) ^ (x + sum) ^ ((x >> 5) + data[3]);
+ i++;
+ }
+
+ hash[0] += x;
+ hash[1] += y;
+}
+
+static uint32_t
+ext2_legacy_hash(const char *name, int len, int unsigned_char)
+{
+ uint32_t h0, h1 = 0x12A3FE2D, h2 = 0x37ABE8F9;
+ uint32_t multi = 0x6D22F5;
+ const unsigned char *uname = (const unsigned char *)name;
+ const signed char *sname = (const signed char *)name;
+ int val, i;
+
+ for (i = 0; i < len; i++) {
+ if (unsigned_char)
+ val = (u_int)*uname++;
+ else
+ val = (int)*sname++;
+
+ h0 = h2 + (h1 ^ (val * multi));
+ if (h0 & 0x80000000)
+ h0 -= 0x7FFFFFFF;
+ h2 = h1;
+ h1 = h0;
+ }
+
+ return (h1 << 1);
+}
+
+static void
+ext2_prep_hashbuf(const char *src, int slen, uint32_t *dst, int dlen,
+ int unsigned_char)
+{
+ uint32_t padding = slen | (slen << 8) | (slen << 16) | (slen << 24);
+ uint32_t buf_val;
+ const unsigned char *ubuf = (const unsigned char *)src;
+ const signed char *sbuf = (const signed char *)src;
+ int len, i;
+ int buf_byte;
+
+ if (slen > dlen)
+ len = dlen;
+ else
+ len = slen;
+
+ buf_val = padding;
+
+ for (i = 0; i < len; i++) {
+ if (unsigned_char)
+ buf_byte = (u_int)ubuf[i];
+ else
+ buf_byte = (int)sbuf[i];
+
+ if ((i % 4) == 0)
+ buf_val = padding;
+
+ buf_val <<= 8;
+ buf_val += buf_byte;
+
+ if ((i % 4) == 3) {
+ *dst++ = buf_val;
+ dlen -= sizeof(uint32_t);
+ buf_val = padding;
+ }
+ }
+
+ dlen -= sizeof(uint32_t);
+ if (dlen >= 0)
+ *dst++ = buf_val;
+
+ dlen -= sizeof(uint32_t);
+ while (dlen >= 0) {
+ *dst++ = padding;
+ dlen -= sizeof(uint32_t);
+ }
+}
+
+int
+ext2_htree_hash(const char *name, int len,
+ uint32_t *hash_seed, int hash_version,
+ uint32_t *hash_major, uint32_t *hash_minor)
+{
+ uint32_t hash[4];
+ uint32_t data[8];
+ uint32_t major = 0, minor = 0;
+ int unsigned_char = 0;
+
+ if (!name || !hash_major)
+ return (-1);
+
+ if (len < 1 || len > 255)
+ goto error;
+
+ hash[0] = 0x67452301;
+ hash[1] = 0xEFCDAB89;
+ hash[2] = 0x98BADCFE;
+ hash[3] = 0x10325476;
+
+ if (hash_seed)
+ memcpy(hash, hash_seed, sizeof(hash));
+
+ switch (hash_version) {
+ case EXT2_HTREE_TEA_UNSIGNED:
+ unsigned_char = 1;
+ /* FALLTHROUGH */
+ case EXT2_HTREE_TEA:
+ while (len > 0) {
+ ext2_prep_hashbuf(name, len, data, 16, unsigned_char);
+ ext2_tea(hash, data);
+ len -= 16;
+ name += 16;
+ }
+ major = hash[0];
+ minor = hash[1];
+ break;
+ case EXT2_HTREE_LEGACY_UNSIGNED:
+ unsigned_char = 1;
+ /* FALLTHROUGH */
+ case EXT2_HTREE_LEGACY:
+ major = ext2_legacy_hash(name, len, unsigned_char);
+ break;
+ case EXT2_HTREE_HALF_MD4_UNSIGNED:
+ unsigned_char = 1;
+ /* FALLTHROUGH */
+ case EXT2_HTREE_HALF_MD4:
+ while (len > 0) {
+ ext2_prep_hashbuf(name, len, data, 32, unsigned_char);
+ ext2_half_md4(hash, data);
+ len -= 32;
+ name += 32;
+ }
+ major = hash[1];
+ minor = hash[2];
+ break;
+ default:
+ goto error;
+ }
+
+ major &= ~1;
+ if (major == (EXT2_HTREE_EOF << 1))
+ major = (EXT2_HTREE_EOF - 1) << 1;
+ *hash_major = major;
+ if (hash_minor)
+ *hash_minor = minor;
+
+ return (0);
+
+error:
+ *hash_major = 0;
+ if (hash_minor)
+ *hash_minor = 0;
+ return (-1);
+}
diff --git a/sys/fs/ext2fs/ext2_htree.c b/sys/fs/ext2fs/ext2_htree.c
new file mode 100644
index 0000000..ac506bd
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_htree.c
@@ -0,0 +1,899 @@
+/*-
+ * Copyright (c) 2010, 2012 Zheng Liu <lz@freebsd.org>
+ * Copyright (c) 2012, Vyacheslav Matyushin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/endian.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/sysctl.h>
+
+#include <ufs/ufs/dir.h>
+
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_dinode.h>
+#include <fs/ext2fs/ext2_dir.h>
+#include <fs/ext2fs/htree.h>
+
+static void ext2_append_entry(char *block, uint32_t blksize,
+ struct ext2fs_direct_2 *last_entry,
+ struct ext2fs_direct_2 *new_entry);
+static int ext2_htree_append_block(struct vnode *vp, char *data,
+ struct componentname *cnp, uint32_t blksize);
+static int ext2_htree_check_next(struct inode *ip, uint32_t hash,
+ const char *name, struct ext2fs_htree_lookup_info *info);
+static int ext2_htree_cmp_sort_entry(const void *e1, const void *e2);
+static int ext2_htree_find_leaf(struct inode *ip, const char *name,
+ int namelen, uint32_t *hash, uint8_t *hash_version,
+ struct ext2fs_htree_lookup_info *info);
+static uint32_t ext2_htree_get_block(struct ext2fs_htree_entry *ep);
+static uint16_t ext2_htree_get_count(struct ext2fs_htree_entry *ep);
+static uint32_t ext2_htree_get_hash(struct ext2fs_htree_entry *ep);
+static uint16_t ext2_htree_get_limit(struct ext2fs_htree_entry *ep);
+static void ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
+ uint32_t hash, uint32_t blk);
+static void ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
+ uint32_t hash, uint32_t blk);
+static uint32_t ext2_htree_node_limit(struct inode *ip);
+static void ext2_htree_set_block(struct ext2fs_htree_entry *ep,
+ uint32_t blk);
+static void ext2_htree_set_count(struct ext2fs_htree_entry *ep,
+ uint16_t cnt);
+static void ext2_htree_set_hash(struct ext2fs_htree_entry *ep,
+ uint32_t hash);
+static void ext2_htree_set_limit(struct ext2fs_htree_entry *ep,
+ uint16_t limit);
+static int ext2_htree_split_dirblock(char *block1, char *block2,
+ uint32_t blksize, uint32_t *hash_seed, uint8_t hash_version,
+ uint32_t *split_hash, struct ext2fs_direct_2 *entry);
+static void ext2_htree_release(struct ext2fs_htree_lookup_info *info);
+static uint32_t ext2_htree_root_limit(struct inode *ip, int len);
+static int ext2_htree_writebuf(struct ext2fs_htree_lookup_info *info);
+
+int
+ext2_htree_has_idx(struct inode *ip)
+{
+ if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
+ ip->i_flag & IN_E4INDEX)
+ return (1);
+ else
+ return (0);
+}
+
+static int
+ext2_htree_check_next(struct inode *ip, uint32_t hash, const char *name,
+ struct ext2fs_htree_lookup_info *info)
+{
+ struct vnode *vp = ITOV(ip);
+ struct ext2fs_htree_lookup_level *level;
+ struct buf *bp;
+ uint32_t next_hash;
+ int idx = info->h_levels_num - 1;
+ int levels = 0;
+
+ do {
+ level = &info->h_levels[idx];
+ level->h_entry++;
+ if (level->h_entry < level->h_entries +
+ ext2_htree_get_count(level->h_entries))
+ break;
+ if (idx == 0)
+ return (0);
+ idx--;
+ levels++;
+ } while (1);
+
+ next_hash = ext2_htree_get_hash(level->h_entry);
+ if ((hash & 1) == 0) {
+ if (hash != (next_hash & ~1))
+ return (0);
+ }
+
+ while (levels > 0) {
+ levels--;
+ if (ext2_blkatoff(vp, ext2_htree_get_block(level->h_entry) *
+ ip->i_e2fs->e2fs_bsize, NULL, &bp) != 0)
+ return (0);
+ level = &info->h_levels[idx + 1];
+ brelse(level->h_bp);
+ level->h_bp = bp;
+ level->h_entry = level->h_entries =
+ ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
+ }
+
+ return (1);
+}
+
+static uint32_t
+ext2_htree_get_block(struct ext2fs_htree_entry *ep)
+{
+ return (ep->h_blk & 0x00FFFFFF);
+}
+
+static void
+ext2_htree_set_block(struct ext2fs_htree_entry *ep, uint32_t blk)
+{
+ ep->h_blk = blk;
+}
+
+static uint16_t
+ext2_htree_get_count(struct ext2fs_htree_entry *ep)
+{
+ return (((struct ext2fs_htree_count *)(ep))->h_entries_num);
+}
+
+static void
+ext2_htree_set_count(struct ext2fs_htree_entry *ep, uint16_t cnt)
+{
+ ((struct ext2fs_htree_count *)(ep))->h_entries_num = cnt;
+}
+
+static uint32_t
+ext2_htree_get_hash(struct ext2fs_htree_entry *ep)
+{
+ return (ep->h_hash);
+}
+
+static uint16_t
+ext2_htree_get_limit(struct ext2fs_htree_entry *ep)
+{
+ return (((struct ext2fs_htree_count *)(ep))->h_entries_max);
+}
+
+static void
+ext2_htree_set_hash(struct ext2fs_htree_entry *ep, uint32_t hash)
+{
+ ep->h_hash = hash;
+}
+
+static void
+ext2_htree_set_limit(struct ext2fs_htree_entry *ep, uint16_t limit)
+{
+ ((struct ext2fs_htree_count *)(ep))->h_entries_max = limit;
+}
+
+static void
+ext2_htree_release(struct ext2fs_htree_lookup_info *info)
+{
+ int i;
+
+ for (i = 0; i < info->h_levels_num; i++) {
+ struct buf *bp = info->h_levels[i].h_bp;
+ if (bp != NULL)
+ brelse(bp);
+ }
+}
+
+static uint32_t
+ext2_htree_root_limit(struct inode *ip, int len)
+{
+ uint32_t space;
+
+ space = ip->i_e2fs->e2fs_bsize - EXT2_DIR_REC_LEN(1) -
+ EXT2_DIR_REC_LEN(2) - len;
+ return (space / sizeof(struct ext2fs_htree_entry));
+}
+
+static uint32_t
+ext2_htree_node_limit(struct inode *ip)
+{
+ struct m_ext2fs *fs;
+ uint32_t space;
+
+ fs = ip->i_e2fs;
+ space = fs->e2fs_bsize - EXT2_DIR_REC_LEN(0);
+
+ return (space / sizeof(struct ext2fs_htree_entry));
+}
+
+static int
+ext2_htree_find_leaf(struct inode *ip, const char *name, int namelen,
+ uint32_t *hash, uint8_t *hash_ver,
+ struct ext2fs_htree_lookup_info *info)
+{
+ struct vnode *vp;
+ struct ext2fs *fs;
+ struct m_ext2fs *m_fs;
+ struct buf *bp = NULL;
+ struct ext2fs_htree_root *rootp;
+ struct ext2fs_htree_entry *entp, *start, *end, *middle, *found;
+ struct ext2fs_htree_lookup_level *level_info;
+ uint32_t hash_major = 0, hash_minor = 0;
+ uint32_t levels, cnt;
+ uint8_t hash_version;
+
+ if (name == NULL || info == NULL)
+ return (-1);
+
+ vp = ITOV(ip);
+ fs = ip->i_e2fs->e2fs;
+ m_fs = ip->i_e2fs;
+
+ if (ext2_blkatoff(vp, 0, NULL, &bp) != 0)
+ return (-1);
+
+ info->h_levels_num = 1;
+ info->h_levels[0].h_bp = bp;
+ rootp = (struct ext2fs_htree_root *)bp->b_data;
+ if (rootp->h_info.h_hash_version != EXT2_HTREE_LEGACY &&
+ rootp->h_info.h_hash_version != EXT2_HTREE_HALF_MD4 &&
+ rootp->h_info.h_hash_version != EXT2_HTREE_TEA)
+ goto error;
+
+ hash_version = rootp->h_info.h_hash_version;
+ if (hash_version <= EXT2_HTREE_TEA)
+ hash_version += m_fs->e2fs_uhash;
+ *hash_ver = hash_version;
+
+ ext2_htree_hash(name, namelen, fs->e3fs_hash_seed,
+ hash_version, &hash_major, &hash_minor);
+ *hash = hash_major;
+
+ if ((levels = rootp->h_info.h_ind_levels) > 1)
+ goto error;
+
+ entp = (struct ext2fs_htree_entry *)(((char *)&rootp->h_info) +
+ rootp->h_info.h_info_len);
+
+ if (ext2_htree_get_limit(entp) !=
+ ext2_htree_root_limit(ip, rootp->h_info.h_info_len))
+ goto error;
+
+ while (1) {
+ cnt = ext2_htree_get_count(entp);
+ if (cnt == 0 || cnt > ext2_htree_get_limit(entp))
+ goto error;
+
+ start = entp + 1;
+ end = entp + cnt - 1;
+ while (start <= end) {
+ middle = start + (end - start) / 2;
+ if (ext2_htree_get_hash(middle) > hash_major)
+ end = middle - 1;
+ else
+ start = middle + 1;
+ }
+ found = start - 1;
+
+ level_info = &(info->h_levels[info->h_levels_num - 1]);
+ level_info->h_bp = bp;
+ level_info->h_entries = entp;
+ level_info->h_entry = found;
+ if (levels == 0)
+ return (0);
+ levels--;
+ if (ext2_blkatoff(vp,
+ ext2_htree_get_block(found) * m_fs->e2fs_bsize,
+ NULL, &bp) != 0)
+ goto error;
+ entp = ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
+ info->h_levels_num++;
+ info->h_levels[info->h_levels_num - 1].h_bp = bp;
+ }
+
+error:
+ ext2_htree_release(info);
+ return (-1);
+}
+
+/*
+ * Try to lookup a directory entry in HTree index
+ */
+int
+ext2_htree_lookup(struct inode *ip, const char *name, int namelen,
+ struct buf **bpp, int *entryoffp, doff_t *offp,
+ doff_t *prevoffp, doff_t *endusefulp,
+ struct ext2fs_searchslot *ss)
+{
+ struct vnode *vp;
+ struct ext2fs_htree_lookup_info info;
+ struct ext2fs_htree_entry *leaf_node;
+ struct m_ext2fs *m_fs;
+ struct buf *bp;
+ uint32_t blk;
+ uint32_t dirhash;
+ uint32_t bsize;
+ uint8_t hash_version;
+ int search_next;
+ int found = 0;
+
+ m_fs = ip->i_e2fs;
+ bsize = m_fs->e2fs_bsize;
+ vp = ITOV(ip);
+
+ /* TODO: print error msg because we don't lookup '.' and '..' */
+
+ memset(&info, 0, sizeof(info));
+ if (ext2_htree_find_leaf(ip, name, namelen, &dirhash,
+ &hash_version, &info))
+ return (-1);
+
+ do {
+ leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
+ blk = ext2_htree_get_block(leaf_node);
+ if (ext2_blkatoff(vp, blk * bsize, NULL, &bp) != 0) {
+ ext2_htree_release(&info);
+ return (-1);
+ }
+
+ *offp = blk * bsize;
+ *entryoffp = 0;
+ *prevoffp = blk * bsize;
+ *endusefulp = blk * bsize;
+
+ if (ss->slotstatus == NONE) {
+ ss->slotoffset = -1;
+ ss->slotfreespace = 0;
+ }
+
+ if (ext2_search_dirblock(ip, bp->b_data, &found,
+ name, namelen, entryoffp, offp, prevoffp,
+ endusefulp, ss) != 0) {
+ brelse(bp);
+ ext2_htree_release(&info);
+ return (-1);
+ }
+
+ if (found) {
+ *bpp = bp;
+ ext2_htree_release(&info);
+ return (0);
+ }
+
+ brelse(bp);
+ search_next = ext2_htree_check_next(ip, dirhash, name, &info);
+ } while (search_next);
+
+ ext2_htree_release(&info);
+ return (ENOENT);
+}
+
+static int
+ext2_htree_append_block(struct vnode *vp, char *data,
+ struct componentname *cnp, uint32_t blksize)
+{
+ struct iovec aiov;
+ struct uio auio;
+ struct inode *dp = VTOI(vp);
+ uint64_t cursize, newsize;
+ int error;
+
+ cursize = roundup(dp->i_size, blksize);
+ newsize = cursize + blksize;
+
+ auio.uio_offset = cursize;
+ auio.uio_resid = blksize;
+ aiov.iov_len = blksize;
+ aiov.iov_base = data;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ error = VOP_WRITE(vp, &auio, IO_SYNC, cnp->cn_cred);
+ if (!error)
+ dp->i_size = newsize;
+
+ return (error);
+}
+
+static int
+ext2_htree_writebuf(struct ext2fs_htree_lookup_info *info)
+{
+ int i, error;
+
+ for (i = 0; i < info->h_levels_num; i++) {
+ struct buf *bp = info->h_levels[i].h_bp;
+ error = bwrite(bp);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+static void
+ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
+ uint32_t hash, uint32_t blk)
+{
+ struct ext2fs_htree_entry *target;
+ int entries_num;
+
+ target = level->h_entry + 1;
+ entries_num = ext2_htree_get_count(level->h_entries);
+
+ memmove(target + 1, target, (char *)(level->h_entries + entries_num) -
+ (char *)target);
+ ext2_htree_set_block(target, blk);
+ ext2_htree_set_hash(target, hash);
+ ext2_htree_set_count(level->h_entries, entries_num + 1);
+}
+
+/*
+ * Insert an index entry to the index node.
+ */
+static void
+ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
+ uint32_t hash, uint32_t blk)
+{
+ struct ext2fs_htree_lookup_level *level;
+
+ level = &info->h_levels[info->h_levels_num - 1];
+ ext2_htree_insert_entry_to_level(level, hash, blk);
+}
+
+/*
+ * Compare two entry sort descriptors by name hash value.
+ * This is used together with qsort.
+ */
+static int
+ext2_htree_cmp_sort_entry(const void *e1, const void *e2)
+{
+ const struct ext2fs_htree_sort_entry *entry1, *entry2;
+
+ entry1 = (const struct ext2fs_htree_sort_entry *)e1;
+ entry2 = (const struct ext2fs_htree_sort_entry *)e2;
+
+ if (entry1->h_hash < entry2->h_hash)
+ return (-1);
+ if (entry1->h_hash > entry2->h_hash)
+ return (1);
+ return (0);
+}
+
+/*
+ * Append an entry to the end of the directory block.
+ */
+static void
+ext2_append_entry(char *block, uint32_t blksize,
+ struct ext2fs_direct_2 *last_entry,
+ struct ext2fs_direct_2 *new_entry)
+{
+ uint16_t entry_len;
+
+ entry_len = EXT2_DIR_REC_LEN(last_entry->e2d_namlen);
+ last_entry->e2d_reclen = entry_len;
+ last_entry = (struct ext2fs_direct_2 *)((char *)last_entry + entry_len);
+ new_entry->e2d_reclen = block + blksize - (char *)last_entry;
+ memcpy(last_entry, new_entry, EXT2_DIR_REC_LEN(new_entry->e2d_namlen));
+}
+
+/*
+ * Move half of entries from the old directory block to the new one.
+ */
+static int
+ext2_htree_split_dirblock(char *block1, char *block2, uint32_t blksize,
+ uint32_t *hash_seed, uint8_t hash_version,
+ uint32_t *split_hash, struct ext2fs_direct_2 *entry)
+{
+ int entry_cnt = 0;
+ int size = 0;
+ int i, k;
+ uint32_t offset;
+ uint16_t entry_len = 0;
+ uint32_t entry_hash;
+ struct ext2fs_direct_2 *ep, *last;
+ char *dest;
+ struct ext2fs_htree_sort_entry *sort_info;
+
+ ep = (struct ext2fs_direct_2 *)block1;
+ dest = block2;
+ sort_info = (struct ext2fs_htree_sort_entry *)
+ ((char *)block2 + blksize);
+
+ /*
+ * Calculate name hash value for the entry which is to be added.
+ */
+ ext2_htree_hash(entry->e2d_name, entry->e2d_namlen, hash_seed,
+ hash_version, &entry_hash, NULL);
+
+ /*
+ * Fill in directory entry sort descriptors.
+ */
+ while ((char *)ep < block1 + blksize) {
+ if (ep->e2d_ino && ep->e2d_namlen) {
+ entry_cnt++;
+ sort_info--;
+ sort_info->h_size = ep->e2d_reclen;
+ sort_info->h_offset = (char *)ep - block1;
+ ext2_htree_hash(ep->e2d_name, ep->e2d_namlen,
+ hash_seed, hash_version,
+ &sort_info->h_hash, NULL);
+ }
+ ep = (struct ext2fs_direct_2 *)
+ ((char *)ep + ep->e2d_reclen);
+ }
+
+ /*
+ * Sort directory entry descriptors by name hash value.
+ */
+ qsort(sort_info, entry_cnt, sizeof(struct ext2fs_htree_sort_entry),
+ ext2_htree_cmp_sort_entry);
+
+ /*
+ * Count the number of entries to move to directory block 2.
+ */
+ for (i = entry_cnt - 1; i >= 0; i--) {
+ if (sort_info[i].h_size + size > blksize / 2)
+ break;
+ size += sort_info[i].h_size;
+ }
+
+ *split_hash = sort_info[i + 1].h_hash;
+
+ /*
+ * Set collision bit.
+ */
+ if (*split_hash == sort_info[i].h_hash)
+ *split_hash += 1;
+
+ /*
+ * Move half of directory entries from block 1 to block 2.
+ */
+ for (k = i + 1; k < entry_cnt; k++) {
+ ep = (struct ext2fs_direct_2 *)((char *)block1 +
+ sort_info[k].h_offset);
+ entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
+ memcpy(dest, ep, entry_len);
+ ((struct ext2fs_direct_2 *)dest)->e2d_reclen = entry_len;
+ /* Mark directory entry as unused. */
+ ep->e2d_ino = 0;
+ dest += entry_len;
+ }
+ dest -= entry_len;
+
+ /* Shrink directory entries in block 1. */
+ last = (struct ext2fs_direct_2 *)block1;
+ entry_len = 0;
+ for (offset = 0; offset < blksize; ) {
+ ep = (struct ext2fs_direct_2 *)(block1 + offset);
+ offset += ep->e2d_reclen;
+ if (ep->e2d_ino) {
+ last = (struct ext2fs_direct_2 *)
+ ((char *)last + entry_len);
+ entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
+ memcpy((void *)last, (void *)ep, entry_len);
+ last->e2d_reclen = entry_len;
+ }
+ }
+
+ if (entry_hash >= *split_hash) {
+ /* Add entry to block 2. */
+ ext2_append_entry(block2, blksize,
+ (struct ext2fs_direct_2 *)dest, entry);
+
+ /* Adjust length field of last entry of block 1. */
+ last->e2d_reclen = block1 + blksize - (char *)last;
+ } else {
+ /* Add entry to block 1. */
+ ext2_append_entry(block1, blksize, last, entry);
+
+ /* Adjust length field of last entry of block 2. */
+ ((struct ext2fs_direct_2 *)dest)->e2d_reclen =
+ block2 + blksize - dest;
+ }
+
+ return (0);
+}
+
+/*
+ * Create an HTree index for a directory
+ */
+int
+ext2_htree_create_index(struct vnode *vp, struct componentname *cnp,
+ struct ext2fs_direct_2 *new_entry)
+{
+ struct buf *bp = NULL;
+ struct inode *dp;
+ struct ext2fs *fs;
+ struct m_ext2fs *m_fs;
+ struct ext2fs_direct_2 *ep, *dotdot;
+ struct ext2fs_htree_root *root;
+ struct ext2fs_htree_lookup_info info;
+ uint32_t blksize, dirlen, split_hash;
+ uint8_t hash_version;
+ char *buf1 = NULL;
+ char *buf2 = NULL;
+ int error = 0;
+
+ dp = VTOI(vp);
+ fs = dp->i_e2fs->e2fs;
+ m_fs = dp->i_e2fs;
+ blksize = m_fs->e2fs_bsize;
+
+ buf1 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
+ buf2 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
+
+ if ((error = ext2_blkatoff(vp, 0, NULL, &bp)) != 0)
+ goto out;
+
+ root = (struct ext2fs_htree_root *)bp->b_data;
+ dotdot = (struct ext2fs_direct_2 *)((char *)&(root->h_dotdot));
+ ep = (struct ext2fs_direct_2 *)((char *)dotdot + dotdot->e2d_reclen);
+ dirlen = (char *)root + blksize - (char *)ep;
+ memcpy(buf1, ep, dirlen);
+ ep = (struct ext2fs_direct_2 *)buf1;
+ while ((char *)ep < buf1 + dirlen)
+ ep = (struct ext2fs_direct_2 *)
+ ((char *)ep + ep->e2d_reclen);
+ ep->e2d_reclen = buf1 + blksize - (char *)ep;
+
+ dp->i_flag |= IN_E4INDEX;
+
+ /*
+ * Initialize index root.
+ */
+ dotdot->e2d_reclen = blksize - EXT2_DIR_REC_LEN(1);
+ memset(&root->h_info, 0, sizeof(root->h_info));
+ root->h_info.h_hash_version = fs->e3fs_def_hash_version;
+ root->h_info.h_info_len = sizeof(root->h_info);
+ ext2_htree_set_block(root->h_entries, 1);
+ ext2_htree_set_count(root->h_entries, 1);
+ ext2_htree_set_limit(root->h_entries,
+ ext2_htree_root_limit(dp, sizeof(root->h_info)));
+
+ memset(&info, 0, sizeof(info));
+ info.h_levels_num = 1;
+ info.h_levels[0].h_entries = root->h_entries;
+ info.h_levels[0].h_entry = root->h_entries;
+
+ hash_version = root->h_info.h_hash_version;
+ if (hash_version <= EXT2_HTREE_TEA)
+ hash_version += m_fs->e2fs_uhash;
+ ext2_htree_split_dirblock(buf1, buf2, blksize, fs->e3fs_hash_seed,
+ hash_version, &split_hash, new_entry);
+ ext2_htree_insert_entry(&info, split_hash, 2);
+
+ /*
+ * Write directory block 0.
+ */
+ if (DOINGASYNC(vp)) {
+ bdwrite(bp);
+ error = 0;
+ } else {
+ error = bwrite(bp);
+ }
+ dp->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (error)
+ goto out;
+
+ /*
+ * Write directory block 1.
+ */
+ error = ext2_htree_append_block(vp, buf1, cnp, blksize);
+ if (error)
+ goto out1;
+
+ /*
+ * Write directory block 2.
+ */
+ error = ext2_htree_append_block(vp, buf2, cnp, blksize);
+
+ free(buf1, M_TEMP);
+ free(buf2, M_TEMP);
+ return (error);
+out:
+ if (bp != NULL)
+ brelse(bp);
+out1:
+ free(buf1, M_TEMP);
+ free(buf2, M_TEMP);
+ return (error);
+}
+
+/*
+ * Add an entry to the directory using htree index.
+ */
+int
+ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry,
+ struct componentname *cnp)
+{
+ struct ext2fs_htree_entry *entries, *leaf_node;
+ struct ext2fs_htree_lookup_info info;
+ struct buf *bp = NULL;
+ struct ext2fs *fs;
+ struct m_ext2fs *m_fs;
+ struct inode *ip;
+ uint16_t ent_num;
+ uint32_t dirhash, split_hash;
+ uint32_t blksize, blknum;
+ uint64_t cursize, dirsize;
+ uint8_t hash_version;
+ char *newdirblock = NULL;
+ char *newidxblock = NULL;
+ struct ext2fs_htree_node *dst_node;
+ struct ext2fs_htree_entry *dst_entries;
+ struct ext2fs_htree_entry *root_entires;
+ struct buf *dst_bp = NULL;
+ int error, write_bp = 0, write_dst_bp = 0, write_info = 0;
+
+ ip = VTOI(dvp);
+ m_fs = ip->i_e2fs;
+ fs = m_fs->e2fs;
+ blksize = m_fs->e2fs_bsize;
+
+ if (ip->i_count != 0)
+ return ext2_add_entry(dvp, entry);
+
+ /* Target directory block is full, split it */
+ memset(&info, 0, sizeof(info));
+ error = ext2_htree_find_leaf(ip, entry->e2d_name, entry->e2d_namlen,
+ &dirhash, &hash_version, &info);
+ if (error)
+ return (error);
+
+ entries = info.h_levels[info.h_levels_num - 1].h_entries;
+ ent_num = ext2_htree_get_count(entries);
+ if (ent_num == ext2_htree_get_limit(entries)) {
+ /* Split the index node. */
+ root_entires = info.h_levels[0].h_entries;
+ newidxblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
+ dst_node = (struct ext2fs_htree_node *)newidxblock;
+ dst_entries = dst_node->h_entries;
+ memset(&dst_node->h_fake_dirent, 0,
+ sizeof(dst_node->h_fake_dirent));
+ dst_node->h_fake_dirent.e2d_reclen = blksize;
+
+ cursize = roundup(ip->i_size, blksize);
+ dirsize = cursize + blksize;
+ blknum = dirsize / blksize - 1;
+
+ error = ext2_htree_append_block(dvp, newidxblock,
+ cnp, blksize);
+ if (error)
+ goto finish;
+ error = ext2_blkatoff(dvp, cursize, NULL, &dst_bp);
+ if (error)
+ goto finish;
+ dst_node = (struct ext2fs_htree_node *)dst_bp->b_data;
+ dst_entries = dst_node->h_entries;
+
+ if (info.h_levels_num == 2) {
+ uint16_t src_ent_num, dst_ent_num;
+
+ if (ext2_htree_get_count(root_entires) ==
+ ext2_htree_get_limit(root_entires)) {
+ /* Directory index is full */
+ error = EIO;
+ goto finish;
+ }
+
+ src_ent_num = ent_num / 2;
+ dst_ent_num = ent_num - src_ent_num;
+ split_hash = ext2_htree_get_hash(entries + src_ent_num);
+
+ /* Move half of index entries to the new index node */
+ memcpy(dst_entries, entries + src_ent_num,
+ dst_ent_num * sizeof(struct ext2fs_htree_entry));
+ ext2_htree_set_count(entries, src_ent_num);
+ ext2_htree_set_count(dst_entries, dst_ent_num);
+ ext2_htree_set_limit(dst_entries,
+ ext2_htree_node_limit(ip));
+
+ if (info.h_levels[1].h_entry >= entries + src_ent_num) {
+ struct buf *tmp = info.h_levels[1].h_bp;
+ info.h_levels[1].h_bp = dst_bp;
+ dst_bp = tmp;
+
+ info.h_levels[1].h_entry =
+ info.h_levels[1].h_entry -
+ (entries + src_ent_num) +
+ dst_entries;
+ info.h_levels[1].h_entries = dst_entries;
+ }
+ ext2_htree_insert_entry_to_level(&info.h_levels[0],
+ split_hash, blknum);
+
+ /* Write new index node to disk */
+ error = bwrite(dst_bp);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (error)
+ goto finish;
+ write_dst_bp = 1;
+ } else {
+ /* Create second level for htree index */
+ struct ext2fs_htree_root *idx_root;
+
+ memcpy(dst_entries, entries,
+ ent_num * sizeof(struct ext2fs_htree_entry));
+ ext2_htree_set_limit(dst_entries,
+ ext2_htree_node_limit(ip));
+
+ idx_root = (struct ext2fs_htree_root *)
+ info.h_levels[0].h_bp->b_data;
+ idx_root->h_info.h_ind_levels = 1;
+
+ ext2_htree_set_count(entries, 1);
+ ext2_htree_set_block(entries, blknum);
+
+ info.h_levels_num = 2;
+ info.h_levels[1].h_entries = dst_entries;
+ info.h_levels[1].h_entry = info.h_levels[0].h_entry -
+ info.h_levels[0].h_entries + dst_entries;
+ info.h_levels[1].h_bp = dst_bp;
+ dst_bp = NULL;
+ }
+ }
+
+ leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
+ blknum = ext2_htree_get_block(leaf_node);
+ error = ext2_blkatoff(dvp, blknum * blksize, NULL, &bp);
+ if (error)
+ goto finish;
+
+ /* Split target directory block */
+ newdirblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
+ ext2_htree_split_dirblock((char *)bp->b_data, newdirblock, blksize,
+ fs->e3fs_hash_seed, hash_version, &split_hash, entry);
+ cursize = roundup(ip->i_size, blksize);
+ dirsize = cursize + blksize;
+ blknum = dirsize / blksize - 1;
+
+ /* Add index entry for the new directory block */
+ ext2_htree_insert_entry(&info, split_hash, blknum);
+
+ /* Write the new directory block to the end of the directory */
+ error = ext2_htree_append_block(dvp, newdirblock, cnp, blksize);
+ if (error)
+ goto finish;
+
+ /* Write the target directory block */
+ error = bwrite(bp);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (error)
+ goto finish;
+ write_bp = 1;
+
+ /* Write the index block */
+ error = ext2_htree_writebuf(&info);
+ if (!error)
+ write_info = 1;
+
+finish:
+ if (dst_bp != NULL && !write_dst_bp)
+ brelse(dst_bp);
+ if (bp != NULL && !write_bp)
+ brelse(bp);
+ if (newdirblock != NULL)
+ free(newdirblock, M_TEMP);
+ if (newidxblock != NULL)
+ free(newidxblock, M_TEMP);
+ if (!write_info)
+ ext2_htree_release(&info);
+ return (error);
+}
diff --git a/sys/fs/ext2fs/ext2_lookup.c b/sys/fs/ext2fs/ext2_lookup.c
index 379eb44..309e383 100644
--- a/sys/fs/ext2fs/ext2_lookup.c
+++ b/sys/fs/ext2fs/ext2_lookup.c
@@ -113,9 +113,19 @@ static u_char dt_to_ext2_ft[] = {
static int ext2_dirbadentry(struct vnode *dp, struct ext2fs_direct_2 *de,
int entryoffsetinblock);
+static int ext2_is_dot_entry(struct componentname *cnp);
static int ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp,
struct componentname *cnp, ino_t *dd_ino);
+static int
+ext2_is_dot_entry(struct componentname *cnp)
+{
+ if (cnp->cn_namelen <= 2 && cnp->cn_nameptr[0] == '.' &&
+ (cnp->cn_nameptr[1] == '.' || cnp->cn_nameptr[1] == '\0'))
+ return (1);
+ return (0);
+}
+
/*
* Vnode op for reading directories.
*/
@@ -296,13 +306,9 @@ ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp
struct buf *bp; /* a buffer of directory entries */
struct ext2fs_direct_2 *ep; /* the current directory entry */
int entryoffsetinblock; /* offset of ep in bp's buffer */
- enum {NONE, COMPACT, FOUND} slotstatus;
- doff_t slotoffset; /* offset of area with free space */
+ struct ext2fs_searchslot ss;
doff_t i_diroff; /* cached i_diroff value */
doff_t i_offset; /* cached i_offset value */
- int slotsize; /* size of area at slotoffset */
- int slotfreespace; /* amount of space free in slot */
- int slotneeded; /* size of the entry we're seeking */
int numdirpasses; /* strategy for directory search */
doff_t endsearch; /* offset to end directory search */
doff_t prevoff; /* prev entry dp->i_offset */
@@ -310,12 +316,13 @@ ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp
struct vnode *tdp; /* returned by VFS_VGET */
doff_t enduseful; /* pointer past last used dir slot */
u_long bmask; /* block offset mask */
- int namlen, error;
+ int error;
struct ucred *cred = cnp->cn_cred;
int flags = cnp->cn_flags;
int nameiop = cnp->cn_nameiop;
ino_t ino, ino1;
int ltype;
+ int entry_found = 0;
int DIRBLKSIZ = VTOI(vdp)->i_e2fs->e2fs_bsize;
@@ -326,31 +333,57 @@ ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp
bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
restart:
bp = NULL;
- slotoffset = -1;
+ ss.slotoffset = -1;
/*
* We now have a segment name to search for, and a directory to search.
- */
-
- /*
+ *
* Suppress search for slots unless creating
* file and at end of pathname, in which case
* we watch for a place to put the new file in
* case it doesn't already exist.
*/
i_diroff = dp->i_diroff;
- slotstatus = FOUND;
- slotfreespace = slotsize = slotneeded = 0;
+ ss.slotstatus = FOUND;
+ ss.slotfreespace = ss.slotsize = ss.slotneeded = 0;
if ((nameiop == CREATE || nameiop == RENAME) &&
(flags & ISLASTCN)) {
- slotstatus = NONE;
- slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
+ ss.slotstatus = NONE;
+ ss.slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
/* was
- slotneeded = (sizeof(struct direct) - MAXNAMLEN +
+ ss.slotneeded = (sizeof(struct direct) - MAXNAMLEN +
cnp->cn_namelen + 3) &~ 3; */
}
/*
+ * Try to lookup dir entry using htree directory index.
+ *
+ * If we got an error or we want to find '.' or '..' entry,
+ * we will fall back to linear search.
+ */
+ if (!ext2_is_dot_entry(cnp) && ext2_htree_has_idx(dp)) {
+ numdirpasses = 1;
+ entryoffsetinblock = 0;
+ switch (ext2_htree_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
+ &bp, &entryoffsetinblock, &i_offset, &prevoff,
+ &enduseful, &ss)) {
+ case 0:
+ ep = (struct ext2fs_direct_2 *)((char *)bp->b_data +
+ (i_offset & bmask));
+ goto foundentry;
+ case ENOENT:
+ i_offset = roundup2(dp->i_size, DIRBLKSIZ);
+ goto notfound;
+ default:
+ /*
+ * Something failed; just fallback to do a linear
+ * search.
+ */
+ break;
+ }
+ }
+
+ /*
* If there is cached information on a previous search of
* this directory, pick up where we last left off.
* We cache only lookups as these are the most common
@@ -384,96 +417,38 @@ searchloop:
/*
* If necessary, get the next directory block.
*/
- if ((i_offset & bmask) == 0) {
- if (bp != NULL)
- brelse(bp);
- if ((error =
- ext2_blkatoff(vdp, (off_t)i_offset, NULL,
- &bp)) != 0)
- return (error);
- entryoffsetinblock = 0;
- }
+ if (bp != NULL)
+ brelse(bp);
+ error = ext2_blkatoff(vdp, (off_t)i_offset, NULL, &bp);
+ if (error != 0)
+ return (error);
+ entryoffsetinblock = 0;
/*
* If still looking for a slot, and at a DIRBLKSIZE
* boundary, have to start looking for free space again.
*/
- if (slotstatus == NONE &&
+ if (ss.slotstatus == NONE &&
(entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
- slotoffset = -1;
- slotfreespace = 0;
+ ss.slotoffset = -1;
+ ss.slotfreespace = 0;
}
- /*
- * Get pointer to next entry.
- * Full validation checks are slow, so we only check
- * enough to insure forward progress through the
- * directory. Complete checks can be run by setting
- * "vfs.e2fs.dirchk" to be true.
- */
- ep = (struct ext2fs_direct_2 *)
- ((char *)bp->b_data + entryoffsetinblock);
- if (ep->e2d_reclen == 0 ||
- (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
- int i;
- ext2_dirbad(dp, i_offset, "mangled entry");
- i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
- i_offset += i;
- entryoffsetinblock += i;
- continue;
- }
-
- /*
- * If an appropriate sized slot has not yet been found,
- * check to see if one is available. Also accumulate space
- * in the current block so that we can determine if
- * compaction is viable.
- */
- if (slotstatus != FOUND) {
- int size = ep->e2d_reclen;
-
- if (ep->e2d_ino != 0)
- size -= EXT2_DIR_REC_LEN(ep->e2d_namlen);
- if (size > 0) {
- if (size >= slotneeded) {
- slotstatus = FOUND;
- slotoffset = i_offset;
- slotsize = ep->e2d_reclen;
- } else if (slotstatus == NONE) {
- slotfreespace += size;
- if (slotoffset == -1)
- slotoffset = i_offset;
- if (slotfreespace >= slotneeded) {
- slotstatus = COMPACT;
- slotsize = i_offset +
- ep->e2d_reclen - slotoffset;
- }
- }
- }
+ error = ext2_search_dirblock(dp, bp->b_data, &entry_found,
+ cnp->cn_nameptr, cnp->cn_namelen,
+ &entryoffsetinblock, &i_offset, &prevoff,
+ &enduseful, &ss);
+ if (error != 0) {
+ brelse(bp);
+ return (error);
}
-
- /*
- * Check for a name match.
- */
- if (ep->e2d_ino) {
- namlen = ep->e2d_namlen;
- if (namlen == cnp->cn_namelen &&
- !bcmp(cnp->cn_nameptr, ep->e2d_name,
- (unsigned)namlen)) {
- /*
- * Save directory entry's inode number and
- * reclen in ndp->ni_ufs area, and release
- * directory buffer.
- */
- ino = ep->e2d_ino;
- goto found;
- }
+ if (entry_found) {
+ ep = (struct ext2fs_direct_2 *)((char *)bp->b_data +
+ (entryoffsetinblock & bmask));
+foundentry:
+ ino = ep->e2d_ino;
+ goto found;
}
- prevoff = i_offset;
- i_offset += ep->e2d_reclen;
- entryoffsetinblock += ep->e2d_reclen;
- if (ep->e2d_ino)
- enduseful = i_offset;
}
-/* notfound: */
+notfound:
/*
* If we started in the middle of the directory and failed
* to find our target, we must check the beginning as well.
@@ -508,15 +483,15 @@ searchloop:
* can be put in the range from dp->i_offset to
* dp->i_offset + dp->i_count.
*/
- if (slotstatus == NONE) {
+ if (ss.slotstatus == NONE) {
dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
dp->i_count = 0;
enduseful = dp->i_offset;
} else {
- dp->i_offset = slotoffset;
- dp->i_count = slotsize;
- if (enduseful < slotoffset + slotsize)
- enduseful = slotoffset + slotsize;
+ dp->i_offset = ss.slotoffset;
+ dp->i_count = ss.slotsize;
+ if (enduseful < ss.slotoffset + ss.slotsize)
+ enduseful = ss.slotoffset + ss.slotsize;
}
dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
/*
@@ -722,6 +697,102 @@ found:
return (0);
}
+int
+ext2_search_dirblock(struct inode *ip, void *data, int *foundp,
+ const char *name, int namelen, int *entryoffsetinblockp,
+ doff_t *offp, doff_t *prevoffp, doff_t *endusefulp,
+ struct ext2fs_searchslot *ssp)
+{
+ struct vnode *vdp;
+ struct ext2fs_direct_2 *ep, *top;
+ uint32_t bsize = ip->i_e2fs->e2fs_bsize;
+ int offset = *entryoffsetinblockp;
+ int namlen;
+
+ vdp = ITOV(ip);
+
+ ep = (struct ext2fs_direct_2 *)((char *)data + offset);
+ top = (struct ext2fs_direct_2 *)((char *)data +
+ bsize - EXT2_DIR_REC_LEN(0));
+
+ while (ep < top) {
+ /*
+ * Full validation checks are slow, so we only check
+ * enough to insure forward progress through the
+ * directory. Complete checks can be run by setting
+ * "vfs.e2fs.dirchk" to be true.
+ */
+ if (ep->e2d_reclen == 0 ||
+ (dirchk && ext2_dirbadentry(vdp, ep, offset))) {
+ int i;
+ ext2_dirbad(ip, *offp, "mangled entry");
+ i = bsize - (offset & (bsize - 1));
+ *offp += i;
+ offset += i;
+ continue;
+ }
+
+ /*
+ * If an appropriate sized slot has not yet been found,
+ * check to see if one is available. Also accumulate space
+ * in the current block so that we can determine if
+ * compaction is viable.
+ */
+ if (ssp->slotstatus != FOUND) {
+ int size = ep->e2d_reclen;
+
+ if (ep->e2d_ino != 0)
+ size -= EXT2_DIR_REC_LEN(ep->e2d_namlen);
+ if (size > 0) {
+ if (size >= ssp->slotneeded) {
+ ssp->slotstatus = FOUND;
+ ssp->slotoffset = *offp;
+ ssp->slotsize = ep->e2d_reclen;
+ } else if (ssp->slotstatus == NONE) {
+ ssp->slotfreespace += size;
+ if (ssp->slotoffset == -1)
+ ssp->slotoffset = *offp;
+ if (ssp->slotfreespace >= ssp->slotneeded) {
+ ssp->slotstatus = COMPACT;
+ ssp->slotsize = *offp +
+ ep->e2d_reclen -
+ ssp->slotoffset;
+ }
+ }
+ }
+ }
+
+ /*
+ * Check for a name match.
+ */
+ if (ep->e2d_ino) {
+ namlen = ep->e2d_namlen;
+ if (namlen == namelen &&
+ !bcmp(name, ep->e2d_name, (unsigned)namlen)) {
+ /*
+ * Save directory entry's inode number and
+ * reclen in ndp->ni_ufs area, and release
+ * directory buffer.
+ */
+ *foundp = 1;
+ return (0);
+ }
+ }
+ *prevoffp = *offp;
+ *offp += ep->e2d_reclen;
+ offset += ep->e2d_reclen;
+ *entryoffsetinblockp = offset;
+ if (ep->e2d_ino)
+ *endusefulp = *offp;
+ /*
+ * Get pointer to the next entry.
+ */
+ ep = (struct ext2fs_direct_2 *)((char *)data + offset);
+ }
+
+ return (0);
+}
+
void
ext2_dirbad(struct inode *ip, doff_t offset, char *how)
{
@@ -791,16 +862,12 @@ ext2_dirbadentry(struct vnode *dp, struct ext2fs_direct_2 *de,
int
ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
{
- struct ext2fs_direct_2 *ep, *nep;
struct inode *dp;
- struct buf *bp;
struct ext2fs_direct_2 newdir;
struct iovec aiov;
struct uio auio;
- u_int dsize;
- int error, loc, newentrysize, spacefree;
- char *dirbuf;
- int DIRBLKSIZ = ip->i_e2fs->e2fs_bsize;
+ int error, newentrysize;
+ int DIRBLKSIZ = ip->i_e2fs->e2fs_bsize;
#ifdef INVARIANTS
@@ -817,6 +884,28 @@ ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
newdir.e2d_type = EXT2_FT_UNKNOWN;
bcopy(cnp->cn_nameptr, newdir.e2d_name, (unsigned)cnp->cn_namelen + 1);
newentrysize = EXT2_DIR_REC_LEN(newdir.e2d_namlen);
+
+ if (ext2_htree_has_idx(dp)) {
+ error = ext2_htree_add_entry(dvp, &newdir, cnp);
+ if (error) {
+ dp->i_flag &= ~IN_E4INDEX;
+ dp->i_flag |= IN_CHANGE | IN_UPDATE;
+ }
+ return (error);
+ }
+
+ if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
+ !ext2_htree_has_idx(dp)) {
+ if ((dp->i_size / DIRBLKSIZ) == 1 &&
+ dp->i_offset == DIRBLKSIZ) {
+ /*
+ * Making indexed directory when one block is not
+ * enough to save all entries.
+ */
+ return ext2_htree_create_index(dvp, cnp, &newdir);
+ }
+ }
+
if (dp->i_count == 0) {
/*
* If dp->i_count is 0, then namei could find no
@@ -848,6 +937,29 @@ ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
return (error);
}
+ error = ext2_add_entry(dvp, &newdir);
+ if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
+ error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
+ cnp->cn_cred, cnp->cn_thread);
+ return (error);
+}
+
+/*
+ * Insert an entry into the directory block.
+ * Compact the contents.
+ */
+int
+ext2_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry)
+{
+ struct ext2fs_direct_2 *ep, *nep;
+ struct inode *dp;
+ struct buf *bp;
+ u_int dsize;
+ int error, loc, newentrysize, spacefree;
+ char *dirbuf;
+
+ dp = VTOI(dvp);
+
/*
* If dp->i_count is non-zero, then namei found space
* for the new entry in the range dp->i_offset to
@@ -879,6 +991,7 @@ ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
* dp->i_offset + dp->i_count would yield the
* space.
*/
+ newentrysize = EXT2_DIR_REC_LEN(entry->e2d_namlen);
ep = (struct ext2fs_direct_2 *)dirbuf;
dsize = EXT2_DIR_REC_LEN(ep->e2d_namlen);
spacefree = ep->e2d_reclen - dsize;
@@ -904,15 +1017,15 @@ ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
if (ep->e2d_ino == 0) {
if (spacefree + dsize < newentrysize)
panic("ext2_direnter: compact1");
- newdir.e2d_reclen = spacefree + dsize;
+ entry->e2d_reclen = spacefree + dsize;
} else {
if (spacefree < newentrysize)
panic("ext2_direnter: compact2");
- newdir.e2d_reclen = spacefree;
+ entry->e2d_reclen = spacefree;
ep->e2d_reclen = dsize;
ep = (struct ext2fs_direct_2 *)((char *)ep + dsize);
}
- bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
+ bcopy((caddr_t)entry, (caddr_t)ep, (u_int)newentrysize);
if (DOINGASYNC(dvp)) {
bdwrite(bp);
error = 0;
@@ -920,9 +1033,6 @@ ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
error = bwrite(bp);
}
dp->i_flag |= IN_CHANGE | IN_UPDATE;
- if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
- error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
- cnp->cn_cred, cnp->cn_thread);
return (error);
}
diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c
index 5339aa2..fab98a5 100644
--- a/sys/fs/ext2fs/ext2_vfsops.c
+++ b/sys/fs/ext2fs/ext2_vfsops.c
@@ -399,8 +399,22 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
if (es->e2fs_rev == E2FS_REV0 ||
!EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
fs->e2fs_maxfilesize = 0x7fffffff;
- else
- fs->e2fs_maxfilesize = 0x7fffffffffffffff;
+ else {
+ fs->e2fs_maxfilesize = 0xffffffffffff;
+ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
+ fs->e2fs_maxfilesize = 0x7fffffffffffffff;
+ }
+ if (es->e4fs_flags & E2FS_UNSIGNED_HASH) {
+ fs->e2fs_uhash = 3;
+ } else if ((es->e4fs_flags & E2FS_SIGNED_HASH) == 0) {
+#ifdef __CHAR_UNSIGNED__
+ es->e4fs_flags |= E2FS_UNSIGNED_HASH;
+ fs->e2fs_uhash = 3;
+#else
+ es->e4fs_flags |= E2FS_SIGNED_HASH;
+#endif
+ }
+
return (0);
}
diff --git a/sys/fs/ext2fs/ext2fs.h b/sys/fs/ext2fs/ext2fs.h
index 756803f..b8c4701 100644
--- a/sys/fs/ext2fs/ext2fs.h
+++ b/sys/fs/ext2fs/ext2fs.h
@@ -147,6 +147,7 @@ struct m_ext2fs {
int32_t e2fs_contigsumsize; /* size of cluster summary array */
int32_t *e2fs_maxcluster; /* max cluster in each cyl group */
struct csum *e2fs_clustersum; /* cluster summary in each cyl group */
+ int32_t e2fs_uhash; /* 3 if hash should be signed, 0 if not */
};
/* cluster summary information */
@@ -213,6 +214,7 @@ struct csum {
* - EXT2F_INCOMPAT_FLEX_BG
* - EXT2F_INCOMPAT_META_BG
*/
+#define EXT2F_COMPAT_SUPP EXT2F_COMPAT_DIRHASHINDEX
#define EXT2F_ROCOMPAT_SUPP (EXT2F_ROCOMPAT_SPARSESUPER | \
EXT2F_ROCOMPAT_LARGEFILE | \
EXT2F_ROCOMPAT_EXTRA_ISIZE)
@@ -243,6 +245,12 @@ struct csum {
#define E2FS_ISCLEAN 0x0001 /* Unmounted cleanly */
#define E2FS_ERRORS 0x0002 /* Errors detected */
+/*
+ * Filesystem miscellaneous flags
+ */
+#define E2FS_SIGNED_HASH 0x0001
+#define E2FS_UNSIGNED_HASH 0x0002
+
/* ext2 file system block group descriptor */
struct ext2_gd {
diff --git a/sys/modules/ext2fs/Makefile b/sys/modules/ext2fs/Makefile
index 48697da..fc10ab0 100644
--- a/sys/modules/ext2fs/Makefile
+++ b/sys/modules/ext2fs/Makefile
@@ -3,8 +3,8 @@
.PATH: ${.CURDIR}/../../fs/ext2fs
KMOD= ext2fs
SRCS= opt_ddb.h opt_directio.h opt_quota.h opt_suiddir.h vnode_if.h \
- ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c \
- ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \
+ ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c ext2_hash.c \
+ ext2_htree.c ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \
ext2_vfsops.c ext2_vnops.c
.include <bsd.kmod.mk>
OpenPOWER on IntegriCloud