Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller: 1) Add BQL support to via-rhine, from Tino Reichardt. 2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers can support hw switch offloading. From Floria Fainelli. 3) Allow 'ip address' commands to initiate multicast group join/leave, from Madhu Challa. 4) Many ipv4 FIB lookup optimizations from Alexander Duyck. 5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel Borkmann. 6) Remove the ugly compat support in ARP for ugly layers like ax25, rose, etc. And use this to clean up the neigh layer, then use it to implement MPLS support. All from Eric Biederman. 7) Support L3 forwarding offloading in switches, from Scott Feldman. 8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed up route lookups even further. From Alexander Duyck. 9) Many improvements and bug fixes to the rhashtable implementation, from Herbert Xu and Thomas Graf. In particular, in the case where an rhashtable user bulk adds a large number of items into an empty table, we expand the table much more sanely. 10) Don't make the tcp_metrics hash table per-namespace, from Eric Biederman. 11) Extend EBPF to access SKB fields, from Alexei Starovoitov. 12) Split out new connection request sockets so that they can be established in the main hash table. Much less false sharing since hash lookups go direct to the request sockets instead of having to go first to the listener then to the request socks hashed underneath. From Eric Dumazet. 13) Add async I/O support for crytpo AF_ALG sockets, from Tadeusz Struk. 14) Support stable privacy address generation for RFC7217 in IPV6. From Hannes Frederic Sowa. 15) Hash network namespace into IP frag IDs, also from Hannes Frederic Sowa. 16) Convert PTP get/set methods to use 64-bit time, from Richard Cochran. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits) fm10k: Bump driver version to 0.15.2 fm10k: corrected VF multicast update fm10k: mbx_update_max_size does not drop all oversized messages fm10k: reset head instead of calling update_max_size fm10k: renamed mbx_tx_dropped to mbx_tx_oversized fm10k: update xcast mode before synchronizing multicast addresses fm10k: start service timer on probe fm10k: fix function header comment fm10k: comment next_vf_mbx flow fm10k: don't handle mailbox events in iov_event path and always process mailbox fm10k: use separate workqueue for fm10k driver fm10k: Set PF queues to unlimited bandwidth during virtualization fm10k: expose tx_timeout_count as an ethtool stat fm10k: only increment tx_timeout_count in Tx hang path fm10k: remove extraneous "Reset interface" message fm10k: separate PF only stats so that VF does not display them fm10k: use hw->mac.max_queues for stats fm10k: only show actual queues, not the maximum in hardware fm10k: allow creation of VLAN on default vid fm10k: fix unused warnings ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-04-15 09:00:47 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-04-15 09:00:47 -0700
commit: 6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch)
tree: 74d1ec65087df1da1021b43ac51acc1ee8601809 /lib
parent: bb0fd7ab0986105765d11baa82e619c618a235aa (diff)
parent: 9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff)
download: op-kernel-dev-6c373ca89399c5a3f7ef210ad8f63dc3437da345.zip
op-kernel-dev-6c373ca89399c5a3f7ef210ad8f63dc3437da345.tar.gz
3 files changed, 380 insertions, 701 deletions
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index b5344ef..4898442 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,13 +1,13 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
- * Based on the following paper:
- * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
- *
  * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -27,121 +27,18 @@
 #include <linux/err.h>
 
 #define HASH_DEFAULT_SIZE	64UL
-#define HASH_MIN_SIZE		4UL
+#define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU   128UL
 
-/* Base bits plus 1 bit for nulls marker */
-#define HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
-
-enum {
-	RHT_LOCK_NORMAL,
-	RHT_LOCK_NESTED,
-};
-
-/* The bucket lock is selected based on the hash and protects mutations
- * on a group of hash buckets.
- *
- * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
- * a single lock always covers both buckets which may both contains
- * entries which link to the same bucket of the old table during resizing.
- * This allows to simplify the locking as locking the bucket in both
- * tables during resize always guarantee protection.
- *
- * IMPORTANT: When holding the bucket lock of both the old and new table
- * during expansions and shrinking, the old bucket lock must always be
- * acquired first.
- */
-static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
-{
-	return &tbl->locks[hash & tbl->locks_mask];
-}
-
-static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
-{
-	return (void *) he - ht->p.head_offset;
-}
-
-static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
-{
-	return hash & (tbl->size - 1);
-}
-
-static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
-{
-	u32 hash;
-
-	if (unlikely(!ht->p.key_len))
-		hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
-	else
-		hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
-				    ht->p.hash_rnd);
-
-	return hash >> HASH_RESERVED_SPACE;
-}
-
-static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
-{
-	return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
-}
-
-static u32 head_hashfn(const struct rhashtable *ht,
+static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
 {
-	return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
+	return rht_head_hashfn(ht, tbl, he, ht->p);
 }
 
 #ifdef CONFIG_PROVE_LOCKING
-static void debug_dump_buckets(const struct rhashtable *ht,
-			       const struct bucket_table *tbl)
-{
-	struct rhash_head *he;
-	unsigned int i, hash;
-
-	for (i = 0; i < tbl->size; i++) {
-		pr_warn(" [Bucket %d] ", i);
-		rht_for_each_rcu(he, tbl, i) {
-			hash = head_hashfn(ht, tbl, he);
-			pr_cont("[hash = %#x, lock = %p] ",
-				hash, bucket_lock(tbl, hash));
-		}
-		pr_cont("\n");
-	}
-
-}
-
-static void debug_dump_table(struct rhashtable *ht,
-			     const struct bucket_table *tbl,
-			     unsigned int hash)
-{
-	struct bucket_table *old_tbl, *future_tbl;
-
-	pr_emerg("BUG: lock for hash %#x in table %p not held\n",
-		 hash, tbl);
-
-	rcu_read_lock();
-	future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	if (future_tbl != old_tbl) {
-		pr_warn("Future table %p (size: %zd)\n",
-			future_tbl, future_tbl->size);
-		debug_dump_buckets(ht, future_tbl);
-	}
-
-	pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
-	debug_dump_buckets(ht, old_tbl);
-
-	rcu_read_unlock();
-}
-
 #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
-#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)				\
-	do {								\
-		if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) {	\
-			debug_dump_table(HT, TBL, HASH);		\
-			BUG();						\
-		}							\
-	} while (0)
 
 int lockdep_rht_mutex_is_held(struct rhashtable *ht)
 {
@@ -151,30 +48,18 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
 
 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
 {
-	spinlock_t *lock = bucket_lock(tbl, hash);
+	spinlock_t *lock = rht_bucket_lock(tbl, hash);
 
 	return (debug_locks) ? lockdep_is_held(lock) : 1;
 }
 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #else
 #define ASSERT_RHT_MUTEX(HT)
-#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
 #endif
 
 
-static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
-{
-	struct rhash_head __rcu **pprev;
-
-	for (pprev = &tbl->buckets[n];
-	     !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n));
-	     pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
-		;
-
-	return pprev;
-}
-
-static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
+static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
+			      gfp_t gfp)
 {
 	unsigned int i, size;
 #if defined(CONFIG_PROVE_LOCKING)
@@ -191,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
-		if (size * sizeof(spinlock_t) > PAGE_SIZE)
+		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
+		    gfp == GFP_KERNEL)
 			tbl->locks = vmalloc(size * sizeof(spinlock_t));
 		else
 #endif
 		tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-					   GFP_KERNEL);
+					   gfp);
 		if (!tbl->locks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
@@ -215,153 +101,181 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	kvfree(tbl);
 }
 
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+	bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
-					       size_t nbuckets)
+					       size_t nbuckets,
+					       gfp_t gfp)
 {
 	struct bucket_table *tbl = NULL;
 	size_t size;
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-		tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-	if (tbl == NULL)
+	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
+	    gfp != GFP_KERNEL)
+		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
+	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
 	if (tbl == NULL)
 		return NULL;
 
 	tbl->size = nbuckets;
 
-	if (alloc_bucket_locks(ht, tbl) < 0) {
+	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}
 
+	INIT_LIST_HEAD(&tbl->walkers);
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
 	for (i = 0; i < nbuckets; i++)
 		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
 
 	return tbl;
 }
 
-/**
- * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
- * @ht:		hash table
- * @new_size:	new table size
- */
-static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+						  struct bucket_table *tbl)
 {
-	/* Expand table when exceeding 75% load */
-	return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
-	       (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
-}
+	struct bucket_table *new_tbl;
 
-/**
- * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
- * @ht:		hash table
- * @new_size:	new table size
- */
-static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
-{
-	/* Shrink table beneath 30% load */
-	return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
-	       (atomic_read(&ht->shift) > ht->p.min_shift);
-}
+	do {
+		new_tbl = tbl;
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	} while (tbl);
 
-static void lock_buckets(struct bucket_table *new_tbl,
-			 struct bucket_table *old_tbl, unsigned int hash)
-	__acquires(old_bucket_lock)
-{
-	spin_lock_bh(bucket_lock(old_tbl, hash));
-	if (new_tbl != old_tbl)
-		spin_lock_bh_nested(bucket_lock(new_tbl, hash),
-				    RHT_LOCK_NESTED);
+	return new_tbl;
 }
 
-static void unlock_buckets(struct bucket_table *new_tbl,
-			   struct bucket_table *old_tbl, unsigned int hash)
-	__releases(old_bucket_lock)
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
-	if (new_tbl != old_tbl)
-		spin_unlock_bh(bucket_lock(new_tbl, hash));
-	spin_unlock_bh(bucket_lock(old_tbl, hash));
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl = rhashtable_last_table(ht,
+		rht_dereference_rcu(old_tbl->future_tbl, ht));
+	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
+	int err = -ENOENT;
+	struct rhash_head *head, *next, *entry;
+	spinlock_t *new_bucket_lock;
+	unsigned int new_hash;
+
+	rht_for_each(entry, old_tbl, old_hash) {
+		err = 0;
+		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
+
+		if (rht_is_a_nulls(next))
+			break;
+
+		pprev = &entry->next;
+	}
+
+	if (err)
+		goto out;
+
+	new_hash = head_hashfn(ht, new_tbl, entry);
+
+	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+
+	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
+	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
+				      new_tbl, new_hash);
+
+	if (rht_is_a_nulls(head))
+		INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash);
+	else
+		RCU_INIT_POINTER(entry->next, head);
+
+	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
+	spin_unlock(new_bucket_lock);
+
+	rcu_assign_pointer(*pprev, next);
+
+out:
+	return err;
 }
 
-/**
- * Unlink entries on bucket which hash to different bucket.
- *
- * Returns true if no more work needs to be performed on the bucket.
- */
-static bool hashtable_chain_unzip(struct rhashtable *ht,
-				  const struct bucket_table *new_tbl,
-				  struct bucket_table *old_tbl,
-				  size_t old_hash)
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+				    unsigned int old_hash)
 {
-	struct rhash_head *he, *p, *next;
-	unsigned int new_hash, new_hash2;
-
-	ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	spinlock_t *old_bucket_lock;
 
-	/* Old bucket empty, no work needed. */
-	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
-				   old_hash);
-	if (rht_is_a_nulls(p))
-		return false;
+	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
-	new_hash = head_hashfn(ht, new_tbl, p);
-	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
+	spin_lock_bh(old_bucket_lock);
+	while (!rhashtable_rehash_one(ht, old_hash))
+		;
+	old_tbl->rehash++;
+	spin_unlock_bh(old_bucket_lock);
+}
 
-	/* Advance the old bucket pointer one or more times until it
-	 * reaches a node that doesn't hash to the same bucket as the
-	 * previous node p. Call the previous node p;
-	 */
-	rht_for_each_continue(he, p->next, old_tbl, old_hash) {
-		new_hash2 = head_hashfn(ht, new_tbl, he);
-		ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+				    struct bucket_table *old_tbl,
+				    struct bucket_table *new_tbl)
+{
+	/* Protect future_tbl using the first bucket lock. */
+	spin_lock_bh(old_tbl->locks);
 
-		if (new_hash != new_hash2)
-			break;
-		p = he;
+	/* Did somebody beat us to it? */
+	if (rcu_access_pointer(old_tbl->future_tbl)) {
+		spin_unlock_bh(old_tbl->locks);
+		return -EEXIST;
 	}
-	rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
 
-	/* Find the subsequent node which does hash to the same
-	 * bucket as node P, or NULL if no such node exists.
+	/* Make insertions go into the new, empty table right away. Deletions
+	 * and lookups will be attempted in both tables until we synchronize.
 	 */
-	INIT_RHT_NULLS_HEAD(next, ht, old_hash);
-	if (!rht_is_a_nulls(he)) {
-		rht_for_each_continue(he, he->next, old_tbl, old_hash) {
-			if (head_hashfn(ht, new_tbl, he) == new_hash) {
-				next = he;
-				break;
-			}
-		}
-	}
+	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
-	/* Set p's next pointer to that subsequent node pointer,
-	 * bypassing the nodes which do not hash to p's bucket
-	 */
-	rcu_assign_pointer(p->next, next);
+	/* Ensure the new table is visible to readers. */
+	smp_wmb();
 
-	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
-				   old_hash);
+	spin_unlock_bh(old_tbl->locks);
 
-	return !rht_is_a_nulls(p);
+	return 0;
 }
 
-static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
-			    unsigned int new_hash, struct rhash_head *entry)
+static int rhashtable_rehash_table(struct rhashtable *ht)
 {
-	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
+	struct rhashtable_walker *walker;
+	unsigned int old_hash;
+
+	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+	if (!new_tbl)
+		return 0;
+
+	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
+		rhashtable_rehash_chain(ht, old_hash);
+
+	/* Publish the new table pointer. */
+	rcu_assign_pointer(ht->tbl, new_tbl);
+
+	spin_lock(&ht->lock);
+	list_for_each_entry(walker, &old_tbl->walkers, list)
+		walker->tbl = NULL;
+	spin_unlock(&ht->lock);
 
-	rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
+	/* Wait for readers. All new readers will see the new
+	 * table, and thus no references to the old table will
+	 * remain.
+	 */
+	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+
+	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
 /**
  * rhashtable_expand - Expand hash table while allowing concurrent lookups
  * @ht:		the hash table to expand
  *
- * A secondary bucket array is allocated and the hash entries are migrated
- * while keeping them on both lists until the end of the RCU grace period.
+ * A secondary bucket array is allocated and the hash entries are migrated.
  *
  * This function may only be called in a context where it is safe to call
  * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
@@ -372,89 +286,32 @@ static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
  * It is valid to have concurrent insertions and deletions protected by per
  * bucket locks or concurrent RCU protected lookups and traversals.
  */
-int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	struct rhash_head *he;
-	unsigned int new_hash, old_hash;
-	bool complete = false;
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
+	old_tbl = rhashtable_last_table(ht, old_tbl);
+
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	atomic_inc(&ht->shift);
-
-	/* Make insertions go into the new, empty table right away. Deletions
-	 * and lookups will be attempted in both tables until we synchronize.
-	 * The synchronize_rcu() guarantees for the new table to be picked up
-	 * so no new additions go into the old table while we relink.
-	 */
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
-	synchronize_rcu();
-
-	/* For each new bucket, search the corresponding old bucket for the
-	 * first entry that hashes to the new bucket, and link the end of
-	 * newly formed bucket chain (containing entries added to future
-	 * table) to that entry. Since all the entries which will end up in
-	 * the new bucket appear in the same old bucket, this constructs an
-	 * entirely valid new hash table, but with multiple buckets
-	 * "zipped" together into a single imprecise chain.
-	 */
-	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		old_hash = rht_bucket_index(old_tbl, new_hash);
-		lock_buckets(new_tbl, old_tbl, new_hash);
-		rht_for_each(he, old_tbl, old_hash) {
-			if (head_hashfn(ht, new_tbl, he) == new_hash) {
-				link_old_to_new(ht, new_tbl, new_hash, he);
-				break;
-			}
-		}
-		unlock_buckets(new_tbl, old_tbl, new_hash);
-		cond_resched();
-	}
-
-	/* Unzip interleaved hash chains */
-	while (!complete && !ht->being_destroyed) {
-		/* Wait for readers. All new readers will see the new
-		 * table, and thus no references to the old table will
-		 * remain.
-		 */
-		synchronize_rcu();
-
-		/* For each bucket in the old table (each of which
-		 * contains items from multiple buckets of the new
-		 * table): ...
-		 */
-		complete = true;
-		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-			lock_buckets(new_tbl, old_tbl, old_hash);
-
-			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
-						  old_hash))
-				complete = false;
-
-			unlock_buckets(new_tbl, old_tbl, old_hash);
-			cond_resched();
-		}
-	}
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
 
-	rcu_assign_pointer(ht->tbl, new_tbl);
-	synchronize_rcu();
-
-	bucket_table_free(old_tbl);
-	return 0;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_expand);
 
 /**
  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
  * @ht:		the hash table to shrink
  *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
+ * This function shrinks the hash table to fit, i.e., the smallest
+ * size would not cause it to expand right away automatically.
  *
  * The caller must ensure that no concurrent resizing occurs by holding
  * ht->mutex.
@@ -465,395 +322,146 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
  * It is valid to have concurrent insertions and deletions protected by per
  * bucket locks or concurrent RCU protected lookups and traversals.
  */
-int rhashtable_shrink(struct rhashtable *ht)
+static int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
-	unsigned int new_hash;
+	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	unsigned int size;
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, tbl->size / 2);
-	if (new_tbl == NULL)
-		return -ENOMEM;
-
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
-	synchronize_rcu();
-
-	/* Link the first entry in the old bucket to the end of the
-	 * bucket in the new table. As entries are concurrently being
-	 * added to the new table, lock down the new bucket. As we
-	 * always divide the size in half when shrinking, each bucket
-	 * in the new table maps to exactly two buckets in the old
-	 * table.
-	 */
-	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		lock_buckets(new_tbl, tbl, new_hash);
-
-		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
-				   tbl->buckets[new_hash]);
-		ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
-		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
-				   tbl->buckets[new_hash + new_tbl->size]);
+	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	if (size < ht->p.min_size)
+		size = ht->p.min_size;
 
-		unlock_buckets(new_tbl, tbl, new_hash);
-		cond_resched();
-	}
+	if (old_tbl->size <= size)
+		return 0;
 
-	/* Publish the new, valid hash table */
-	rcu_assign_pointer(ht->tbl, new_tbl);
-	atomic_dec(&ht->shift);
+	if (rht_dereference(old_tbl->future_tbl, ht))
+		return -EEXIST;
 
-	/* Wait for readers. No new readers will have references to the
-	 * old hash table.
-	 */
-	synchronize_rcu();
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+	if (new_tbl == NULL)
+		return -ENOMEM;
 
-	bucket_table_free(tbl);
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
 
-	return 0;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_shrink);
 
 static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
-	struct rhashtable_walker *walker;
+	int err = 0;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
-	if (ht->being_destroyed)
-		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
+	tbl = rhashtable_last_table(ht, tbl);
 
-	list_for_each_entry(walker, &ht->walkers, list)
-		walker->resize = true;
-
-	if (rht_grow_above_75(ht, tbl->size))
+	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl->size))
+	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
-unlock:
-	mutex_unlock(&ht->mutex);
-}
 
-static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-				struct bucket_table *tbl,
-				const struct bucket_table *old_tbl, u32 hash)
-{
-	bool no_resize_running = tbl == old_tbl;
-	struct rhash_head *head;
+	err = rhashtable_rehash_table(ht);
 
-	hash = rht_bucket_index(tbl, hash);
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
-
-	ASSERT_BUCKET_LOCK(ht, tbl, hash);
-
-	if (rht_is_a_nulls(head))
-		INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
-	else
-		RCU_INIT_POINTER(obj->next, head);
-
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	mutex_unlock(&ht->mutex);
 
-	atomic_inc(&ht->nelems);
-	if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+	if (err)
 		schedule_work(&ht->run_work);
 }
 
-/**
- * rhashtable_insert - insert object into hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Will take a per bucket spinlock to protect against mutual mutations
- * on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
+static bool rhashtable_check_elasticity(struct rhashtable *ht,
+					struct bucket_table *tbl,
+					unsigned int hash)
 {
-	struct bucket_table *tbl, *old_tbl;
-	unsigned hash;
-
-	rcu_read_lock();
-
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
+	unsigned int elasticity = ht->elasticity;
+	struct rhash_head *head;
 
-	lock_buckets(tbl, old_tbl, hash);
-	__rhashtable_insert(ht, obj, tbl, old_tbl, hash);
-	unlock_buckets(tbl, old_tbl, hash);
+	rht_for_each(head, tbl, hash)
+		if (!--elasticity)
+			return true;
 
-	rcu_read_unlock();
+	return false;
 }
-EXPORT_SYMBOL_GPL(rhashtable_insert);
 
-/**
- * rhashtable_remove - remove object from hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Since the hash chain is single linked, the removal operation needs to
- * walk the bucket chain upon removal. The removal operation is thus
- * considerable slow if the hash table is not correctly sized.
- *
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
- *
- * The caller must ensure that no concurrent table mutations occur. It is
- * however valid to have concurrent lookups if they are RCU protected.
- */
-bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
+int rhashtable_insert_rehash(struct rhashtable *ht)
 {
-	struct bucket_table *tbl, *new_tbl, *old_tbl;
-	struct rhash_head __rcu **pprev;
-	struct rhash_head *he, *he2;
-	unsigned int hash, new_hash;
-	bool ret = false;
+	struct bucket_table *old_tbl;
+	struct bucket_table *new_tbl;
+	struct bucket_table *tbl;
+	unsigned int size;
+	int err;
 
-	rcu_read_lock();
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
-
-	lock_buckets(new_tbl, old_tbl, new_hash);
-restart:
-	hash = rht_bucket_index(tbl, new_hash);
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
-		if (he != obj) {
-			pprev = &he->next;
-			continue;
-		}
-
-		ASSERT_BUCKET_LOCK(ht, tbl, hash);
-
-		if (old_tbl->size > new_tbl->size && tbl == old_tbl &&
-		    !rht_is_a_nulls(obj->next) &&
-		    head_hashfn(ht, tbl, obj->next) != hash) {
-			rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
-		} else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) {
-			rht_for_each_continue(he2, obj->next, tbl, hash) {
-				if (head_hashfn(ht, tbl, he2) == hash) {
-					rcu_assign_pointer(*pprev, he2);
-					goto found;
-				}
-			}
-
-			rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
-		} else {
-			rcu_assign_pointer(*pprev, obj->next);
-		}
-
-found:
-		ret = true;
-		break;
-	}
-
-	/* The entry may be linked in either 'tbl', 'future_tbl', or both.
-	 * 'future_tbl' only exists for a short period of time during
-	 * resizing. Thus traversing both is fine and the added cost is
-	 * very rare.
-	 */
-	if (tbl != old_tbl) {
-		tbl = old_tbl;
-		goto restart;
-	}
-
-	unlock_buckets(new_tbl, old_tbl, new_hash);
-
-	if (ret) {
-		bool no_resize_running = new_tbl == old_tbl;
-
-		atomic_dec(&ht->nelems);
-		if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
-			schedule_work(&ht->run_work);
-	}
-
-	rcu_read_unlock();
+	tbl = rhashtable_last_table(ht, old_tbl);
 
-	return ret;
-}
-EXPORT_SYMBOL_GPL(rhashtable_remove);
-
-struct rhashtable_compare_arg {
-	struct rhashtable *ht;
-	const void *key;
-};
-
-static bool rhashtable_compare(void *ptr, void *arg)
-{
-	struct rhashtable_compare_arg *x = arg;
-	struct rhashtable *ht = x->ht;
-
-	return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
-}
-
-/**
- * rhashtable_lookup - lookup key in hash table
- * @ht:		hash table
- * @key:	pointer to key
- *
- * Computes the hash value for the key and traverses the bucket chain looking
- * for a entry with an identical key. The first matching entry is returned.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- */
-void *rhashtable_lookup(struct rhashtable *ht, const void *key)
-{
-	struct rhashtable_compare_arg arg = {
-		.ht = ht,
-		.key = key,
-	};
-
-	BUG_ON(!ht->p.key_len);
-
-	return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup);
-
-/**
- * rhashtable_lookup_compare - search hash table with compare function
- * @ht:		hash table
- * @key:	the pointer to the key
- * @compare:	compare function, must return true on match
- * @arg:	argument passed on to compare function
- *
- * Traverses the bucket chain behind the provided hash value and calls the
- * specified compare function for each entry.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Returns the first entry on which the compare function returned true.
- */
-void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
-				bool (*compare)(void *, void *), void *arg)
-{
-	const struct bucket_table *tbl, *old_tbl;
-	struct rhash_head *he;
-	u32 hash;
+	size = tbl->size;
 
-	rcu_read_lock();
+	if (rht_grow_above_75(ht, tbl))
+		size *= 2;
+	/* More than two rehashes (not resizes) detected. */
+	else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
+		return -EBUSY;
 
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	hash = key_hashfn(ht, key, ht->p.key_len);
-restart:
-	rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
-		if (!compare(rht_obj(ht, he), arg))
-			continue;
-		rcu_read_unlock();
-		return rht_obj(ht, he);
-	}
+	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+	if (new_tbl == NULL)
+		return -ENOMEM;
 
-	if (unlikely(tbl != old_tbl)) {
-		tbl = old_tbl;
-		goto restart;
-	}
-	rcu_read_unlock();
+	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+	if (err) {
+		bucket_table_free(new_tbl);
+		if (err == -EEXIST)
+			err = 0;
+	} else
+		schedule_work(&ht->run_work);
 
-	return NULL;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
+EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
 
-/**
- * rhashtable_lookup_insert - lookup and insert object into hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
+int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			   struct rhash_head *obj,
+			   struct bucket_table *tbl)
 {
-	struct rhashtable_compare_arg arg = {
-		.ht = ht,
-		.key = rht_obj(ht, obj) + ht->p.key_offset,
-	};
+	struct rhash_head *head;
+	unsigned int hash;
+	int err;
 
-	BUG_ON(!ht->p.key_len);
+	tbl = rhashtable_last_table(ht, tbl);
+	hash = head_hashfn(ht, tbl, obj);
+	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
-	return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
-						&arg);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
+	err = -EEXIST;
+	if (key && rhashtable_lookup_fast(ht, key, ht->p))
+		goto exit;
 
-/**
- * rhashtable_lookup_compare_insert - search and insert object to hash table
- *                                    with compare function
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- * @compare:	compare function, must return true on match
- * @arg:	argument passed on to compare function
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
-				      struct rhash_head *obj,
-				      bool (*compare)(void *, void *),
-				      void *arg)
-{
-	struct bucket_table *new_tbl, *old_tbl;
-	u32 new_hash;
-	bool success = true;
+	err = -EAGAIN;
+	if (rhashtable_check_elasticity(ht, tbl, hash) ||
+	    rht_grow_above_100(ht, tbl))
+		goto exit;
 
-	BUG_ON(!ht->p.key_len);
+	err = 0;
 
-	rcu_read_lock();
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
-	lock_buckets(new_tbl, old_tbl, new_hash);
+	RCU_INIT_POINTER(obj->next, head);
 
-	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
-				      compare, arg)) {
-		success = false;
-		goto exit;
-	}
+	rcu_assign_pointer(tbl->buckets[hash], obj);
 
-	__rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash);
+	atomic_inc(&ht->nelems);
 
 exit:
-	unlock_buckets(new_tbl, old_tbl, new_hash);
-	rcu_read_unlock();
+	spin_unlock(rht_bucket_lock(tbl, hash));
 
-	return success;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
+EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
 
 /**
  * rhashtable_walk_init - Initialise an iterator
@@ -887,11 +495,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
 	if (!iter->walker)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&iter->walker->list);
-	iter->walker->resize = false;
-
 	mutex_lock(&ht->mutex);
-	list_add(&iter->walker->list, &ht->walkers);
+	iter->walker->tbl = rht_dereference(ht->tbl, ht);
+	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
 	mutex_unlock(&ht->mutex);
 
 	return 0;
@@ -907,7 +513,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	mutex_lock(&iter->ht->mutex);
-	list_del(&iter->walker->list);
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
 	mutex_unlock(&iter->ht->mutex);
 	kfree(iter->walker);
 }
@@ -928,13 +535,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  * by calling rhashtable_walk_next.
  */
 int rhashtable_walk_start(struct rhashtable_iter *iter)
+	__acquires(RCU)
 {
+	struct rhashtable *ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
+
 	rcu_read_lock();
 
-	if (iter->walker->resize) {
-		iter->slot = 0;
-		iter->skip = 0;
-		iter->walker->resize = false;
+	mutex_unlock(&ht->mutex);
+
+	if (!iter->walker->tbl) {
+		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
 
@@ -956,13 +571,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl = iter->walker->tbl;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
 	void *obj = NULL;
 
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
 	if (p) {
 		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
 		goto next;
@@ -988,17 +601,20 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->p = NULL;
+	/* Ensure we see any new tables. */
+	smp_rmb();
 
-out:
-	if (iter->walker->resize) {
-		iter->p = NULL;
+	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker->tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
-		iter->walker->resize = false;
 		return ERR_PTR(-EAGAIN);
 	}
 
+	iter->p = NULL;
+
+out:
+
 	return obj;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -1010,16 +626,39 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  * Finish a hash table walk.
  */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
+	__releases(RCU)
 {
-	rcu_read_unlock();
+	struct rhashtable *ht;
+	struct bucket_table *tbl = iter->walker->tbl;
+
+	if (!tbl)
+		goto out;
+
+	ht = iter->ht;
+
+	spin_lock(&ht->lock);
+	if (tbl->rehash < tbl->size)
+		list_add(&iter->walker->list, &tbl->walkers);
+	else
+		iter->walker->tbl = NULL;
+	spin_unlock(&ht->lock);
+
 	iter->p = NULL;
+
+out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
-static size_t rounded_hashtable_size(struct rhashtable_params *params)
+static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
 	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   1UL << params->min_shift);
+		   (unsigned long)params->min_size);
+}
+
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+	return jhash2(key, length, seed);
 }
 
 /**
@@ -1052,7 +691,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
  *	struct rhash_head	node;
  * };
  *
- * u32 my_hash_fn(const void *data, u32 seed)
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
  * {
  *	struct test_obj *obj = data;
  *
@@ -1065,47 +704,74 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
  *	.obj_hashfn = my_hash_fn,
  * };
  */
-int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
+int rhashtable_init(struct rhashtable *ht,
+		    const struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
 	size_t size;
 
 	size = HASH_DEFAULT_SIZE;
 
-	if ((params->key_len && !params->hashfn) ||
-	    (!params->key_len && !params->obj_hashfn))
+	if ((!params->key_len && !params->obj_hashfn) ||
+	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
 	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
 		return -EINVAL;
 
-	params->min_shift = max_t(size_t, params->min_shift,
-				  ilog2(HASH_MIN_SIZE));
-
 	if (params->nelem_hint)
 		size = rounded_hashtable_size(params);
 
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
+	spin_lock_init(&ht->lock);
 	memcpy(&ht->p, params, sizeof(*params));
-	INIT_LIST_HEAD(&ht->walkers);
+
+	if (params->min_size)
+		ht->p.min_size = roundup_pow_of_two(params->min_size);
+
+	if (params->max_size)
+		ht->p.max_size = rounddown_pow_of_two(params->max_size);
+
+	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
+
+	/* The maximum (not average) chain length grows with the
+	 * size of the hash table, at a rate of (log N)/(log log N).
+	 * The value of 16 is selected so that even if the hash
+	 * table grew to 2^32 you would not expect the maximum
+	 * chain length to exceed it unless we are under attack
+	 * (or extremely unlucky).
+	 *
+	 * As this limit is only to detect attacks, we don't need
+	 * to set it to a lower value as you'd need the chain
+	 * length to vastly exceed 16 to have any real effect
+	 * on the system.
+	 */
+	if (!params->insecure_elasticity)
+		ht->elasticity = 16;
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
-	tbl = bucket_table_alloc(ht, size);
+	ht->key_len = ht->p.key_len;
+	if (!params->hashfn) {
+		ht->p.hashfn = jhash;
+
+		if (!(ht->key_len & (sizeof(u32) - 1))) {
+			ht->key_len /= sizeof(u32);
+			ht->p.hashfn = rhashtable_jhash2;
+		}
+	}
+
+	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
 
 	atomic_set(&ht->nelems, 0);
-	atomic_set(&ht->shift, ilog2(tbl->size));
-	RCU_INIT_POINTER(ht->tbl, tbl);
-	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
-	if (!ht->p.hash_rnd)
-		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
+	RCU_INIT_POINTER(ht->tbl, tbl);
 
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
@@ -1114,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
- * rhashtable_destroy - destroy hash table
+ * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht:		the hash table to destroy
+ * @free_fn:	callback to release resources of element
+ * @arg:	pointer passed to free_fn
+ *
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to releasal resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
  *
- * Frees the bucket array. This function is not rcu safe, therefore the caller
- * has to make sure that no resizing may happen by unpublishing the hashtable
- * and waiting for the quiescent cycle before releasing the bucket array.
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible that no further write operations
+ * occurs in parallel.
  */
-void rhashtable_destroy(struct rhashtable *ht)
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg)
 {
-	ht->being_destroyed = true;
+	const struct bucket_table *tbl;
+	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-	bucket_table_free(rht_dereference(ht->tbl, ht));
+	tbl = rht_dereference(ht->tbl, ht);
+	if (free_fn) {
+		for (i = 0; i < tbl->size; i++) {
+			struct rhash_head *pos, *next;
+
+			for (pos = rht_dereference(tbl->buckets[i], ht),
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL;
+			     !rht_is_a_nulls(pos);
+			     pos = next,
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL)
+				free_fn(rht_obj(ht, pos), arg);
+		}
+	}
+
+	bucket_table_free(tbl);
 	mutex_unlock(&ht->mutex);
 }
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+	return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
diff --git a/lib/sha1.c b/lib/sha1.c
index 1df191e..5a56dfd 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -198,3 +198,4 @@ void sha_init(__u32 *buf)
 	buf[3] = 0x10325476;
 	buf[4] = 0xc3d2e1f0;
 }
+EXPORT_SYMBOL(sha_init);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 67c7593..b295754 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,6 +38,15 @@ struct test_obj {
 	struct rhash_head	node;
 };
 
+static const struct rhashtable_params test_rht_params = {
+	.nelem_hint = TEST_HT_SIZE,
+	.head_offset = offsetof(struct test_obj, node),
+	.key_offset = offsetof(struct test_obj, value),
+	.key_len = sizeof(int),
+	.hashfn = jhash,
+	.nulls_base = (3U << RHT_BASE_SHIFT),
+};
+
 static int __init test_rht_lookup(struct rhashtable *ht)
 {
 	unsigned int i;
@@ -47,7 +56,7 @@ static int __init test_rht_lookup(struct rhashtable *ht)
 		bool expected = !(i % 2);
 		u32 key = i;
 
-		obj = rhashtable_lookup(ht, &key);
+		obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
 
 		if (expected && !obj) {
 			pr_warn("Test failed: Could not find key %u\n", key);
@@ -80,7 +89,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
 		rcu_cnt = cnt = 0;
 
 		if (!quiet)
-			pr_info(" [%#4x/%zu]", i, tbl->size);
+			pr_info(" [%#4x/%u]", i, tbl->size);
 
 		rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
 			cnt++;
@@ -133,7 +142,11 @@ static int __init test_rhashtable(struct rhashtable *ht)
 		obj->ptr = TEST_PTR;
 		obj->value = i * 2;
 
-		rhashtable_insert(ht, &obj->node);
+		err = rhashtable_insert_fast(ht, &obj->node, test_rht_params);
+		if (err) {
+			kfree(obj);
+			goto error;
+		}
 	}
 
 	rcu_read_lock();
@@ -141,30 +154,6 @@ static int __init test_rhashtable(struct rhashtable *ht)
 	test_rht_lookup(ht);
 	rcu_read_unlock();
 
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table expansion iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_expand(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table shrinkage iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_shrink(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
 	rcu_read_lock();
 	test_bucket_stats(ht, true);
 	rcu_read_unlock();
@@ -173,10 +162,10 @@ static int __init test_rhashtable(struct rhashtable *ht)
 	for (i = 0; i < TEST_ENTRIES; i++) {
 		u32 key = i * 2;
 
-		obj = rhashtable_lookup(ht, &key);
+		obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
 		BUG_ON(!obj);
 
-		rhashtable_remove(ht, &obj->node);
+		rhashtable_remove_fast(ht, &obj->node, test_rht_params);
 		kfree(obj);
 	}
 
@@ -195,20 +184,11 @@ static struct rhashtable ht;
 
 static int __init test_rht_init(void)
 {
-	struct rhashtable_params params = {
-		.nelem_hint = TEST_HT_SIZE,
-		.head_offset = offsetof(struct test_obj, node),
-		.key_offset = offsetof(struct test_obj, value),
-		.key_len = sizeof(int),
-		.hashfn = jhash,
-		.max_shift = 1, /* we expand/shrink manually here */
-		.nulls_base = (3U << RHT_BASE_SHIFT),
-	};
 	int err;
 
 	pr_info("Running resizable hashtable tests...\n");
 
-	err = rhashtable_init(&ht, &params);
+	err = rhashtable_init(&ht, &test_rht_params);
 	if (err < 0) {
 		pr_warn("Test failed: Unable to initialize hashtable: %d\n",
 			err);
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 09:00:47 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 09:00:47 -0700
commit	6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch)
tree	74d1ec65087df1da1021b43ac51acc1ee8601809 /lib
parent	bb0fd7ab0986105765d11baa82e619c618a235aa (diff)
parent	9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff)
download	op-kernel-dev-6c373ca89399c5a3f7ef210ad8f63dc3437da345.zip op-kernel-dev-6c373ca89399c5a3f7ef210ad8f63dc3437da345.tar.gz