summaryrefslogtreecommitdiffstats
path: root/net/netlink/af_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--net/netlink/af_netlink.c105
1 files changed, 52 insertions, 53 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 02fdde2..2197af00 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -98,12 +98,12 @@ static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);
/* nl_table locking explained:
- * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
- * combined with an RCU read-side lock. Insertion and removal are protected
- * with nl_sk_hash_lock while using RCU list modification primitives and may
- * run in parallel to nl_table_lock protected lookups. Destruction of the
- * Netlink socket may only occur *after* nl_table_lock has been acquired
- * either during or after the socket has been removed from the list.
+ * Lookup and traversal are protected with an RCU read-side lock. Insertion
+ * and removal are protected with a per-bucket lock while using RCU list
+ * modification primitives and may run in parallel to RCU-protected lookups.
+ * Destruction of the Netlink socket may only occur *after* nl_table_lock has
+ * been acquired either during or after the socket has been removed from
+ * the list and after an RCU grace period.
*/
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
@@ -111,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
-/* Protects netlink socket hash table mutations */
-DEFINE_MUTEX(nl_sk_hash_lock);
-EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
-
-#ifdef CONFIG_PROVE_LOCKING
-static int lockdep_nl_sk_hash_is_held(void *parent)
-{
- if (debug_locks)
- return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock);
- return 1;
-}
-#endif
-
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -1003,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
.net = net,
.portid = portid,
};
- u32 hash;
- hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid));
-
- return rhashtable_lookup_compare(&table->hash, hash,
+ return rhashtable_lookup_compare(&table->hash, &portid,
&netlink_compare, &arg);
}
+static bool __netlink_insert(struct netlink_table *table, struct sock *sk)
+{
+ struct netlink_compare_arg arg = {
+ .net = sock_net(sk),
+ .portid = nlk_sk(sk)->portid,
+ };
+
+ return rhashtable_lookup_compare_insert(&table->hash,
+ &nlk_sk(sk)->node,
+ &netlink_compare, &arg);
+}
+
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
struct netlink_table *table = &nl_table[protocol];
struct sock *sk;
- read_lock(&nl_table_lock);
rcu_read_lock();
sk = __netlink_lookup(table, portid, net);
if (sk)
sock_hold(sk);
rcu_read_unlock();
- read_unlock(&nl_table_lock);
return sk;
}
@@ -1053,29 +1047,33 @@ netlink_update_listeners(struct sock *sk)
* makes sure updates are visible before bind or setsockopt return. */
}
-static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
+static int netlink_insert(struct sock *sk, u32 portid)
{
struct netlink_table *table = &nl_table[sk->sk_protocol];
- int err = -EADDRINUSE;
+ int err;
- mutex_lock(&nl_sk_hash_lock);
- if (__netlink_lookup(table, portid, net))
- goto err;
+ lock_sock(sk);
err = -EBUSY;
if (nlk_sk(sk)->portid)
goto err;
err = -ENOMEM;
- if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX))
+ if (BITS_PER_LONG > 32 &&
+ unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
goto err;
nlk_sk(sk)->portid = portid;
sock_hold(sk);
- rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
+
err = 0;
+ if (!__netlink_insert(table, sk)) {
+ err = -EADDRINUSE;
+ sock_put(sk);
+ }
+
err:
- mutex_unlock(&nl_sk_hash_lock);
+ release_sock(sk);
return err;
}
@@ -1083,13 +1081,11 @@ static void netlink_remove(struct sock *sk)
{
struct netlink_table *table;
- mutex_lock(&nl_sk_hash_lock);
table = &nl_table[sk->sk_protocol];
if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
- mutex_unlock(&nl_sk_hash_lock);
netlink_table_grab();
if (nlk_sk(sk)->subscriptions) {
@@ -1197,6 +1193,13 @@ out_module:
goto out;
}
+static void deferred_put_nlk_sk(struct rcu_head *head)
+{
+ struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
+
+ sock_put(&nlk->sk);
+}
+
static int netlink_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -1269,7 +1272,7 @@ static int netlink_release(struct socket *sock)
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
local_bh_enable();
- sock_put(sk);
+ call_rcu(&nlk->rcu, deferred_put_nlk_sk);
return 0;
}
@@ -1284,7 +1287,6 @@ static int netlink_autobind(struct socket *sock)
retry:
cond_resched();
- netlink_table_grab();
rcu_read_lock();
if (__netlink_lookup(table, portid, net)) {
/* Bind collision, search negative portid values. */
@@ -1292,13 +1294,11 @@ retry:
if (rover > -4097)
rover = -4097;
rcu_read_unlock();
- netlink_table_ungrab();
goto retry;
}
rcu_read_unlock();
- netlink_table_ungrab();
- err = netlink_insert(sk, net, portid);
+ err = netlink_insert(sk, portid);
if (err == -EADDRINUSE)
goto retry;
@@ -1486,7 +1486,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (!nlk->portid) {
err = nladdr->nl_pid ?
- netlink_insert(sk, net, nladdr->nl_pid) :
+ netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
netlink_undo_bind(nlk->ngroups, groups, sk);
@@ -2492,7 +2492,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (cfg && cfg->input)
nlk_sk(sk)->netlink_rcv = cfg->input;
- if (netlink_insert(sk, net, 0))
+ if (netlink_insert(sk, 0))
goto out_sock_release;
nlk = nlk_sk(sk);
@@ -2911,7 +2911,9 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
for (j = 0; j < tbl->size; j++) {
- rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
+ struct rhash_head *node;
+
+ rht_for_each_entry_rcu(nlk, node, tbl, j, node) {
s = (struct sock *)nlk;
if (sock_net(s) != seq_file_net(seq))
@@ -2929,9 +2931,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
}
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(nl_table_lock) __acquires(RCU)
+ __acquires(RCU)
{
- read_lock(&nl_table_lock);
rcu_read_lock();
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
@@ -2939,6 +2940,8 @@ static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct rhashtable *ht;
+ const struct bucket_table *tbl;
+ struct rhash_head *node;
struct netlink_sock *nlk;
struct nl_seq_iter *iter;
struct net *net;
@@ -2955,17 +2958,17 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
i = iter->link;
ht = &nl_table[i].hash;
- rht_for_each_entry(nlk, nlk->node.next, ht, node)
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node)
if (net_eq(sock_net((struct sock *)nlk), net))
return nlk;
j = iter->hash_idx + 1;
do {
- const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
for (; j < tbl->size; j++) {
- rht_for_each_entry(nlk, tbl->buckets[j], ht, node) {
+ rht_for_each_entry_rcu(nlk, node, tbl, j, node) {
if (net_eq(sock_net((struct sock *)nlk), net)) {
iter->link = i;
iter->hash_idx = j;
@@ -2981,10 +2984,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU) __releases(nl_table_lock)
+ __releases(RCU)
{
rcu_read_unlock();
- read_unlock(&nl_table_lock);
}
@@ -3131,9 +3133,6 @@ static int __init netlink_proto_init(void)
.max_shift = 16, /* 64K */
.grow_decision = rht_grow_above_75,
.shrink_decision = rht_shrink_below_30,
-#ifdef CONFIG_PROVE_LOCKING
- .mutex_is_held = lockdep_nl_sk_hash_is_held,
-#endif
};
if (err != 0)
OpenPOWER on IntegriCloud