diff options
author | Balazs Scheidler <bazsi@balabit.hu> | 2010-10-21 13:06:43 +0200 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-10-21 13:06:43 +0200 |
commit | 093d282321daeb19c107e5f1f16d7f68484f3ade (patch) | |
tree | 36e9eed23573068819bf67a91caac6ebf60d0d7c /net/ipv4 | |
parent | 6006db84a91838813cdad8a6622a4e39efe9ea47 (diff) | |
download | op-kernel-dev-093d282321daeb19c107e5f1f16d7f68484f3ade.zip op-kernel-dev-093d282321daeb19c107e5f1f16d7f68484f3ade.tar.gz |
tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()
When __inet_inherit_port() is called on a tproxy connection the wrong locks are
held for the inet_bind_bucket it is added to. __inet_inherit_port() made an
implicit assumption that the listener's port number (and thus its bind bucket).
Unfortunately, if you're using the TPROXY target to redirect skbs to a
transparent proxy that assumption is not true anymore and things break.
This patch adds code to __inet_inherit_port() so that it can handle this case
by looking up or creating a new bind bucket for the child socket and updates
callers of __inet_inherit_port() to gracefully handle __inet_inherit_port()
failing.
Reported by and original patch from Stephen Buck <stephen.buck@exinda.com>.
See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion.
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/inet_hashtables.c | 28 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 10 |
2 files changed, 33 insertions, 5 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb7ad5a..1b344f3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk) } EXPORT_SYMBOL(inet_put_port); -void __inet_inherit_port(struct sock *sk, struct sock *child) +int __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, + unsigned short port = inet_sk(child)->inet_num; + const int bhash = inet_bhashfn(sock_net(sk), port, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; + if (tb->port != port) { + /* NOTE: using tproxy and redirecting skbs to a proxy + * on a different listener port breaks the assumption + * that the listener socket's icsk_bind_hash is the same + * as that of the child socket. We have to look up or + * create a new bind bucket for the child here. */ + struct hlist_node *node; + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (net_eq(ib_net(tb), sock_net(sk)) && + tb->port == port) + break; + } + if (!node) { + tb = inet_bind_bucket_create(table->bind_bucket_cachep, + sock_net(sk), head, port); + if (!tb) { + spin_unlock(&head->lock); + return -ENOMEM; + } + } + } sk_add_bind_node(child, &tb->owners); inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); + + return 0; } EXPORT_SYMBOL_GPL(__inet_inherit_port); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a0232f3..8f8527d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) - goto exit; + goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(newsk, dst); @@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } #endif + if (__inet_inherit_port(sk, newsk) < 0) { + sock_put(newsk); + goto exit; + } __inet_hash_nolisten(newsk, NULL); - __inet_inherit_port(sk, newsk); return newsk; exit_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); +exit_nonewsk: + dst_release(dst); exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - dst_release(dst); return NULL; } EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |