diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 11:01:49 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 15:00:15 -0700 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /include/net | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) | |
download | op-kernel-dev-43815482370c510c569fd18edb57afcb0fa8cab6.zip op-kernel-dev-43815482370c510c569fd18edb57afcb0fa8cab6.tar.gz |
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.
RCU conversion is pretty much needed :
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct
socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to eventually wakeup tasks.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They dont need rcu freeing.
Some cleanups or followups are probably needed, (possible
sk_callback_lock conversion to a spinlock for example...).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/af_unix.h | 20 | ||||
-rw-r--r-- | include/net/sock.h | 38 |
2 files changed, 30 insertions, 28 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1614d78..20725e2 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -30,7 +30,7 @@ struct unix_skb_parms { #endif }; -#define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) +#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) #define UNIXSID(skb) (&UNIXCB((skb)).secid) @@ -45,21 +45,23 @@ struct unix_skb_parms { struct unix_sock { /* WARNING: sk has to be the first member */ struct sock sk; - struct unix_address *addr; - struct dentry *dentry; - struct vfsmount *mnt; + struct unix_address *addr; + struct dentry *dentry; + struct vfsmount *mnt; struct mutex readlock; - struct sock *peer; - struct sock *other; + struct sock *peer; + struct sock *other; struct list_head link; - atomic_long_t inflight; - spinlock_t lock; + atomic_long_t inflight; + spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; - wait_queue_head_t peer_wait; + struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) +#define peer_wait peer_wq.wait + #ifdef CONFIG_SYSCTL extern int unix_sysctl_register(struct net *net); extern void unix_sysctl_unregister(struct net *net); diff --git a/include/net/sock.h b/include/net/sock.h index e1777db..cc7f91e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -159,7 +159,7 @@ struct sock_common { * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer * @sk_rcvbuf: size of receive buffer in bytes - * @sk_sleep: sock wait queue + * @sk_wq: sock wait queue and async head * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy @@ -257,7 +257,7 @@ struct sock { struct sk_buff *tail; int len; } sk_backlog; - wait_queue_head_t *sk_sleep; + struct socket_wq *sk_wq; struct dst_entry *sk_dst_cache; #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; @@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) static inline wait_queue_head_t *sk_sleep(struct sock *sk) { - return sk->sk_sleep; + return &sk->sk_wq->wait; } /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. @@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk) write_lock_bh(&sk->sk_callback_lock); sock_set_flag(sk, SOCK_DEAD); sk_set_socket(sk, NULL); - sk->sk_sleep = NULL; + sk->sk_wq = NULL; write_unlock_bh(&sk->sk_callback_lock); } static inline void sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); - sk->sk_sleep = &parent->wait; + rcu_assign_pointer(sk->sk_wq, parent->wq); parent->sk = sk; sk_set_socket(sk, parent); security_sock_graft(sk, parent); @@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk) } /** - * sk_has_sleeper - check if there are any waiting processes - * @sk: socket + * wq_has_sleeper - check if there are any waiting processes + * @sk: struct socket_wq * - * Returns true if socket has waiting processes + * Returns true if socket_wq has waiting processes * - * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * * Consider following tcp code paths: @@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk) * ... ... * tp->rcv_nxt check sock_def_readable * ... { - * schedule ... - * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - * wake_up_interruptible(sk_sleep(sk)) + * schedule rcu_read_lock(); + * wq = rcu_dereference(sk->sk_wq); + * if (wq && waitqueue_active(&wq->wait)) + * wake_up_interruptible(&wq->wait) * ... * } * @@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk) * could then endup calling schedule and sleep forever if there are no more * data on the socket. * - * The sk_has_sleeper is always called right after a call to read_lock, so we - * can use smp_mb__after_lock barrier. */ -static inline int sk_has_sleeper(struct sock *sk) +static inline bool wq_has_sleeper(struct socket_wq *wq) { + /* * We need to be sure we are in sync with the * add_wait_queue modifications to the wait queue. * * This memory barrier is paired in the sock_poll_wait. */ - smp_mb__after_lock(); - return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); + smp_mb(); + return wq && waitqueue_active(&wq->wait); } /** @@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk) * @wait_address: socket wait queue * @p: poll_table * - * See the comments in the sk_has_sleeper function. + * See the comments in the wq_has_sleeper function. */ static inline void sock_poll_wait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) @@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp, * We need to be sure we are in sync with the * socket flags modification. * - * This memory barrier is paired in the sk_has_sleeper. + * This memory barrier is paired in the wq_has_sleeper. */ smp_mb(); } |