diff options
Diffstat (limited to 'net')
139 files changed, 3662 insertions, 2480 deletions
diff --git a/net/atm/common.c b/net/atm/common.c index 97ed94a..b43feb1 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up(sk->sk_sleep); - read_unlock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up(&wq->wait); + rcu_read_unlock(); } static inline int vcc_writable(struct sock *sk) @@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk) static void vcc_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (vcc_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static struct proto vcc_proto = { @@ -549,7 +555,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, } eff = (size+3) & ~3; /* align to word boundary */ - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); error = 0; while (!(skb = alloc_tx(vcc, eff))) { if (m->msg_flags & MSG_DONTWAIT) { @@ -568,9 +574,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, send_sig(SIGPIPE, current, 0); break; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; skb->dev = NULL; /* for paths shared with net_device interfaces */ @@ -595,7 +601,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 6ba6e46..509c8ac 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -131,7 +131,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk)); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); diff --git a/net/atm/svc.c b/net/atm/svc.c index 3ba9a45..754ee47 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -49,14 +49,14 @@ static void svc_disconnect(struct atm_vcc *vcc) pr_debug("%p\n", vcc); if (test_bit(ATM_VF_REGIS, &vcc->flags)) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_close, NULL, NULL, NULL); while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } /* beware - socket is still in use by atmsigd until the last as_indicate has been answered */ @@ -125,13 +125,13 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr, } vcc->local = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */ if (!sigd) { error = -EUNATCH; @@ -201,10 +201,10 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, } vcc->remote = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sock->state = SS_CONNECTING; error = -EINPROGRESS; goto out; @@ -213,7 +213,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); if (!signal_pending(current)) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); continue; } @@ -232,14 +232,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, */ sigd_enq(vcc, as_close, NULL, NULL, NULL); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } if (!sk->sk_err) while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } @@ -250,7 +250,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, error = -EINTR; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!sigd) { @@ -302,13 +302,13 @@ static int svc_listen(struct socket *sock, int backlog) goto out; } set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -343,7 +343,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) while (1) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (!(skb = skb_dequeue(&sk->sk_receive_queue)) && sigd) { if (test_bit(ATM_VF_RELEASED, &old_vcc->flags)) @@ -363,10 +363,10 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) error = -ERESTARTSYS; break; } - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!skb) { @@ -392,17 +392,17 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) } /* wait should be short, so we ignore the non-blocking flag */ set_bit(ATM_VF_WAITING, &new_vcc->flags); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL); while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) { release_sock(sk); schedule(); lock_sock(sk); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk_atm(new_vcc)->sk_sleep, &wait); + finish_wait(sk_sleep(sk_atm(new_vcc)), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -438,14 +438,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) DEFINE_WAIT(wait); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0); while (test_bit(ATM_VF_WAITING, &vcc->flags) && !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) return -EUNATCH; return -sk->sk_err; @@ -534,20 +534,20 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_addparty, NULL, NULL, (struct sockaddr_atmsvc *) sockaddr); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = -EINPROGRESS; goto out; } pr_debug("added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = xchg(&sk->sk_err_soft, 0); out: release_sock(sk); @@ -563,13 +563,13 @@ static int svc_dropparty(struct socket *sock, int ep_ref) lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 65c5801..cfdfd7e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1281,7 +1281,7 @@ static int __must_check ax25_connect(struct socket *sock, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -1294,7 +1294,7 @@ static int __must_check ax25_connect(struct socket *sock, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -1346,7 +1346,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -1364,7 +1364,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 404a850..421c45b 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -288,7 +288,7 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w BT_DBG("sock %p, sk %p", sock, sk); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -378,7 +378,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (sk->sk_state != state) { set_current_state(TASK_INTERRUPTIBLE); @@ -401,7 +401,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) break; } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 8062dad..f10b41f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -474,7 +474,7 @@ static int bnep_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&s->killed)) { set_current_state(TASK_INTERRUPTIBLE); @@ -496,7 +496,7 @@ static int bnep_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ down_write(&bnep_session_sem); @@ -507,7 +507,7 @@ static int bnep_session(void *arg) /* Wakeup user-space polling for socket errors */ s->sock->sk->sk_err = EUNATCH; - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); /* Release the socket */ fput(s->sock->file); @@ -638,7 +638,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) /* Kill session thread */ atomic_inc(&s->killed); - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d48b33f..0faad5c 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -109,7 +109,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) } skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); #endif } @@ -193,11 +193,11 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb, /* * We cannot send L2CAP packets from here as we are potentially in a bh. * So we have to queue them and wake up session thread which is sleeping - * on the sk->sk_sleep. + * on the sk_sleep(sk). */ dev->trans_start = jiffies; skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) { BT_DBG("tx queue is full"); diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index e4663aa..785e79e 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -125,7 +125,7 @@ static inline void cmtp_schedule(struct cmtp_session *session) { struct sock *sk = session->sock->sk; - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } /* CMTP init defines */ diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0073ec8..d4c6af0 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -284,7 +284,7 @@ static int cmtp_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -301,7 +301,7 @@ static int cmtp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 280529a..bfe641b 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -561,8 +561,8 @@ static int hidp_session(void *arg) init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); - add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); - add_wait_queue(intr_sk->sk_sleep, &intr_wait); + add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); + add_wait_queue(sk_sleep(intr_sk), &intr_wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -584,8 +584,8 @@ static int hidp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(intr_sk->sk_sleep, &intr_wait); - remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); + remove_wait_queue(sk_sleep(intr_sk), &intr_wait); + remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); down_write(&hidp_session_sem); @@ -609,7 +609,7 @@ static int hidp_session(void *arg) fput(session->intr_sock->file); - wait_event_timeout(*(ctrl_sk->sk_sleep), + wait_event_timeout(*(sk_sleep(ctrl_sk)), (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); fput(session->ctrl_sock->file); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index a4e215d..8d934a1 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -164,8 +164,8 @@ static inline void hidp_schedule(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; - wake_up_interruptible(ctrl_sk->sk_sleep); - wake_up_interruptible(intr_sk->sk_sleep); + wake_up_interruptible(sk_sleep(ctrl_sk)); + wake_up_interruptible(sk_sleep(intr_sk)); } /* HIDP init defines */ diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99d68c3..864c76f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1147,7 +1147,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -1170,7 +1170,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -1626,7 +1626,10 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms /* Connectionless channel */ if (sk->sk_type == SOCK_DGRAM) { skb = l2cap_create_connless_pdu(sk, msg, len); - err = l2cap_do_send(sk, skb); + if (IS_ERR(skb)) + err = PTR_ERR(skb); + else + err = l2cap_do_send(sk, skb); goto done; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8ed3c37..43fbf6b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -503,7 +503,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -526,7 +526,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -621,7 +621,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); @@ -640,7 +640,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return timeo; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ca6b2ad..b406d3e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -567,7 +567,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(ch = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -590,7 +590,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index d115d5c..9190ae4 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -33,14 +33,14 @@ config BRIDGE If unsure, say N. config BRIDGE_IGMP_SNOOPING - bool "IGMP snooping" + bool "IGMP/MLD snooping" depends on BRIDGE depends on INET default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively - forward multicast traffic based on IGMP traffic received from each - port. + forward multicast traffic based on IGMP/MLD traffic received from + each port. Say N to exclude this support and reduce the binary size. diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 007bde8..f15f9c4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -13,9 +13,12 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/list.h> #include <linux/netfilter_bridge.h> + #include <asm/uaccess.h> #include "br_private.h" @@ -43,7 +46,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (dest[0] & 1) { + if (is_multicast_ether_addr(dest)) { if (br_multicast_rcv(br, NULL, skb)) goto out; @@ -195,6 +198,59 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) return 0; } +#ifdef CONFIG_NET_POLL_CONTROLLER +bool br_devices_support_netpoll(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool ret = true; + int count = 0; + unsigned long flags; + + spin_lock_irqsave(&br->lock, flags); + list_for_each_entry(p, &br->port_list, list) { + count++; + if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) || + !p->dev->netdev_ops->ndo_poll_controller) + ret = false; + } + spin_unlock_irqrestore(&br->lock, flags); + return count != 0 && ret; +} + +static void br_poll_controller(struct net_device *br_dev) +{ + struct netpoll *np = br_dev->npinfo->netpoll; + + if (np->real_dev != br_dev) + netpoll_poll_dev(np->real_dev); +} + +void br_netpoll_cleanup(struct net_device *br_dev) +{ + struct net_bridge *br = netdev_priv(br_dev); + struct net_bridge_port *p, *n; + const struct net_device_ops *ops; + + br->dev->npinfo = NULL; + list_for_each_entry_safe(p, n, &br->port_list, list) { + if (p->dev) { + ops = p->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(p->dev); + else + p->dev->npinfo = NULL; + } + } +} + +#else + +void br_netpoll_cleanup(struct net_device *br_dev) +{ +} + +#endif + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -218,6 +274,10 @@ static const struct net_device_ops br_netdev_ops = { .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_cleanup = br_netpoll_cleanup, + .ndo_poll_controller = br_poll_controller, +#endif }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 92fb329..a98ef13 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> @@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) else { skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) { + netpoll_send_skb(skb->dev->npinfo->netpoll, skb); + skb->dev->priv_flags &= ~IFF_IN_NETPOLL; + } else +#endif + dev_queue_xmit(skb); } } @@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { +#ifdef CONFIG_NET_POLL_CONTROLLER + struct net_bridge *br = to->br; + if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) { + struct netpoll *np; + to->dev->npinfo = skb->dev->npinfo; + np = skb->dev->npinfo->netpoll; + np->real_dev = np->dev = to->dev; + to->dev->priv_flags |= IFF_IN_NETPOLL; + } +#endif skb->dev = to->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, br_forward_finish); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (skb->dev->npinfo) + skb->dev->npinfo->netpoll->dev = br->dev; +#endif } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) @@ -208,17 +229,15 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *port; - struct net_bridge_port *lport, *rport; - struct net_bridge_port *prev; + struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; struct hlist_node *rp; - prev = NULL; - - rp = br->router_list.first; - p = mdst ? mdst->ports : NULL; + rp = rcu_dereference(br->router_list.first); + p = mdst ? rcu_dereference(mdst->ports) : NULL; while (p || rp) { + struct net_bridge_port *port, *lport, *rport; + lport = p ? p->port : NULL; rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : NULL; @@ -231,9 +250,9 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, goto out; if ((unsigned long)lport >= (unsigned long)port) - p = p->next; + p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) - rp = rp->next; + rp = rcu_dereference(rp->next); } if (!prev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 5214393..537bdd6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/ethtool.h> #include <linux/if_arp.h> #include <linux/module.h> @@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_port *p) kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (br_devices_support_netpoll(br)) + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (dev->netdev_ops->ndo_netpoll_cleanup) + dev->netdev_ops->ndo_netpoll_cleanup(dev); + else + dev->npinfo = NULL; +#endif call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br, struct list_head *head) del_nbp(p); } + br_netpoll_cleanup(br->dev); + del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); @@ -444,6 +455,20 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (br_devices_support_netpoll(br)) { + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (br->dev->npinfo) + dev->npinfo = br->dev->npinfo; + } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { + br->dev->priv_flags |= IFF_DISABLE_NETPOLL; + printk(KERN_INFO "New device %s does not support netpoll\n", + dev->name); + printk(KERN_INFO "Disabling netpoll for %s\n", + br->dev->name); + } +#endif + return 0; err2: br_fdb_delete_by_port(br, p, 1); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8ccdb8e..c8419e2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -24,51 +24,139 @@ #include <linux/slab.h> #include <linux/timer.h> #include <net/ip.h> +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/addrconf.h> +#include <net/ip6_checksum.h> +#endif #include "br_private.h" -static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int ipv6_is_local_multicast(const struct in6_addr *addr) { - return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1); + if (ipv6_addr_is_multicast(addr) && + IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL) + return 1; + return 0; +} +#endif + +static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) +{ + if (a->proto != b->proto) + return 0; + switch (a->proto) { + case htons(ETH_P_IP): + return a->u.ip4 == b->u.ip4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); +#endif + } + return 0; +} + +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +{ + return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, + const struct in6_addr *ip) +{ + return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); +} +#endif + +static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, + struct br_ip *ip) +{ + switch (ip->proto) { + case htons(ETH_P_IP): + return __br_ip4_hash(mdb, ip->u.ip4); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return __br_ip6_hash(mdb, &ip->u.ip6); +#endif + } + return 0; } static struct net_bridge_mdb_entry *__br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, __be32 dst, int hash) + struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; struct hlist_node *p; hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { - if (dst == mp->addr) + if (br_ip_equal(&mp->addr, dst)) return mp; } return NULL; } -static struct net_bridge_mdb_entry *br_mdb_ip_get( +static struct net_bridge_mdb_entry *br_mdb_ip4_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { - if (!mdb) - return NULL; + struct br_ip br_dst; + + br_dst.u.ip4 = dst; + br_dst.proto = htons(ETH_P_IP); + + return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst)); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static struct net_bridge_mdb_entry *br_mdb_ip6_get( + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) +{ + struct br_ip br_dst; + ipv6_addr_copy(&br_dst.u.ip6, dst); + br_dst.proto = htons(ETH_P_IPV6); + + return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst)); +} +#endif + +static struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +{ return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); } struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - if (br->multicast_disabled) + struct net_bridge_mdb_htable *mdb = br->mdb; + struct br_ip ip; + + if (!mdb || br->multicast_disabled) + return NULL; + + if (BR_INPUT_SKB_CB(skb)->igmp) return NULL; + ip.proto = skb->protocol; + switch (skb->protocol) { case htons(ETH_P_IP): - if (BR_INPUT_SKB_CB(skb)->igmp) - break; - return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr); + ip.u.ip4 = ip_hdr(skb)->daddr; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr); + break; +#endif + default: + return NULL; } - return NULL; + return br_mdb_ip_get(mdb, &ip); } static void br_mdb_free(struct rcu_head *head) @@ -95,7 +183,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, for (i = 0; i < old->max; i++) hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], - &new->mhash[br_ip_hash(new, mp->addr)]); + &new->mhash[br_ip_hash(new, &mp->addr)]); if (!elasticity) return 0; @@ -163,7 +251,7 @@ static void br_multicast_del_pg(struct net_bridge *br, struct net_bridge_port_group *p; struct net_bridge_port_group **pp; - mp = br_mdb_ip_get(mdb, pg->addr); + mp = br_mdb_ip_get(mdb, &pg->addr); if (WARN_ON(!mp)) return; @@ -171,7 +259,7 @@ static void br_multicast_del_pg(struct net_bridge *br, if (p != pg) continue; - *pp = p->next; + rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); del_timer(&p->query_timer); @@ -249,8 +337,8 @@ out: return 0; } -static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, - __be32 group) +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, + __be32 group) { struct sk_buff *skb; struct igmphdr *ih; @@ -314,12 +402,104 @@ out: return skb; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, + struct in6_addr *group) +{ + struct sk_buff *skb; + struct ipv6hdr *ip6h; + struct mld_msg *mldq; + struct ethhdr *eth; + u8 *hopopt; + unsigned long interval; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + + 8 + sizeof(*mldq)); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IPV6); + + /* Ethernet header */ + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + memcpy(eth->h_source, br->dev->dev_addr, 6); + ipv6_eth_mc_map(group, eth->h_dest); + eth->h_proto = htons(ETH_P_IPV6); + skb_put(skb, sizeof(*eth)); + + /* IPv6 header + HbH option */ + skb_set_network_header(skb, skb->len); + ip6h = ipv6_hdr(skb); + + *(__force __be32 *)ip6h = htonl(0x60000000); + ip6h->payload_len = 8 + sizeof(*mldq); + ip6h->nexthdr = IPPROTO_HOPOPTS; + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + + hopopt = (u8 *)(ip6h + 1); + hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ + hopopt[1] = 0; /* length of HbH */ + hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ + hopopt[3] = 2; /* Length of RA Option */ + hopopt[4] = 0; /* Type = 0x0000 (MLD) */ + hopopt[5] = 0; + hopopt[6] = IPV6_TLV_PAD0; /* Pad0 */ + hopopt[7] = IPV6_TLV_PAD0; /* Pad0 */ + + skb_put(skb, sizeof(*ip6h) + 8); + + /* ICMPv6 */ + skb_set_transport_header(skb, skb->len); + mldq = (struct mld_msg *) icmp6_hdr(skb); + + interval = ipv6_addr_any(group) ? br->multicast_last_member_interval : + br->multicast_query_response_interval; + + mldq->mld_type = ICMPV6_MGM_QUERY; + mldq->mld_code = 0; + mldq->mld_cksum = 0; + mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); + mldq->mld_reserved = 0; + ipv6_addr_copy(&mldq->mld_mca, group); + + /* checksum */ + mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mldq), IPPROTO_ICMPV6, + csum_partial(mldq, + sizeof(*mldq), 0)); + skb_put(skb, sizeof(*mldq)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} +#endif + +static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, + struct br_ip *addr) +{ + switch (addr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_alloc_query(br, addr->u.ip4); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return br_ip6_multicast_alloc_query(br, &addr->u.ip6); +#endif + } + return NULL; +} + static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) { struct net_bridge *br = mp->br; struct sk_buff *skb; - skb = br_multicast_alloc_query(br, mp->addr); + skb = br_multicast_alloc_query(br, &mp->addr); if (!skb) goto timer; @@ -353,7 +533,7 @@ static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) struct net_bridge *br = port->br; struct sk_buff *skb; - skb = br_multicast_alloc_query(br, pg->addr); + skb = br_multicast_alloc_query(br, &pg->addr); if (!skb) goto timer; @@ -383,8 +563,8 @@ out: } static struct net_bridge_mdb_entry *br_multicast_get_group( - struct net_bridge *br, struct net_bridge_port *port, __be32 group, - int hash) + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group, int hash) { struct net_bridge_mdb_htable *mdb = br->mdb; struct net_bridge_mdb_entry *mp; @@ -396,9 +576,8 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { count++; - if (unlikely(group == mp->addr)) { + if (unlikely(br_ip_equal(group, &mp->addr))) return mp; - } } elasticity = 0; @@ -463,7 +642,8 @@ err: } static struct net_bridge_mdb_entry *br_multicast_new_group( - struct net_bridge *br, struct net_bridge_port *port, __be32 group) + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group) { struct net_bridge_mdb_htable *mdb = br->mdb; struct net_bridge_mdb_entry *mp; @@ -496,7 +676,7 @@ rehash: goto out; mp->br = br; - mp->addr = group; + mp->addr = *group; setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); setup_timer(&mp->query_timer, br_multicast_group_query_expired, @@ -510,7 +690,8 @@ out: } static int br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, __be32 group) + struct net_bridge_port *port, + struct br_ip *group) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -518,9 +699,6 @@ static int br_multicast_add_group(struct net_bridge *br, unsigned long now = jiffies; int err; - if (ipv4_is_local_multicast(group)) - return 0; - spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED)) @@ -549,7 +727,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (unlikely(!p)) goto err; - p->addr = group; + p->addr = *group; p->port = port; p->next = *pp; hlist_add_head(&p->mglist, &port->mglist); @@ -570,6 +748,38 @@ err: return err; } +static int br_ip4_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return 0; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + + return br_multicast_add_group(br, port, &br_group); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group) +{ + struct br_ip br_group; + + if (ipv6_is_local_multicast(group)) + return 0; + + ipv6_addr_copy(&br_group.u.ip6, group); + br_group.proto = htons(ETH_P_IP); + + return br_multicast_add_group(br, port, &br_group); +} +#endif + static void br_multicast_router_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; @@ -591,19 +801,15 @@ static void br_multicast_local_router_expired(unsigned long data) { } -static void br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, u32 sent) +static void __br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *ip) { - unsigned long time; struct sk_buff *skb; - if (!netif_running(br->dev) || br->multicast_disabled || - timer_pending(&br->multicast_querier_timer)) - return; - - skb = br_multicast_alloc_query(br, 0); + skb = br_multicast_alloc_query(br, ip); if (!skb) - goto timer; + return; if (port) { __skb_push(skb, sizeof(struct ethhdr)); @@ -612,8 +818,28 @@ static void br_multicast_send_query(struct net_bridge *br, dev_queue_xmit); } else netif_rx(skb); +} + +static void br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, u32 sent) +{ + unsigned long time; + struct br_ip br_group; + + if (!netif_running(br->dev) || br->multicast_disabled || + timer_pending(&br->multicast_querier_timer)) + return; + + memset(&br_group.u, 0, sizeof(br_group.u)); + + br_group.proto = htons(ETH_P_IP); + __br_multicast_send_query(br, port, &br_group); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + br_group.proto = htons(ETH_P_IPV6); + __br_multicast_send_query(br, port, &br_group); +#endif -timer: time = jiffies; time += sent < br->multicast_startup_query_count ? br->multicast_startup_query_interval : @@ -698,9 +924,9 @@ void br_multicast_disable_port(struct net_bridge_port *port) spin_unlock(&br->multicast_lock); } -static int br_multicast_igmp3_report(struct net_bridge *br, - struct net_bridge_port *port, - struct sk_buff *skb) +static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) { struct igmpv3_report *ih; struct igmpv3_grec *grec; @@ -727,7 +953,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, group = grec->grec_mca; type = grec->grec_type; - len += grec->grec_nsrcs * 4; + len += ntohs(grec->grec_nsrcs) * 4; if (!pskb_may_pull(skb, len)) return -EINVAL; @@ -745,7 +971,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_multicast_add_group(br, port, group); + err = br_ip4_multicast_add_group(br, port, group); if (err) break; } @@ -753,24 +979,87 @@ static int br_multicast_igmp3_report(struct net_bridge *br, return err; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_mld2_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct icmp6hdr *icmp6h; + struct mld2_grec *grec; + int i; + int len; + int num; + int err = 0; + + if (!pskb_may_pull(skb, sizeof(*icmp6h))) + return -EINVAL; + + icmp6h = icmp6_hdr(skb); + num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); + len = sizeof(*icmp6h); + + for (i = 0; i < num; i++) { + __be16 *nsrcs, _nsrcs; + + nsrcs = skb_header_pointer(skb, + len + offsetof(struct mld2_grec, + grec_mca), + sizeof(_nsrcs), &_nsrcs); + if (!nsrcs) + return -EINVAL; + + if (!pskb_may_pull(skb, + len + sizeof(*grec) + + sizeof(struct in6_addr) * (*nsrcs))) + return -EINVAL; + + grec = (struct mld2_grec *)(skb->data + len); + len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); + + /* We treat these as MLDv1 reports for now. */ + switch (grec->grec_type) { + case MLD2_MODE_IS_INCLUDE: + case MLD2_MODE_IS_EXCLUDE: + case MLD2_CHANGE_TO_INCLUDE: + case MLD2_CHANGE_TO_EXCLUDE: + case MLD2_ALLOW_NEW_SOURCES: + case MLD2_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + if (!err) + break; + } + + return err; +} +#endif + +/* + * Add port to rotuer_list + * list is maintained ordered by pointer value + * and locked by br->multicast_lock and RCU + */ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { - struct hlist_node *p; - struct hlist_node **h; - - for (h = &br->router_list.first; - (p = *h) && - (unsigned long)container_of(p, struct net_bridge_port, rlist) > - (unsigned long)port; - h = &p->next) - ; - - port->rlist.pprev = h; - port->rlist.next = p; - rcu_assign_pointer(*h, &port->rlist); - if (p) - p->pprev = &port->rlist.next; + struct net_bridge_port *p; + struct hlist_node *n, *slot = NULL; + + hlist_for_each_entry(p, n, &br->router_list, rlist) { + if ((unsigned long) port >= (unsigned long) p) + break; + slot = n; + } + + if (slot) + hlist_add_after_rcu(slot, &port->rlist); + else + hlist_add_head_rcu(&port->rlist, &br->router_list); } static void br_multicast_mark_router(struct net_bridge *br, @@ -800,7 +1089,7 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, - __be32 saddr) + int saddr) { if (saddr) mod_timer(&br->multicast_querier_timer, @@ -811,9 +1100,9 @@ static void br_multicast_query_received(struct net_bridge *br, br_multicast_mark_router(br, port); } -static int br_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, - struct sk_buff *skb) +static int br_ip4_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -831,7 +1120,7 @@ static int br_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, iph->saddr); + br_multicast_query_received(br, port, !!iph->saddr); group = ih->group; @@ -859,7 +1148,7 @@ static int br_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip_get(br->mdb, group); + mp = br_mdb_ip4_get(br->mdb, group); if (!mp) goto out; @@ -883,9 +1172,78 @@ out: return err; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct mld2_query *mld2q; + struct net_bridge_port_group *p, **pp; + unsigned long max_delay; + unsigned long now = jiffies; + struct in6_addr *group = NULL; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); + + if (skb->len == sizeof(*mld)) { + if (!pskb_may_pull(skb, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *) icmp6_hdr(skb); + max_delay = msecs_to_jiffies(htons(mld->mld_maxdelay)); + if (max_delay) + group = &mld->mld_mca; + } else if (skb->len >= sizeof(*mld2q)) { + if (!pskb_may_pull(skb, sizeof(*mld2q))) { + err = -EINVAL; + goto out; + } + mld2q = (struct mld2_query *)icmp6_hdr(skb); + if (!mld2q->mld2q_nsrcs) + group = &mld2q->mld2q_mca; + max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1; + } + + if (!group) + goto out; + + mp = br_mdb_ip6_get(br->mdb, group); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (timer_pending(&p->timer) ? + time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + mod_timer(&mp->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} +#endif + static void br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -893,9 +1251,6 @@ static void br_multicast_leave_group(struct net_bridge *br, unsigned long now; unsigned long time; - if (ipv4_is_local_multicast(group)) - return; - spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || @@ -946,6 +1301,38 @@ out: spin_unlock(&br->multicast_lock); } +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + + br_multicast_leave_group(br, port, &br_group); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group) +{ + struct br_ip br_group; + + if (ipv6_is_local_multicast(group)) + return; + + ipv6_addr_copy(&br_group.u.ip6, group); + br_group.proto = htons(ETH_P_IPV6); + + br_multicast_leave_group(br, port, &br_group); +} +#endif + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) @@ -957,9 +1344,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned offset; int err; - BR_INPUT_SKB_CB(skb)->igmp = 0; - BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) return -EINVAL; @@ -1023,16 +1407,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; - err = br_multicast_add_group(br, port, ih->group); + err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_multicast_igmp3_report(br, port, skb2); + err = br_ip4_multicast_igmp3_report(br, port, skb2); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_multicast_query(br, port, skb2); + err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: - br_multicast_leave_group(br, port, ih->group); + br_ip4_multicast_leave_group(br, port, ih->group); break; } @@ -1044,15 +1428,139 @@ err_out: return err; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_multicast_ipv6_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct sk_buff *skb2 = skb; + struct ipv6hdr *ip6h; + struct icmp6hdr *icmp6h; + u8 nexthdr; + unsigned len; + unsigned offset; + int err; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + /* + * We're interested in MLD messages only. + * - Version is 6 + * - MLD has always Router Alert hop-by-hop option + * - But we do not support jumbrograms. + */ + if (ip6h->version != 6 || + ip6h->nexthdr != IPPROTO_HOPOPTS || + ip6h->payload_len == 0) + return 0; + + len = ntohs(ip6h->payload_len); + if (skb->len < len) + return -EINVAL; + + nexthdr = ip6h->nexthdr; + offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr); + + if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + return 0; + + /* Okay, we found ICMPv6 header */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + len -= offset - skb_network_offset(skb2); + + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*icmp6h))) + goto out; + + icmp6h = icmp6_hdr(skb2); + + switch (icmp6h->icmp6_type) { + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MLD2_REPORT: + break; + default: + err = 0; + goto out; + } + + /* Okay, we found MLD message. Check further. */ + if (skb2->len > len) { + err = pskb_trim_rcsum(skb2, len); + if (err) + goto out; + } + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_fold(skb2->csum)) + break; + /*FALLTHROUGH*/ + case CHECKSUM_NONE: + skb2->csum = 0; + if (skb_checksum_complete(skb2)) + goto out; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + + switch (icmp6h->icmp6_type) { + case ICMPV6_MGM_REPORT: + { + struct mld_msg *mld = (struct mld_msg *)icmp6h; + BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + break; + } + case ICMPV6_MLD2_REPORT: + err = br_ip6_multicast_mld2_report(br, port, skb2); + break; + case ICMPV6_MGM_QUERY: + err = br_ip6_multicast_query(br, port, skb2); + break; + case ICMPV6_MGM_REDUCTION: + { + struct mld_msg *mld = (struct mld_msg *)icmp6h; + br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + } + } + +out: + __skb_push(skb2, offset); + if (skb2 != skb) + kfree_skb(skb2); + return err; +} +#endif + int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + if (br->multicast_disabled) return 0; switch (skb->protocol) { case htons(ETH_P_IP): return br_multicast_ipv4_rcv(br, port, skb); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return br_multicast_ipv6_rcv(br, port, skb); +#endif } return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 791d4ab..3d2d3fe 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -45,6 +45,17 @@ struct mac_addr unsigned char addr[6]; }; +struct br_ip +{ + union { + __be32 ip4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr ip6; +#endif + } u; + __be16 proto; +}; + struct net_bridge_fdb_entry { struct hlist_node hlist; @@ -64,7 +75,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; - __be32 addr; + struct br_ip addr; u32 queries_sent; }; @@ -77,7 +88,7 @@ struct net_bridge_mdb_entry struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; - __be32 addr; + struct br_ip addr; u32 queries_sent; }; @@ -130,19 +141,20 @@ struct net_bridge_port #endif }; +struct br_cpu_netstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct br_cpu_netstats __percpu { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; - } *stats; - + struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; unsigned long feature_mask; @@ -241,6 +253,8 @@ static inline int br_is_root_bridge(const struct net_bridge *br) extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +extern bool br_devices_support_netpoll(struct net_bridge *br); +extern void br_netpoll_cleanup(struct net_device *br_dev); /* br_fdb.c */ extern int br_fdb_init(void); diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index e84837e..024fd5b 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -247,10 +247,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, switch (caifdev->link_select) { case CAIF_LINK_HIGH_BANDW: - pref = CFPHYPREF_LOW_LAT; + pref = CFPHYPREF_HIGH_BW; break; case CAIF_LINK_LOW_LATENCY: - pref = CFPHYPREF_HIGH_BW; + pref = CFPHYPREF_LOW_LAT; break; default: pref = CFPHYPREF_HIGH_BW; @@ -330,23 +330,28 @@ int caif_connect_client(struct caif_connect_request *conn_req, struct cflayer *client_layer) { struct cfctrl_link_param param; - if (connect_req_to_link_param(get_caif_conf(), conn_req, ¶m) == 0) - /* Hook up the adaptation layer. */ - return cfcnfg_add_adaptation_layer(get_caif_conf(), + int ret; + ret = connect_req_to_link_param(get_caif_conf(), conn_req, ¶m); + if (ret) + return ret; + /* Hook up the adaptation layer. */ + return cfcnfg_add_adaptation_layer(get_caif_conf(), ¶m, client_layer); - - return -EINVAL; - - caif_assert(0); } EXPORT_SYMBOL(caif_connect_client); int caif_disconnect_client(struct cflayer *adap_layer) { - return cfcnfg_del_adapt_layer(get_caif_conf(), adap_layer); + return cfcnfg_disconn_adapt_layer(get_caif_conf(), adap_layer); } EXPORT_SYMBOL(caif_disconnect_client); +void caif_release_client(struct cflayer *adap_layer) +{ + cfcnfg_release_adap_layer(adap_layer); +} +EXPORT_SYMBOL(caif_release_client); + /* Per-namespace Caif devices handling */ static int caif_init_net(struct net *net) { diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cdf62b9..c3a70c5 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,7 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland sjur.brandeland@stericsson.com - * Per Sigmond per.sigmond@stericsson.com * License terms: GNU General Public License (GPL) version 2 */ @@ -16,91 +15,52 @@ #include <linux/poll.h> #include <linux/tcp.h> #include <linux/uaccess.h> -#include <asm/atomic.h> - +#include <linux/mutex.h> +#include <linux/debugfs.h> #include <linux/caif/caif_socket.h> +#include <asm/atomic.h> +#include <net/sock.h> +#include <net/tcp_states.h> #include <net/caif/caif_layer.h> #include <net/caif/caif_dev.h> #include <net/caif/cfpkt.h> MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(AF_CAIF); + +#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10) +#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100) + +/* + * CAIF state is re-using the TCP socket states. + * caif_states stored in sk_state reflect the state as reported by + * the CAIF stack, while sk_socket->state is the state of the socket. + */ +enum caif_states { + CAIF_CONNECTED = TCP_ESTABLISHED, + CAIF_CONNECTING = TCP_SYN_SENT, + CAIF_DISCONNECTED = TCP_CLOSE +}; + +#define TX_FLOW_ON_BIT 1 +#define RX_FLOW_ON_BIT 2 -#define CHNL_SKT_READ_QUEUE_HIGH 200 -#define CHNL_SKT_READ_QUEUE_LOW 100 - -static int caif_sockbuf_size = 40000; -static atomic_t caif_nr_socks = ATOMIC_INIT(0); - -#define CONN_STATE_OPEN_BIT 1 -#define CONN_STATE_PENDING_BIT 2 -#define CONN_STATE_PEND_DESTROY_BIT 3 -#define CONN_REMOTE_SHUTDOWN_BIT 4 - -#define TX_FLOW_ON_BIT 1 -#define RX_FLOW_ON_BIT 2 - -#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\ - (void *) &(cf_sk)->conn_state) -#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\ - (void *) &(cf_sk)->conn_state) - -#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\ - (void *) &(cf_sk)->conn_state) -#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\ - (void *) &(cf_sk)->conn_state) - -#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\ - (void *) &(dev)->conn_state) -#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) - -#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) -#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\ - (void *) &(cf_sk)->flow_state) - -#define SKT_READ_FLAG 0x01 -#define SKT_WRITE_FLAG 0x02 static struct dentry *debugfsdir; -#include <linux/debugfs.h> #ifdef CONFIG_DEBUG_FS struct debug_fs_counter { - atomic_t num_open; - atomic_t num_close; - atomic_t num_init; - atomic_t num_init_resp; - atomic_t num_init_fail_resp; - atomic_t num_deinit; - atomic_t num_deinit_resp; + atomic_t caif_nr_socks; + atomic_t num_connect_req; + atomic_t num_connect_resp; + atomic_t num_connect_fail_resp; + atomic_t num_disconnect; atomic_t num_remote_shutdown_ind; atomic_t num_tx_flow_off_ind; atomic_t num_tx_flow_on_ind; atomic_t num_rx_flow_off; atomic_t num_rx_flow_on; - atomic_t skb_in_use; - atomic_t skb_alloc; - atomic_t skb_free; }; -static struct debug_fs_counter cnt; +struct debug_fs_counter cnt; #define dbfs_atomic_inc(v) atomic_inc(v) #define dbfs_atomic_dec(v) atomic_dec(v) #else @@ -108,624 +68,666 @@ static struct debug_fs_counter cnt; #define dbfs_atomic_dec(v) #endif -/* The AF_CAIF socket */ struct caifsock { - /* NOTE: sk has to be the first member */ - struct sock sk; + struct sock sk; /* must be first member */ struct cflayer layer; - char name[CAIF_LAYER_NAME_SZ]; - u32 conn_state; + char name[CAIF_LAYER_NAME_SZ]; /* Used for debugging */ u32 flow_state; - struct cfpktq *pktq; - int file_mode; struct caif_connect_request conn_req; - int read_queue_len; - /* protect updates of read_queue_len */ - spinlock_t read_queue_len_lock; + struct mutex readlock; struct dentry *debugfs_socket_dir; }; -static void drain_queue(struct caifsock *cf_sk); +static int rx_flow_is_on(struct caifsock *cf_sk) +{ + return test_bit(RX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} + +static int tx_flow_is_on(struct caifsock *cf_sk) +{ + return test_bit(TX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} -/* Packet Receive Callback function called from CAIF Stack */ -static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) +static void set_rx_flow_off(struct caifsock *cf_sk) { - struct caifsock *cf_sk; - int read_queue_high; - cf_sk = container_of(layr, struct caifsock, layer); + clear_bit(RX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - if (!STATE_IS_OPEN(cf_sk)) { - /*FIXME: This should be allowed finally!*/ - pr_debug("CAIF: %s(): called after close request\n", __func__); - cfpkt_destroy(pkt); - return 0; - } - /* NOTE: This function may be called in Tasklet context! */ +static void set_rx_flow_on(struct caifsock *cf_sk) +{ + set_bit(RX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - /* The queue has its own lock */ - cfpkt_queue(cf_sk->pktq, pkt, 0); +static void set_tx_flow_off(struct caifsock *cf_sk) +{ + clear_bit(TX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - spin_lock(&cf_sk->read_queue_len_lock); - cf_sk->read_queue_len++; +static void set_tx_flow_on(struct caifsock *cf_sk) +{ + set_bit(TX_FLOW_ON_BIT, + (void *) &cf_sk->flow_state); +} - read_queue_high = (cf_sk->read_queue_len > CHNL_SKT_READ_QUEUE_HIGH); - spin_unlock(&cf_sk->read_queue_len_lock); +static void caif_read_lock(struct sock *sk) +{ + struct caifsock *cf_sk; + cf_sk = container_of(sk, struct caifsock, sk); + mutex_lock(&cf_sk->readlock); +} - if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) { - dbfs_atomic_inc(&cnt.num_rx_flow_off); - SET_RX_FLOW_OFF(cf_sk); +static void caif_read_unlock(struct sock *sk) +{ + struct caifsock *cf_sk; + cf_sk = container_of(sk, struct caifsock, sk); + mutex_unlock(&cf_sk->readlock); +} - /* Send flow off (NOTE: must not sleep) */ - pr_debug("CAIF: %s():" - " sending flow OFF (queue len = %d)\n", - __func__, - cf_sk->read_queue_len); - caif_assert(cf_sk->layer.dn); - caif_assert(cf_sk->layer.dn->ctrlcmd); +int sk_rcvbuf_lowwater(struct caifsock *cf_sk) +{ + /* A quarter of full buffer is used a low water mark */ + return cf_sk->sk.sk_rcvbuf / 4; +} - (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_OFF_REQ); - } +void caif_flow_ctrl(struct sock *sk, int mode) +{ + struct caifsock *cf_sk; + cf_sk = container_of(sk, struct caifsock, sk); + if (cf_sk->layer.dn) + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); +} - /* Signal reader that data is available. */ +/* + * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are + * not dropped, but CAIF is sending flow off instead. + */ +int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + int skb_len; + unsigned long flags; + struct sk_buff_head *list = &sk->sk_receive_queue; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { + trace_printk("CAIF: %s():" + " sending flow OFF (queue len = %d %d)\n", + __func__, + atomic_read(&cf_sk->sk.sk_rmem_alloc), + sk_rcvbuf_lowwater(cf_sk)); + set_rx_flow_off(cf_sk); + if (cf_sk->layer.dn) + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_OFF_REQ); + } + err = sk_filter(sk, skb); + if (err) + return err; + if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { + set_rx_flow_off(cf_sk); + trace_printk("CAIF: %s():" + " sending flow OFF due to rmem_schedule\n", + __func__); + if (cf_sk->layer.dn) + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_OFF_REQ); + } + skb->dev = NULL; + skb_set_owner_r(skb, sk); + /* Cache the SKB length before we tack it onto the receive + * queue. Once it is added it no longer belongs to us and + * may be freed by other threads of control pulling packets + * from the queue. + */ + skb_len = skb->len; + spin_lock_irqsave(&list->lock, flags); + if (!sock_flag(sk, SOCK_DEAD)) + __skb_queue_tail(list, skb); + spin_unlock_irqrestore(&list->lock, flags); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb_len); + else + kfree_skb(skb); return 0; } -/* Packet Flow Control Callback function called from CAIF */ -static void caif_sktflowctrl_cb(struct cflayer *layr, - enum caif_ctrlcmd flow, - int phyid) +/* Packet Receive Callback function called from CAIF Stack */ +static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct caifsock *cf_sk; - - /* NOTE: This function may be called in Tasklet context! */ - pr_debug("CAIF: %s(): flowctrl func called: %s.\n", - __func__, - flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : - flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : - flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" : - flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" : - flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" : - flow == - CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" : - "UKNOWN CTRL COMMAND"); - - if (layr == NULL) - return; + struct sk_buff *skb; cf_sk = container_of(layr, struct caifsock, layer); + skb = cfpkt_tonative(pkt); + + if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) { + cfpkt_destroy(pkt); + return 0; + } + caif_queue_rcv_skb(&cf_sk->sk, skb); + return 0; +} +/* Packet Control Callback function called from CAIF */ +static void caif_ctrl_cb(struct cflayer *layr, + enum caif_ctrlcmd flow, + int phyid) +{ + struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); switch (flow) { case CAIF_CTRLCMD_FLOW_ON_IND: + /* OK from modem to start sending again */ dbfs_atomic_inc(&cnt.num_tx_flow_on_ind); - /* Signal reader that data is available. */ - SET_TX_FLOW_ON(cf_sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + set_tx_flow_on(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_FLOW_OFF_IND: + /* Modem asks us to shut up */ dbfs_atomic_inc(&cnt.num_tx_flow_off_ind); - SET_TX_FLOW_OFF(cf_sk); + set_tx_flow_off(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_INIT_RSP: - dbfs_atomic_inc(&cnt.num_init_resp); - /* Signal reader that data is available. */ - caif_assert(STATE_IS_OPEN(cf_sk)); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_ON(cf_sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + /* We're now connected */ + dbfs_atomic_inc(&cnt.num_connect_resp); + cf_sk->sk.sk_state = CAIF_CONNECTED; + set_tx_flow_on(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_DEINIT_RSP: - dbfs_atomic_inc(&cnt.num_deinit_resp); - caif_assert(!STATE_IS_OPEN(cf_sk)); - SET_PENDING_OFF(cf_sk); - if (!STATE_IS_PENDING_DESTROY(cf_sk)) { - if (cf_sk->sk.sk_sleep != NULL) - wake_up_interruptible(cf_sk->sk.sk_sleep); - } - dbfs_atomic_inc(&cnt.num_deinit); - sock_put(&cf_sk->sk); + /* We're now disconnected */ + cf_sk->sk.sk_state = CAIF_DISCONNECTED; + cf_sk->sk.sk_state_change(&cf_sk->sk); + cfcnfg_release_adap_layer(&cf_sk->layer); break; case CAIF_CTRLCMD_INIT_FAIL_RSP: - dbfs_atomic_inc(&cnt.num_init_fail_resp); - caif_assert(STATE_IS_OPEN(cf_sk)); - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - wake_up_interruptible(cf_sk->sk.sk_sleep); + /* Connect request failed */ + dbfs_atomic_inc(&cnt.num_connect_fail_resp); + cf_sk->sk.sk_err = ECONNREFUSED; + cf_sk->sk.sk_state = CAIF_DISCONNECTED; + cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; + /* + * Socket "standards" seems to require POLLOUT to + * be set at connect failure. + */ + set_tx_flow_on(cf_sk); + cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: + /* Modem has closed this connection, or device is down. */ dbfs_atomic_inc(&cnt.num_remote_shutdown_ind); - SET_REMOTE_SHUTDOWN(cf_sk); - /* Use sk_shutdown to indicate remote shutdown indication */ - cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN; - cf_sk->file_mode = 0; - wake_up_interruptible(cf_sk->sk.sk_sleep); + cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; + cf_sk->sk.sk_err = ECONNRESET; + set_rx_flow_on(cf_sk); + cf_sk->sk.sk_error_report(&cf_sk->sk); break; default: pr_debug("CAIF: %s(): Unexpected flow command %d\n", - __func__, flow); + __func__, flow); } } -static void skb_destructor(struct sk_buff *skb) +static void caif_check_flow_release(struct sock *sk) { - dbfs_atomic_inc(&cnt.skb_free); - dbfs_atomic_dec(&cnt.skb_in_use); -} + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL) + return; + if (rx_flow_is_on(cf_sk)) + return; -static int caif_recvmsg(struct kiocb *iocb, struct socket *sock, + if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { + dbfs_atomic_inc(&cnt.num_rx_flow_on); + set_rx_flow_on(cf_sk); + cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, + CAIF_MODEMCMD_FLOW_ON_REQ); + } +} +/* + * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer + * has sufficient size. + */ + +static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t buf_len, int flags) { struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - struct cfpkt *pkt = NULL; - size_t len; - int result; struct sk_buff *skb; - ssize_t ret = -EIO; - int read_queue_low; - - if (cf_sk == NULL) { - pr_debug("CAIF: %s(): private_data not set!\n", - __func__); - ret = -EBADFD; - goto read_error; - } - - /* Don't do multiple iovec entries yet */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; + int ret = 0; + int len; if (unlikely(!buf_len)) return -EINVAL; - lock_sock(&(cf_sk->sk)); - - caif_assert(cf_sk->pktq); - - if (!STATE_IS_OPEN(cf_sk)) { - /* Socket is closed or closing. */ - if (!STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is closed (by remote)\n", - __func__); - ret = -EPIPE; - } else { - pr_debug("CAIF: %s(): socket is closing..\n", __func__); - ret = -EBADF; - } + skb = skb_recv_datagram(sk, flags, 0 , &ret); + if (!skb) goto read_error; - } - /* Socket is open or opening. */ - if (STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is opening...\n", __func__); - - if (flags & MSG_DONTWAIT) { - /* We can't block. */ - pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n", - __func__); - ret = -EAGAIN; - goto read_error; - } + len = skb->len; + + if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) { + len = buf_len; /* - * Blocking mode; state is pending and we need to wait - * for its conclusion. + * Push skb back on receive queue if buffer too small. + * This has a built-in race where multi-threaded receive + * may get packet in wrong order, but multiple read does + * not really guarantee ordered delivery anyway. + * Let's optimize for speed without taking locks. */ - release_sock(&cf_sk->sk); - - result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, - !STATE_IS_PENDING(cf_sk)); - lock_sock(&(cf_sk->sk)); - - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - " woken by a signal (1)", __func__); - ret = -ERESTARTSYS; - goto read_error; - } + skb_queue_head(&sk->sk_receive_queue, skb); + ret = -EMSGSIZE; + goto read_error; } - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) || - !STATE_IS_OPEN(cf_sk) || - STATE_IS_PENDING(cf_sk)) { - - pr_debug("CAIF: %s(): socket closed\n", - __func__); - ret = -ESHUTDOWN; + ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); + if (ret) goto read_error; - } - /* - * Block if we don't have any received buffers. - * The queue has its own lock. - */ - while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) { + skb_free_datagram(sk, skb); - if (flags & MSG_DONTWAIT) { - pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__); - ret = -EAGAIN; - goto read_error; - } - trace_printk("CAIF: %s() wait_event\n", __func__); + caif_check_flow_release(sk); - /* Let writers in. */ - release_sock(&cf_sk->sk); + return len; - /* Block reader until data arrives or socket is closed. */ - if (wait_event_interruptible(*cf_sk->sk.sk_sleep, - cfpkt_qpeek(cf_sk->pktq) - || STATE_IS_REMOTE_SHUTDOWN(cf_sk) - || !STATE_IS_OPEN(cf_sk)) == - -ERESTARTSYS) { - pr_debug("CAIF: %s():" - " wait_event_interruptible woken by " - "a signal, signal_pending(current) = %d\n", - __func__, - signal_pending(current)); - return -ERESTARTSYS; - } +read_error: + return ret; +} - trace_printk("CAIF: %s() awake\n", __func__); - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { - pr_debug("CAIF: %s(): " - "received remote_shutdown indication\n", - __func__); - ret = -ESHUTDOWN; - goto read_error_no_unlock; - } - /* I want to be alone on cf_sk (except status and queue). */ - lock_sock(&(cf_sk->sk)); +/* Copied from unix_stream_wait_data, identical except for lock call. */ +static long caif_stream_data_wait(struct sock *sk, long timeo) +{ + DEFINE_WAIT(wait); + lock_sock(sk); + + for (;;) { + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + if (!skb_queue_empty(&sk->sk_receive_queue) || + sk->sk_err || + sk->sk_state != CAIF_CONNECTED || + sock_flag(sk, SOCK_DEAD) || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current) || + !timeo) + break; - if (!STATE_IS_OPEN(cf_sk)) { - /* Someone closed the link, report error. */ - pr_debug("CAIF: %s(): remote end shutdown!\n", - __func__); - ret = -EPIPE; - goto read_error; - } + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - /* The queue has its own lock. */ - len = cfpkt_getlen(pkt); - - /* Check max length that can be copied. */ - if (len <= buf_len) - pkt = cfpkt_dequeue(cf_sk->pktq); - else { - pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n", - __func__, (long) len, (long) buf_len); - if (sock->type == SOCK_SEQPACKET) { - ret = -EMSGSIZE; - goto read_error; - } - len = buf_len; - } + finish_wait(sk_sleep(sk), &wait); + release_sock(sk); + return timeo; +} - spin_lock(&cf_sk->read_queue_len_lock); - cf_sk->read_queue_len--; - read_queue_low = (cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW); - spin_unlock(&cf_sk->read_queue_len_lock); +/* + * Copied from unix_stream_recvmsg, but removed credit checks, + * changed locking calls, changed address handling. + */ +static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + int copied = 0; + int target; + int err = 0; + long timeo; - if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) { - dbfs_atomic_inc(&cnt.num_rx_flow_on); - SET_RX_FLOW_ON(cf_sk); + err = -EOPNOTSUPP; + if (flags&MSG_OOB) + goto out; - /* Send flow on. */ - pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n", - __func__, cf_sk->read_queue_len); - caif_assert(cf_sk->layer.dn); - caif_assert(cf_sk->layer.dn->ctrlcmd); - (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_ON_REQ); + msg->msg_namelen = 0; - caif_assert(cf_sk->read_queue_len >= 0); - } + /* + * Lock the socket to prevent queue disordering + * while sleeps in memcpy_tomsg + */ + err = -EAGAIN; + if (sk->sk_state == CAIF_CONNECTING) + goto out; - skb = cfpkt_tonative(pkt); - result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); - skb_pull(skb, len); + caif_read_lock(sk); + target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - if (result) { - pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__); - cfpkt_destroy(pkt); - ret = -EFAULT; - goto read_error; - } + do { + int chunk; + struct sk_buff *skb; - /* Free packet and remove from queue */ - if (skb->len == 0) - skb_free_datagram(sk, skb); + lock_sock(sk); + skb = skb_dequeue(&sk->sk_receive_queue); + caif_check_flow_release(sk); - /* Let the others in. */ - release_sock(&cf_sk->sk); - return len; + if (skb == NULL) { + if (copied >= target) + goto unlock; + /* + * POSIX 1003.1g mandates this order. + */ + err = sock_error(sk); + if (err) + goto unlock; + err = -ECONNRESET; + if (sk->sk_shutdown & RCV_SHUTDOWN) + goto unlock; -read_error: - release_sock(&cf_sk->sk); -read_error_no_unlock: - return ret; -} + err = -EPIPE; + if (sk->sk_state != CAIF_CONNECTED) + goto unlock; + if (sock_flag(sk, SOCK_DEAD)) + goto unlock; -/* Send a signal as a consequence of sendmsg, sendto or caif_sendmsg. */ -static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ + release_sock(sk); - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - size_t payload_size = msg->msg_iov->iov_len; - struct cfpkt *pkt = NULL; - struct caif_payload_info info; - unsigned char *txbuf; - ssize_t ret = -EIO; - int result; - struct sk_buff *skb; - caif_assert(msg->msg_iovlen == 1); + err = -EAGAIN; + if (!timeo) + break; - if (cf_sk == NULL) { - pr_debug("CAIF: %s(): private_data not set!\n", - __func__); - ret = -EBADFD; - goto write_error_no_unlock; - } + caif_read_unlock(sk); - if (unlikely(msg->msg_iov->iov_base == NULL)) { - pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__); - ret = -EINVAL; - goto write_error_no_unlock; - } + timeo = caif_stream_data_wait(sk, timeo); - if (payload_size > CAIF_MAX_PAYLOAD_SIZE) { - pr_debug("CAIF: %s(): buffer too long\n", __func__); - if (sock->type == SOCK_SEQPACKET) { - ret = -EINVAL; - goto write_error_no_unlock; + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + caif_read_lock(sk); + continue; +unlock: + release_sock(sk); + break; } - payload_size = CAIF_MAX_PAYLOAD_SIZE; - } + release_sock(sk); + chunk = min_t(unsigned int, skb->len, size); + if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { + skb_queue_head(&sk->sk_receive_queue, skb); + if (copied == 0) + copied = -EFAULT; + break; + } + copied += chunk; + size -= chunk; - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); + /* Mark read part of skb as used */ + if (!(flags & MSG_PEEK)) { + skb_pull(skb, chunk); - caif_assert(cf_sk->pktq); + /* put the skb back if we didn't use it up. */ + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + break; + } + kfree_skb(skb); - if (!STATE_IS_OPEN(cf_sk)) { - /* Socket is closed or closing */ - if (!STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is closed (by remote)\n", - __func__); - ret = -EPIPE; } else { - pr_debug("CAIF: %s(): socket is closing...\n", - __func__); - ret = -EBADF; - } - goto write_error; - } - - /* Socket is open or opening */ - if (STATE_IS_PENDING(cf_sk)) { - pr_debug("CAIF: %s(): socket is opening...\n", __func__); - - if (msg->msg_flags & MSG_DONTWAIT) { - /* We can't block */ - trace_printk("CAIF: %s():state pending:" - "state=MSG_DONTWAIT\n", __func__); - ret = -EAGAIN; - goto write_error; + /* + * It is questionable, see note in unix_dgram_recvmsg. + */ + /* put message back and return */ + skb_queue_head(&sk->sk_receive_queue, skb); + break; } - /* Let readers in */ - release_sock(&cf_sk->sk); - - /* - * Blocking mode; state is pending and we need to wait - * for its conclusion. - */ - result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, - !STATE_IS_PENDING(cf_sk)); - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); + } while (size); + caif_read_unlock(sk); - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - " woken by a signal (1)", __func__); - ret = -ERESTARTSYS; - goto write_error; - } - } - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) || - !STATE_IS_OPEN(cf_sk) || - STATE_IS_PENDING(cf_sk)) { +out: + return copied ? : err; +} - pr_debug("CAIF: %s(): socket closed\n", - __func__); - ret = -ESHUTDOWN; - goto write_error; +/* + * Copied from sock.c:sock_wait_for_wmem, but change to wait for + * CAIF flow-on and sock_writable. + */ +static long caif_wait_for_flow_on(struct caifsock *cf_sk, + int wait_writeable, long timeo, int *err) +{ + struct sock *sk = &cf_sk->sk; + DEFINE_WAIT(wait); + for (;;) { + *err = 0; + if (tx_flow_is_on(cf_sk) && + (!wait_writeable || sock_writeable(&cf_sk->sk))) + break; + *err = -ETIMEDOUT; + if (!timeo) + break; + *err = -ERESTARTSYS; + if (signal_pending(current)) + break; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + *err = -ECONNRESET; + if (sk->sk_shutdown & SHUTDOWN_MASK) + break; + *err = -sk->sk_err; + if (sk->sk_err) + break; + *err = -EPIPE; + if (cf_sk->sk.sk_state != CAIF_CONNECTED) + break; + timeo = schedule_timeout(timeo); } + finish_wait(sk_sleep(sk), &wait); + return timeo; +} - if (!TX_FLOW_IS_ON(cf_sk)) { +/* + * Transmit a SKB. The device may temporarily request re-transmission + * by returning EAGAIN. + */ +static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, + int noblock, long timeo) +{ + struct cfpkt *pkt; + int ret, loopcnt = 0; - /* Flow is off. Check non-block flag */ - if (msg->msg_flags & MSG_DONTWAIT) { - trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off", - __func__); - ret = -EAGAIN; - goto write_error; - } + pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb); + memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info)); + do { - /* release lock before waiting */ - release_sock(&cf_sk->sk); + ret = -ETIMEDOUT; - /* Wait until flow is on or socket is closed */ - if (wait_event_interruptible(*cf_sk->sk.sk_sleep, - TX_FLOW_IS_ON(cf_sk) - || !STATE_IS_OPEN(cf_sk) - || STATE_IS_REMOTE_SHUTDOWN(cf_sk) - ) == -ERESTARTSYS) { - pr_debug("CAIF: %s():" - " wait_event_interruptible woken by a signal", - __func__); - ret = -ERESTARTSYS; - goto write_error_no_unlock; + /* Slight paranoia, probably not needed. */ + if (unlikely(loopcnt++ > 1000)) { + pr_warning("CAIF: %s(): transmit retries failed," + " error = %d\n", __func__, ret); + break; } - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); - - if (!STATE_IS_OPEN(cf_sk)) { - /* someone closed the link, report error */ - pr_debug("CAIF: %s(): remote end shutdown!\n", - __func__); - ret = -EPIPE; - goto write_error; + if (cf_sk->layer.dn != NULL) + ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); + if (likely(ret >= 0)) + break; + /* if transmit return -EAGAIN, then retry */ + if (noblock && ret == -EAGAIN) + break; + timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret); + if (signal_pending(current)) { + ret = sock_intr_errno(timeo); + break; } - - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { - pr_debug("CAIF: %s(): " - "received remote_shutdown indication\n", - __func__); - ret = -ESHUTDOWN; - goto write_error; + if (ret) + break; + if (cf_sk->sk.sk_state != CAIF_CONNECTED || + sock_flag(&cf_sk->sk, SOCK_DEAD) || + (cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) { + ret = -EPIPE; + cf_sk->sk.sk_err = EPIPE; + break; } - } + } while (ret == -EAGAIN); + return ret; +} - pkt = cfpkt_create(payload_size); - skb = (struct sk_buff *)pkt; - skb->destructor = skb_destructor; - skb->sk = sk; - dbfs_atomic_inc(&cnt.skb_alloc); - dbfs_atomic_inc(&cnt.skb_in_use); - if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) { - pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__); - cfpkt_destroy(pkt); - ret = -EINVAL; - goto write_error; - } +/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ +static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int buffer_size; + int ret = 0; + struct sk_buff *skb = NULL; + int noblock; + long timeo; + caif_assert(cf_sk); + ret = sock_error(sk); + if (ret) + goto err; + + ret = -EOPNOTSUPP; + if (msg->msg_flags&MSG_OOB) + goto err; + + ret = -EOPNOTSUPP; + if (msg->msg_namelen) + goto err; + + ret = -EINVAL; + if (unlikely(msg->msg_iov->iov_base == NULL)) + goto err; + noblock = msg->msg_flags & MSG_DONTWAIT; + + buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM; + + ret = -EMSGSIZE; + if (buffer_size > CAIF_MAX_PAYLOAD_SIZE) + goto err; + + timeo = sock_sndtimeo(sk, noblock); + timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), + 1, timeo, &ret); + + ret = -EPIPE; + if (cf_sk->sk.sk_state != CAIF_CONNECTED || + sock_flag(sk, SOCK_DEAD) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + goto err; + + ret = -ENOMEM; + skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); + if (!skb) + goto err; + skb_reserve(skb, CAIF_NEEDED_HEADROOM); + + ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + + if (ret) + goto err; + ret = transmit_skb(skb, cf_sk, noblock, timeo); + if (ret < 0) + goto err; + return len; +err: + kfree_skb(skb); + return ret; +} - /* Copy data into buffer. */ - if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) { - pr_debug("CAIF: %s(): copy_from_user returned non zero.\n", - __func__); - cfpkt_destroy(pkt); - ret = -EINVAL; - goto write_error; - } - memset(&info, 0, sizeof(info)); +/* + * Copied from unix_stream_sendmsg and adapted to CAIF: + * Changed removed permission handling and added waiting for flow on + * and other minor adaptations. + */ +static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int err, size; + struct sk_buff *skb; + int sent = 0; + long timeo; - /* Send the packet down the stack. */ - caif_assert(cf_sk->layer.dn); - caif_assert(cf_sk->layer.dn->transmit); + err = -EOPNOTSUPP; - do { - ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); + if (unlikely(msg->msg_flags&MSG_OOB)) + goto out_err; - if (likely((ret >= 0) || (ret != -EAGAIN))) - break; + if (unlikely(msg->msg_namelen)) + goto out_err; - /* EAGAIN - retry */ - if (msg->msg_flags & MSG_DONTWAIT) { - pr_debug("CAIF: %s(): NONBLOCK and transmit failed," - " error = %ld\n", __func__, (long) ret); - ret = -EAGAIN; - goto write_error; - } + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err); - /* Let readers in */ - release_sock(&cf_sk->sk); + if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) + goto pipe_err; - /* Wait until flow is on or socket is closed */ - if (wait_event_interruptible(*cf_sk->sk.sk_sleep, - TX_FLOW_IS_ON(cf_sk) - || !STATE_IS_OPEN(cf_sk) - || STATE_IS_REMOTE_SHUTDOWN(cf_sk) - ) == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - " woken by a signal", __func__); - ret = -ERESTARTSYS; - goto write_error_no_unlock; - } + while (sent < len) { - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); + size = len-sent; - } while (ret == -EAGAIN); + if (size > CAIF_MAX_PAYLOAD_SIZE) + size = CAIF_MAX_PAYLOAD_SIZE; - if (ret < 0) { - cfpkt_destroy(pkt); - pr_debug("CAIF: %s(): transmit failed, error = %ld\n", - __func__, (long) ret); + /* If size is more than half of sndbuf, chop up message */ + if (size > ((sk->sk_sndbuf >> 1) - 64)) + size = (sk->sk_sndbuf >> 1) - 64; - goto write_error; - } + if (size > SKB_MAX_ALLOC) + size = SKB_MAX_ALLOC; - release_sock(&cf_sk->sk); - return payload_size; + skb = sock_alloc_send_skb(sk, + size + CAIF_NEEDED_HEADROOM + + CAIF_NEEDED_TAILROOM, + msg->msg_flags&MSG_DONTWAIT, + &err); + if (skb == NULL) + goto out_err; -write_error: - release_sock(&cf_sk->sk); -write_error_no_unlock: - return ret; -} + skb_reserve(skb, CAIF_NEEDED_HEADROOM); + /* + * If you pass two values to the sock_alloc_send_skb + * it tries to grab the large buffer with GFP_NOFS + * (which can fail easily), and if it fails grab the + * fallback size buffer which is under a page and will + * succeed. [Alan] + */ + size = min_t(int, size, skb_tailroom(skb)); -static unsigned int caif_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - u32 mask = 0; - poll_wait(file, sk->sk_sleep, wait); - lock_sock(&(cf_sk->sk)); - if (!STATE_IS_OPEN(cf_sk)) { - if (!STATE_IS_PENDING(cf_sk)) - mask |= POLLHUP; - } else { - if (cfpkt_qpeek(cf_sk->pktq) != NULL) - mask |= (POLLIN | POLLRDNORM); - if (TX_FLOW_IS_ON(cf_sk)) - mask |= (POLLOUT | POLLWRNORM); + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err) { + kfree_skb(skb); + goto out_err; + } + err = transmit_skb(skb, cf_sk, + msg->msg_flags&MSG_DONTWAIT, timeo); + if (err < 0) { + kfree_skb(skb); + goto pipe_err; + } + sent += size; } - release_sock(&cf_sk->sk); - trace_printk("CAIF: %s(): poll mask=0x%04x\n", - __func__, mask); - return mask; -} - -static void drain_queue(struct caifsock *cf_sk) -{ - struct cfpkt *pkt = NULL; - - /* Empty the queue */ - do { - /* The queue has its own lock */ - if (!cf_sk->pktq) - break; - - pkt = cfpkt_dequeue(cf_sk->pktq); - if (!pkt) - break; - pr_debug("CAIF: %s(): freeing packet from read queue\n", - __func__); - cfpkt_destroy(pkt); - } while (1); + return sent; - cf_sk->read_queue_len = 0; +pipe_err: + if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + err = -EPIPE; +out_err: + return sent ? : err; } static int setsockopt(struct socket *sock, @@ -736,19 +738,13 @@ static int setsockopt(struct socket *sock, int prio, linksel; struct ifreq ifreq; - if (STATE_IS_OPEN(cf_sk)) { - pr_debug("CAIF: %s(): setsockopt " - "cannot be done on a connected socket\n", - __func__); + if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED) return -ENOPROTOOPT; - } + switch (opt) { case CAIFSO_LINK_SELECT: - if (ol < sizeof(int)) { - pr_debug("CAIF: %s(): setsockopt" - " CAIFSO_CHANNEL_CONFIG bad size\n", __func__); + if (ol < sizeof(int)) return -EINVAL; - } if (lvl != SOL_CAIF) goto bad_sol; if (copy_from_user(&linksel, ov, sizeof(int))) @@ -761,28 +757,20 @@ static int setsockopt(struct socket *sock, case SO_PRIORITY: if (lvl != SOL_SOCKET) goto bad_sol; - if (ol < sizeof(int)) { - pr_debug("CAIF: %s(): setsockopt" - " SO_PRIORITY bad size\n", __func__); + if (ol < sizeof(int)) return -EINVAL; - } if (copy_from_user(&prio, ov, sizeof(int))) return -EINVAL; lock_sock(&(cf_sk->sk)); cf_sk->conn_req.priority = prio; - pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__, - cf_sk->conn_req.priority); release_sock(&cf_sk->sk); return 0; case SO_BINDTODEVICE: if (lvl != SOL_SOCKET) goto bad_sol; - if (ol < sizeof(struct ifreq)) { - pr_debug("CAIF: %s(): setsockopt" - " SO_PRIORITY bad size\n", __func__); + if (ol < sizeof(struct ifreq)) return -EINVAL; - } if (copy_from_user(&ifreq, ov, sizeof(ifreq))) return -EFAULT; lock_sock(&(cf_sk->sk)); @@ -798,359 +786,275 @@ static int setsockopt(struct socket *sock, goto bad_sol; if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) return -ENOPROTOOPT; - if (ol > sizeof(cf_sk->conn_req.param.data)) - goto req_param_bad_size; - lock_sock(&(cf_sk->sk)); cf_sk->conn_req.param.size = ol; - if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { + if (ol > sizeof(cf_sk->conn_req.param.data) || + copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { release_sock(&cf_sk->sk); -req_param_bad_size: - pr_debug("CAIF: %s(): setsockopt" - " CAIFSO_CHANNEL_CONFIG bad size\n", __func__); return -EINVAL; } - release_sock(&cf_sk->sk); return 0; default: - pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt); - return -EINVAL; + return -ENOPROTOOPT; } return 0; bad_sol: - pr_debug("CAIF: %s(): setsockopt bad level\n", __func__); return -ENOPROTOOPT; } -static int caif_connect(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len, int flags) +/* + * caif_connect() - Connect a CAIF Socket + * Copied and modified af_irda.c:irda_connect(). + * + * Note : by consulting "errno", the user space caller may learn the cause + * of the failure. Most of them are visible in the function, others may come + * from subroutines called and are listed here : + * o -EAFNOSUPPORT: bad socket family or type. + * o -ESOCKTNOSUPPORT: bad socket type or protocol + * o -EINVAL: bad socket address, or CAIF link type + * o -ECONNREFUSED: remote end refused the connection. + * o -EINPROGRESS: connect request sent but timed out (or non-blocking) + * o -EISCONN: already connected. + * o -ETIMEDOUT: Connection timed out (send timeout) + * o -ENODEV: No link layer to send request + * o -ECONNRESET: Received Shutdown indication or lost link layer + * o -ENOMEM: Out of memory + * + * State Strategy: + * o sk_state: holds the CAIF_* protocol state, it's updated by + * caif_ctrl_cb. + * o sock->state: holds the SS_* socket state and is updated by connect and + * disconnect. + */ +static int caif_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { - struct caifsock *cf_sk = NULL; - int result = -1; - int mode = 0; - int ret = -EIO; struct sock *sk = sock->sk; - BUG_ON(sk == NULL); - - cf_sk = container_of(sk, struct caifsock, sk); - - trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n", - __func__, cf_sk, - STATE_IS_OPEN(cf_sk), - TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk)); - + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + long timeo; + int err; + lock_sock(sk); - if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM) - sock->state = SS_CONNECTING; - else + err = -EAFNOSUPPORT; + if (uaddr->sa_family != AF_CAIF) goto out; - /* I want to be alone on cf_sk (except status and queue) */ - lock_sock(&(cf_sk->sk)); - - if (sockaddr_len != sizeof(struct sockaddr_caif)) { - pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n", - __func__, (long) sockaddr_len, - (long unsigned) sizeof(struct sockaddr_caif)); - ret = -EINVAL; - goto open_error; + err = -ESOCKTNOSUPPORT; + if (unlikely(!(sk->sk_type == SOCK_STREAM && + cf_sk->sk.sk_protocol == CAIFPROTO_AT) && + sk->sk_type != SOCK_SEQPACKET)) + goto out; + switch (sock->state) { + case SS_UNCONNECTED: + /* Normal case, a fresh connect */ + caif_assert(sk->sk_state == CAIF_DISCONNECTED); + break; + case SS_CONNECTING: + switch (sk->sk_state) { + case CAIF_CONNECTED: + sock->state = SS_CONNECTED; + err = -EISCONN; + goto out; + case CAIF_DISCONNECTED: + /* Reconnect allowed */ + break; + case CAIF_CONNECTING: + err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; + goto wait_connect; + } + break; + case SS_CONNECTED: + caif_assert(sk->sk_state == CAIF_CONNECTED || + sk->sk_state == CAIF_DISCONNECTED); + if (sk->sk_shutdown & SHUTDOWN_MASK) { + /* Allow re-connect after SHUTDOWN_IND */ + caif_disconnect_client(&cf_sk->layer); + break; + } + /* No reconnect on a seqpacket socket */ + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + case SS_FREE: + caif_assert(1); /*Should never happen */ + break; } + sk->sk_state = CAIF_DISCONNECTED; + sock->state = SS_UNCONNECTED; + sk_stream_kill_queues(&cf_sk->sk); - if (uservaddr->sa_family != AF_CAIF) { - pr_debug("CAIF: %s(): Bad address family (%d)\n", - __func__, uservaddr->sa_family); - ret = -EAFNOSUPPORT; - goto open_error; - } + err = -EINVAL; + if (addr_len != sizeof(struct sockaddr_caif) || + !uaddr) + goto out; - memcpy(&cf_sk->conn_req.sockaddr, uservaddr, + memcpy(&cf_sk->conn_req.sockaddr, uaddr, sizeof(struct sockaddr_caif)); - dbfs_atomic_inc(&cnt.num_open); - mode = SKT_READ_FLAG | SKT_WRITE_FLAG; - - /* If socket is not open, make sure socket is in fully closed state */ - if (!STATE_IS_OPEN(cf_sk)) { - /* Has link close response been received (if we ever sent it)?*/ - if (STATE_IS_PENDING(cf_sk)) { - /* - * Still waiting for close response from remote. - * If opened non-blocking, report "would block" - */ - if (flags & O_NONBLOCK) { - pr_debug("CAIF: %s(): O_NONBLOCK" - " && close pending\n", __func__); - ret = -EAGAIN; - goto open_error; - } - - pr_debug("CAIF: %s(): Wait for close response" - " from remote...\n", __func__); - - release_sock(&cf_sk->sk); - - /* - * Blocking mode; close is pending and we need to wait - * for its conclusion. - */ - result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, - !STATE_IS_PENDING(cf_sk)); - - lock_sock(&(cf_sk->sk)); - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - "woken by a signal (1)", __func__); - ret = -ERESTARTSYS; - goto open_error; - } - } + /* Move to connecting socket, start sending Connect Requests */ + sock->state = SS_CONNECTING; + sk->sk_state = CAIF_CONNECTING; + + dbfs_atomic_inc(&cnt.num_connect_req); + cf_sk->layer.receive = caif_sktrecv_cb; + err = caif_connect_client(&cf_sk->conn_req, + &cf_sk->layer); + if (err < 0) { + cf_sk->sk.sk_socket->state = SS_UNCONNECTED; + cf_sk->sk.sk_state = CAIF_DISCONNECTED; + goto out; } - /* socket is now either closed, pending open or open */ - if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) { - /* Open */ - pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)" - " check access f_flags = 0x%x file_mode = 0x%x\n", - __func__, cf_sk, mode, cf_sk->file_mode); - - } else { - /* We are closed or pending open. - * If closed: send link setup - * If pending open: link setup already sent (we could have been - * interrupted by a signal last time) - */ - if (!STATE_IS_OPEN(cf_sk)) { - /* First opening of file; connect lower layers: */ - /* Drain queue (very unlikely) */ - drain_queue(cf_sk); - - cf_sk->layer.receive = caif_sktrecv_cb; - SET_STATE_OPEN(cf_sk); - SET_PENDING_ON(cf_sk); - - /* Register this channel. */ - result = - caif_connect_client(&cf_sk->conn_req, - &cf_sk->layer); - if (result < 0) { - pr_debug("CAIF: %s(): can't register channel\n", - __func__); - ret = -EIO; - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - goto open_error; - } - dbfs_atomic_inc(&cnt.num_init); - } - - /* If opened non-blocking, report "success". - */ - if (flags & O_NONBLOCK) { - pr_debug("CAIF: %s(): O_NONBLOCK success\n", - __func__); - ret = -EINPROGRESS; - cf_sk->sk.sk_err = -EINPROGRESS; - goto open_error; - } - - trace_printk("CAIF: %s(): Wait for connect response\n", - __func__); + err = -EINPROGRESS; +wait_connect: - /* release lock before waiting */ - release_sock(&cf_sk->sk); - - result = - wait_event_interruptible(*cf_sk->sk.sk_sleep, - !STATE_IS_PENDING(cf_sk)); - - lock_sock(&(cf_sk->sk)); - - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - "woken by a signal (2)", __func__); - ret = -ERESTARTSYS; - goto open_error; - } - - if (!STATE_IS_OPEN(cf_sk)) { - /* Lower layers said "no" */ - pr_debug("CAIF: %s(): Closed received\n", __func__); - ret = -EPIPE; - goto open_error; - } + if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK)) + goto out; - trace_printk("CAIF: %s(): Connect received\n", __func__); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + + release_sock(sk); + err = wait_event_interruptible_timeout(*sk_sleep(sk), + sk->sk_state != CAIF_CONNECTING, + timeo); + lock_sock(sk); + if (err < 0) + goto out; /* -ERESTARTSYS */ + if (err == 0 && sk->sk_state != CAIF_CONNECTED) { + err = -ETIMEDOUT; + goto out; } - /* Open is ok */ - cf_sk->file_mode |= mode; - trace_printk("CAIF: %s(): Connected - file mode = %x\n", - __func__, cf_sk->file_mode); - - release_sock(&cf_sk->sk); - return 0; -open_error: - sock->state = SS_UNCONNECTED; - release_sock(&cf_sk->sk); + if (sk->sk_state != CAIF_CONNECTED) { + sock->state = SS_UNCONNECTED; + err = sock_error(sk); + if (!err) + err = -ECONNREFUSED; + goto out; + } + sock->state = SS_CONNECTED; + err = 0; out: - return ret; + release_sock(sk); + return err; } -static int caif_shutdown(struct socket *sock, int how) + +/* + * caif_release() - Disconnect a CAIF Socket + * Copied and modified af_irda.c:irda_release(). + */ +static int caif_release(struct socket *sock) { - struct caifsock *cf_sk = NULL; - int result = 0; - int tx_flow_state_was_on; struct sock *sk = sock->sk; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); + int res = 0; - trace_printk("CAIF: %s(): enter\n", __func__); - pr_debug("f_flags=%x\n", sock->file->f_flags); - - if (how != SHUT_RDWR) - return -EOPNOTSUPP; - - cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk == NULL) { - pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__); - return -EBADF; - } - - /* I want to be alone on cf_sk (except status queue) */ - lock_sock(&(cf_sk->sk)); - sock_hold(&cf_sk->sk); - - /* IS_CLOSED have double meaning: - * 1) Spontanous Remote Shutdown Request. - * 2) Ack on a channel teardown(disconnect) - * Must clear bit in case we previously received - * remote shudown request. - */ - if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) { - SET_STATE_CLOSED(cf_sk); - SET_PENDING_ON(cf_sk); - tx_flow_state_was_on = TX_FLOW_IS_ON(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - - /* Hold the socket until DEINIT_RSP is received */ - sock_hold(&cf_sk->sk); - result = caif_disconnect_client(&cf_sk->layer); - - if (result < 0) { - pr_debug("CAIF: %s(): " - "caif_disconnect_client() failed\n", - __func__); - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - release_sock(&cf_sk->sk); - sock_put(&cf_sk->sk); - return -EIO; - } + if (!sk) + return 0; - } - if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) { - SET_PENDING_OFF(cf_sk); - SET_REMOTE_SHUTDOWN_OFF(cf_sk); - } + set_tx_flow_off(cf_sk); /* - * Socket is no longer in state pending close, - * and we can release the reference. + * Ensure that packets are not queued after this point in time. + * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock, + * this ensures no packets when sock is dead. */ + spin_lock(&sk->sk_receive_queue.lock); + sock_set_flag(sk, SOCK_DEAD); + spin_unlock(&sk->sk_receive_queue.lock); + sock->sk = NULL; - dbfs_atomic_inc(&cnt.num_close); - drain_queue(cf_sk); - SET_RX_FLOW_ON(cf_sk); - cf_sk->file_mode = 0; - sock_put(&cf_sk->sk); - release_sock(&cf_sk->sk); - if (!result && (sock->file->f_flags & O_NONBLOCK)) { - pr_debug("nonblocking shutdown returing -EAGAIN\n"); - return -EAGAIN; - } else - return result; -} - -static ssize_t caif_sock_no_sendpage(struct socket *sock, - struct page *page, - int offset, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -/* This function is called as part of close. */ -static int caif_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct caifsock *cf_sk = NULL; - int res; - caif_assert(sk != NULL); - cf_sk = container_of(sk, struct caifsock, sk); + dbfs_atomic_inc(&cnt.num_disconnect); if (cf_sk->debugfs_socket_dir != NULL) debugfs_remove_recursive(cf_sk->debugfs_socket_dir); - res = caif_shutdown(sock, SHUT_RDWR); - if (res && res != -EINPROGRESS) - return res; - - /* - * FIXME: Shutdown should probably be possible to do async - * without flushing queues, allowing reception of frames while - * waiting for DEINIT_IND. - * Release should always block, to allow secure decoupling of - * CAIF stack. - */ - if (!(sock->file->f_flags & O_NONBLOCK)) { - res = wait_event_interruptible(*cf_sk->sk.sk_sleep, - !STATE_IS_PENDING(cf_sk)); - - if (res == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible" - "woken by a signal (1)", __func__); - } - } lock_sock(&(cf_sk->sk)); + sk->sk_state = CAIF_DISCONNECTED; + sk->sk_shutdown = SHUTDOWN_MASK; - sock->sk = NULL; + if (cf_sk->sk.sk_socket->state == SS_CONNECTED || + cf_sk->sk.sk_socket->state == SS_CONNECTING) + res = caif_disconnect_client(&cf_sk->layer); - /* Detach the socket from its process context by making it orphan. */ - sock_orphan(sk); + cf_sk->sk.sk_socket->state = SS_DISCONNECTING; + wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP); - /* - * Setting SHUTDOWN_MASK means that both send and receive are shutdown - * for the socket. - */ - sk->sk_shutdown = SHUTDOWN_MASK; + sock_orphan(sk); + cf_sk->layer.dn = NULL; + sk_stream_kill_queues(&cf_sk->sk); + release_sock(sk); + sock_put(sk); + return res; +} - /* - * Set the socket state to closed, the TCP_CLOSE macro is used when - * closing any socket. - */ +/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ +static unsigned int caif_poll(struct file *file, + struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask; + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - /* Flush out this sockets receive queue. */ - drain_queue(cf_sk); + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; - /* Finally release the socket. */ - SET_STATE_PENDING_DESTROY(cf_sk); + /* exceptional events? */ + if (sk->sk_err) + mask |= POLLERR; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; - release_sock(&cf_sk->sk); + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; - sock_put(sk); + /* Connection-based need to check for termination and startup */ + if (sk->sk_state == CAIF_DISCONNECTED) + mask |= POLLHUP; /* - * The rest of the cleanup will be handled from the - * caif_sock_destructor + * we set writable also when the other side has shut down the + * connection. This prevents stuck sockets. */ - return res; + if (sock_writeable(sk) && tx_flow_is_on(cf_sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; } -static const struct proto_ops caif_ops = { +static const struct proto_ops caif_seqpacket_ops = { + .family = PF_CAIF, + .owner = THIS_MODULE, + .release = caif_release, + .bind = sock_no_bind, + .connect = caif_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = caif_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = caif_seqpkt_sendmsg, + .recvmsg = caif_seqpkt_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static const struct proto_ops caif_stream_ops = { .family = PF_CAIF, .owner = THIS_MODULE, .release = caif_release, @@ -1162,73 +1066,62 @@ static const struct proto_ops caif_ops = { .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, - .shutdown = caif_shutdown, + .shutdown = sock_no_shutdown, .setsockopt = setsockopt, .getsockopt = sock_no_getsockopt, - .sendmsg = caif_sendmsg, - .recvmsg = caif_recvmsg, + .sendmsg = caif_stream_sendmsg, + .recvmsg = caif_stream_recvmsg, .mmap = sock_no_mmap, - .sendpage = caif_sock_no_sendpage, + .sendpage = sock_no_sendpage, }; /* This function is called when a socket is finally destroyed. */ static void caif_sock_destructor(struct sock *sk) { - struct caifsock *cf_sk = NULL; - cf_sk = container_of(sk, struct caifsock, sk); - /* Error checks. */ + struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); caif_assert(!atomic_read(&sk->sk_wmem_alloc)); caif_assert(sk_unhashed(sk)); caif_assert(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { - pr_debug("CAIF: %s(): 0x%p", __func__, sk); + pr_info("Attempt to release alive CAIF socket: %p\n", sk); return; } - - if (STATE_IS_OPEN(cf_sk)) { - pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)" - " file_mode = 0x%x\n", __func__, - cf_sk, cf_sk->file_mode); - return; - } - drain_queue(cf_sk); - kfree(cf_sk->pktq); - - trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n", - __func__, cf_sk->name); - atomic_dec(&caif_nr_socks); + sk_stream_kill_queues(&cf_sk->sk); + dbfs_atomic_dec(&cnt.caif_nr_socks); } static int caif_create(struct net *net, struct socket *sock, int protocol, - int kern) + int kern) { struct sock *sk = NULL; struct caifsock *cf_sk = NULL; - int result = 0; static struct proto prot = {.name = "PF_CAIF", .owner = THIS_MODULE, .obj_size = sizeof(struct caifsock), }; + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) + return -EPERM; /* * The sock->type specifies the socket type to use. - * in SEQPACKET mode packet boundaries are enforced. + * The CAIF socket is a packet stream in the sense + * that it is packet based. CAIF trusts the reliability + * of the link, no resending is implemented. */ - if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + if (sock->type == SOCK_SEQPACKET) + sock->ops = &caif_seqpacket_ops; + else if (sock->type == SOCK_STREAM) + sock->ops = &caif_stream_ops; + else return -ESOCKTNOSUPPORT; - if (net != &init_net) - return -EAFNOSUPPORT; - if (protocol < 0 || protocol >= CAIFPROTO_MAX) return -EPROTONOSUPPORT; /* - * Set the socket state to unconnected. The socket state is really - * not used at all in the net/core or socket.c but the + * Set the socket state to unconnected. The socket state + * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. */ - sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); if (!sk) return -ENOMEM; @@ -1238,11 +1131,9 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, /* Store the protocol */ sk->sk_protocol = (unsigned char) protocol; - spin_lock_init(&cf_sk->read_queue_len_lock); - - /* Fill in some information concerning the misc socket. */ - snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d", - atomic_read(&caif_nr_socks)); + /* Sendbuf dictates the amount of outbound packets not yet sent */ + sk->sk_sndbuf = CAIF_DEF_SNDBUF; + sk->sk_rcvbuf = CAIF_DEF_RCVBUF; /* * Lock in order to try to stop someone from opening the socket @@ -1252,108 +1143,85 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, /* Initialize the nozero default sock structure data. */ sock_init_data(sock, sk); - sock->ops = &caif_ops; sk->sk_destruct = caif_sock_destructor; - sk->sk_sndbuf = caif_sockbuf_size; - sk->sk_rcvbuf = caif_sockbuf_size; - cf_sk->pktq = cfpktq_create(); + mutex_init(&cf_sk->readlock); /* single task reading lock */ + cf_sk->layer.ctrlcmd = caif_ctrl_cb; + cf_sk->sk.sk_socket->state = SS_UNCONNECTED; + cf_sk->sk.sk_state = CAIF_DISCONNECTED; - if (!cf_sk->pktq) { - pr_err("CAIF: %s(): queue create failed.\n", __func__); - result = -ENOMEM; - release_sock(&cf_sk->sk); - goto err_failed; - } - cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb; - SET_STATE_CLOSED(cf_sk); - SET_PENDING_OFF(cf_sk); - SET_TX_FLOW_OFF(cf_sk); - SET_RX_FLOW_ON(cf_sk); + set_tx_flow_off(cf_sk); + set_rx_flow_on(cf_sk); /* Set default options on configuration */ cf_sk->conn_req.priority = CAIF_PRIO_NORMAL; - cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; + cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY; cf_sk->conn_req.protocol = protocol; /* Increase the number of sockets created. */ - atomic_inc(&caif_nr_socks); + dbfs_atomic_inc(&cnt.caif_nr_socks); +#ifdef CONFIG_DEBUG_FS if (!IS_ERR(debugfsdir)) { + /* Fill in some information concerning the misc socket. */ + snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d", + atomic_read(&cnt.caif_nr_socks)); + cf_sk->debugfs_socket_dir = debugfs_create_dir(cf_sk->name, debugfsdir); - debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR, - cf_sk->debugfs_socket_dir, &cf_sk->conn_state); + debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, + (u32 *) &cf_sk->sk.sk_state); debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR, cf_sk->debugfs_socket_dir, &cf_sk->flow_state); - debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR, + debugfs_create_u32("sk_rmem_alloc", S_IRUSR | S_IWUSR, + cf_sk->debugfs_socket_dir, + (u32 *) &cf_sk->sk.sk_rmem_alloc); + debugfs_create_u32("sk_wmem_alloc", S_IRUSR | S_IWUSR, cf_sk->debugfs_socket_dir, - (u32 *) &cf_sk->read_queue_len); + (u32 *) &cf_sk->sk.sk_wmem_alloc); debugfs_create_u32("identity", S_IRUSR | S_IWUSR, cf_sk->debugfs_socket_dir, (u32 *) &cf_sk->layer.id); } +#endif release_sock(&cf_sk->sk); return 0; -err_failed: - sk_free(sk); - return result; } + static struct net_proto_family caif_family_ops = { .family = PF_CAIF, .create = caif_create, .owner = THIS_MODULE, }; -static int af_caif_init(void) +int af_caif_init(void) { - int err; - err = sock_register(&caif_family_ops); - + int err = sock_register(&caif_family_ops); if (!err) return err; - return 0; } static int __init caif_sktinit_module(void) { - int stat; #ifdef CONFIG_DEBUG_FS - debugfsdir = debugfs_create_dir("chnl_skt", NULL); + debugfsdir = debugfs_create_dir("caif_sk", NULL); if (!IS_ERR(debugfsdir)) { - debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.skb_in_use); - debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.skb_alloc); - debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.skb_free); debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &caif_nr_socks); - debugfs_create_u32("num_open", S_IRUSR | S_IWUSR, + (u32 *) &cnt.caif_nr_socks); + debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_open); - debugfs_create_u32("num_close", S_IRUSR | S_IWUSR, + (u32 *) &cnt.num_connect_req); + debugfs_create_u32("num_connect_resp", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_close); - debugfs_create_u32("num_init", S_IRUSR | S_IWUSR, + (u32 *) &cnt.num_connect_resp); + debugfs_create_u32("num_connect_fail_resp", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_init); - debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR, + (u32 *) &cnt.num_connect_fail_resp); + debugfs_create_u32("num_disconnect", S_IRUSR | S_IWUSR, debugfsdir, - (u32 *) &cnt.num_init_resp); - debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.num_init_fail_resp); - debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.num_deinit); - debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR, - debugfsdir, - (u32 *) &cnt.num_deinit_resp); + (u32 *) &cnt.num_disconnect); debugfs_create_u32("num_remote_shutdown_ind", S_IRUSR | S_IWUSR, debugfsdir, (u32 *) &cnt.num_remote_shutdown_ind); @@ -1371,13 +1239,7 @@ static int __init caif_sktinit_module(void) (u32 *) &cnt.num_rx_flow_on); } #endif - stat = af_caif_init(); - if (stat) { - pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.", - __func__); - return stat; - } - return 0; + return af_caif_init(); } static void __exit caif_sktexit_module(void) @@ -1386,6 +1248,5 @@ static void __exit caif_sktexit_module(void) if (debugfsdir != NULL) debugfs_remove_recursive(debugfsdir); } - module_init(caif_sktinit_module); module_exit(caif_sktexit_module); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index c873e3d..471c629 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -51,12 +51,11 @@ struct cfcnfg { struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS]; }; -static void cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer); -static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, - struct cflayer *client_layer); -static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); +static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer); static void cfctrl_resp_func(void); static void cfctrl_enum_resp(void); @@ -82,13 +81,13 @@ struct cfcnfg *cfcnfg_create(void) resp = cfctrl_get_respfuncs(this->ctrl); resp->enum_rsp = cfctrl_enum_resp; resp->linkerror_ind = cfctrl_resp_func; - resp->linkdestroy_rsp = cncfg_linkdestroy_rsp; + resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp; resp->sleep_rsp = cfctrl_resp_func; resp->wake_rsp = cfctrl_resp_func; resp->restart_rsp = cfctrl_resp_func; resp->radioset_rsp = cfctrl_resp_func; - resp->linksetup_rsp = cncfg_linkup_rsp; - resp->reject_rsp = cncfg_reject_rsp; + resp->linksetup_rsp = cfcnfg_linkup_rsp; + resp->reject_rsp = cfcnfg_reject_rsp; this->last_phyid = 1; @@ -175,142 +174,82 @@ int cfcnfg_get_named(struct cfcnfg *cnfg, char *name) return 0; } -/* - * NOTE: What happens on destroy failure: - * 1a) No response - Too early - * This will not happen because enumerate has already - * completed. - * 1b) No response - FATAL - * Not handled, but this should be a CAIF PROTOCOL ERROR - * Modem error, response is really expected - this - * case is not really handled. - * 2) O/E-bit indicate error - * Ignored - this link is destroyed anyway. - * 3) Not able to match on request - * Not handled, but this should be a CAIF PROTOCOL ERROR - * 4) Link-Error - (no response) - * Not handled, but this should be a CAIF PROTOCOL ERROR - */ - -int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) +int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) { u8 channel_id = 0; int ret = 0; + struct cflayer *servl = NULL; struct cfcnfg_phyinfo *phyinfo = NULL; u8 phyid = 0; - caif_assert(adap_layer != NULL); channel_id = adap_layer->id; - if (channel_id == 0) { + if (adap_layer->dn == NULL || channel_id == 0) { pr_err("CAIF: %s():adap_layer->id is 0\n", __func__); ret = -ENOTCONN; goto end; } - - if (adap_layer->dn == NULL) { - pr_err("CAIF: %s():adap_layer->dn is NULL\n", __func__); - ret = -ENODEV; - goto end; - } - - if (adap_layer->dn != NULL) - phyid = cfsrvl_getphyid(adap_layer->dn); - - phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); - if (phyinfo == NULL) { - pr_warning("CAIF: %s(): No interface to send disconnect to\n", - __func__); - ret = -ENODEV; + servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); + if (servl == NULL) goto end; - } - - if (phyinfo->id != phyid - || phyinfo->phy_layer->id != phyid - || phyinfo->frm_layer->id != phyid) { - - pr_err("CAIF: %s(): Inconsistency in phy registration\n", - __func__); + layer_set_up(servl, NULL); + ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); + if (servl == NULL) { + pr_err("CAIF: %s(): PROTOCOL ERROR " + "- Error removing service_layer Channel_Id(%d)", + __func__, channel_id); ret = -EINVAL; goto end; } + caif_assert(channel_id == servl->id); + if (adap_layer->dn != NULL) { + phyid = cfsrvl_getphyid(adap_layer->dn); - ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); - -end: + phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); + if (phyinfo == NULL) { + pr_warning("CAIF: %s(): " + "No interface to send disconnect to\n", + __func__); + ret = -ENODEV; + goto end; + } + if (phyinfo->id != phyid || + phyinfo->phy_layer->id != phyid || + phyinfo->frm_layer->id != phyid) { + pr_err("CAIF: %s(): " + "Inconsistency in phy registration\n", + __func__); + ret = -EINVAL; + goto end; + } + } if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 && phyinfo->phy_layer != NULL && phyinfo->phy_layer->modemcmd != NULL) { phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, _CAIF_MODEMCMD_PHYIF_USELESS); } +end: + cfsrvl_put(servl); + cfctrl_cancel_req(cnfg->ctrl, adap_layer); + if (adap_layer->ctrlcmd != NULL) + adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0); return ret; } -EXPORT_SYMBOL(cfcnfg_del_adapt_layer); +EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer); -static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid, - struct cflayer *client_layer) +void cfcnfg_release_adap_layer(struct cflayer *adap_layer) { - struct cfcnfg *cnfg = container_obj(layer); - struct cflayer *servl; - - /* - * 1) Remove service from the MUX layer. The MUX must - * guarante that no more payload sent "upwards" (receive) - */ - servl = cfmuxl_remove_uplayer(cnfg->mux, linkid); - - if (servl == NULL) { - pr_err("CAIF: %s(): PROTOCOL ERROR " - "- Error removing service_layer Linkid(%d)", - __func__, linkid); - return; - } - caif_assert(linkid == servl->id); - - if (servl != client_layer && servl->up != client_layer) { - pr_err("CAIF: %s(): Error removing service_layer " - "Linkid(%d) %p %p", - __func__, linkid, (void *) servl, - (void *) client_layer); - return; - } - - /* - * 2) DEINIT_RSP must guarantee that no more packets are transmitted - * from client (adap_layer) when it returns. - */ - - if (servl->ctrlcmd == NULL) { - pr_err("CAIF: %s(): Error servl->ctrlcmd == NULL", __func__); - return; - } - - servl->ctrlcmd(servl, CAIF_CTRLCMD_DEINIT_RSP, 0); - - /* 3) It is now safe to destroy the service layer. */ - cfservl_destroy(servl); + if (adap_layer->dn) + cfsrvl_put(adap_layer->dn); } +EXPORT_SYMBOL(cfcnfg_release_adap_layer); -/* - * NOTE: What happens on linksetup failure: - * 1a) No response - Too early - * This will not happen because enumerate is secured - * before using interface. - * 1b) No response - FATAL - * Not handled, but this should be a CAIF PROTOCOL ERROR - * Modem error, response is really expected - this case is - * not really handled. - * 2) O/E-bit indicate error - * Handled in cnfg_reject_rsp - * 3) Not able to match on request - * Not handled, but this should be a CAIF PROTOCOL ERROR - * 4) Link-Error - (no response) - * Not handled, but this should be a CAIF PROTOCOL ERROR - */ +static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) +{ +} -int -cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, +int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, struct cfctrl_link_param *param, struct cflayer *adap_layer) { @@ -340,12 +279,11 @@ cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, param->phyid); /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ cfctrl_enum_req(cnfg->ctrl, param->phyid); - cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); - return 0; + return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); } EXPORT_SYMBOL(cfcnfg_add_adaptation_layer); -static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, +static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, struct cflayer *adapt_layer) { if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) @@ -354,15 +292,17 @@ static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid, } static void -cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, +cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, u8 phyid, struct cflayer *adapt_layer) { struct cfcnfg *cnfg = container_obj(layer); struct cflayer *servicel = NULL; struct cfcnfg_phyinfo *phyinfo; if (adapt_layer == NULL) { - pr_err("CAIF: %s(): PROTOCOL ERROR " - "- LinkUp Request/Response did not match\n", __func__); + pr_debug("CAIF: %s(): link setup response " + "but no client exist, send linkdown back\n", + __func__); + cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); return; } @@ -383,26 +323,26 @@ cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, _CAIF_MODEMCMD_PHYIF_USEFULL); } - adapt_layer->id = linkid; + adapt_layer->id = channel_id; switch (serv) { case CFCTRL_SRV_VEI: - servicel = cfvei_create(linkid, &phyinfo->dev_info); + servicel = cfvei_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_DATAGRAM: - servicel = cfdgml_create(linkid, &phyinfo->dev_info); + servicel = cfdgml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_RFM: - servicel = cfrfml_create(linkid, &phyinfo->dev_info); + servicel = cfrfml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_UTIL: - servicel = cfutill_create(linkid, &phyinfo->dev_info); + servicel = cfutill_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_VIDEO: - servicel = cfvidl_create(linkid, &phyinfo->dev_info); + servicel = cfvidl_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_DBG: - servicel = cfdbgl_create(linkid, &phyinfo->dev_info); + servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); break; default: pr_err("CAIF: %s(): Protocol error. " @@ -415,9 +355,10 @@ cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv, return; } layer_set_dn(servicel, cnfg->mux); - cfmuxl_set_uplayer(cnfg->mux, servicel, linkid); + cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); layer_set_dn(adapt_layer, servicel); + cfsrvl_get(servicel); servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); } diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 11f8014..a521d32 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -32,6 +32,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, struct cflayer *cfctrl_create(void) { + struct dev_info dev_info; struct cfctrl *this = kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); if (!this) { @@ -39,12 +40,13 @@ struct cflayer *cfctrl_create(void) return NULL; } caif_assert(offsetof(struct cfctrl, serv.layer) == 0); + memset(&dev_info, 0, sizeof(dev_info)); + dev_info.id = 0xff; memset(this, 0, sizeof(*this)); + cfsrvl_init(&this->serv, 0, &dev_info); spin_lock_init(&this->info_list_lock); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); - this->serv.dev_info.id = 0xff; - this->serv.layer.id = 0; this->serv.layer.receive = cfctrl_recv; sprintf(this->serv.layer.name, "ctrl"); this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; @@ -127,20 +129,6 @@ void cfctrl_insert_req(struct cfctrl *ctrl, spin_unlock(&ctrl->info_list_lock); } -static void cfctrl_insert_req2(struct cfctrl *ctrl, enum cfctrl_cmd cmd, - u8 linkid, struct cflayer *user_layer) -{ - struct cfctrl_request_info *req = kmalloc(sizeof(*req), GFP_KERNEL); - if (!req) { - pr_warning("CAIF: %s(): Out of memory\n", __func__); - return; - } - req->client_layer = user_layer; - req->cmd = cmd; - req->channel_id = linkid; - cfctrl_insert_req(ctrl, req); -} - /* Compare and remove request */ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) @@ -234,7 +222,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) } } -void cfctrl_linkup_request(struct cflayer *layer, +int cfctrl_linkup_request(struct cflayer *layer, struct cfctrl_link_param *param, struct cflayer *user_layer) { @@ -248,7 +236,7 @@ void cfctrl_linkup_request(struct cflayer *layer, struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); if (!pkt) { pr_warning("CAIF: %s(): Out of memory\n", __func__); - return; + return -ENOMEM; } cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype); @@ -294,11 +282,12 @@ void cfctrl_linkup_request(struct cflayer *layer, default: pr_warning("CAIF: %s():Request setup of bad link type = %d\n", __func__, param->linktype); + return -EINVAL; } req = kmalloc(sizeof(*req), GFP_KERNEL); if (!req) { pr_warning("CAIF: %s(): Out of memory\n", __func__); - return; + return -ENOMEM; } memset(req, 0, sizeof(*req)); req->client_layer = user_layer; @@ -306,6 +295,11 @@ void cfctrl_linkup_request(struct cflayer *layer, req->param = *param; cfctrl_insert_req(cfctrl, req); init_info(cfpkt_info(pkt), cfctrl); + /* + * NOTE:Always send linkup and linkdown request on the same + * device as the payload. Otherwise old queued up payload + * might arrive with the newly allocated channel ID. + */ cfpkt_info(pkt)->dev_info->id = param->phyid; ret = cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); @@ -313,7 +307,9 @@ void cfctrl_linkup_request(struct cflayer *layer, pr_err("CAIF: %s(): Could not transmit linksetup request\n", __func__); cfpkt_destroy(pkt); + return -ENODEV; } + return 0; } int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, @@ -326,7 +322,6 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, pr_warning("CAIF: %s(): Out of memory\n", __func__); return -ENOMEM; } - cfctrl_insert_req2(cfctrl, CFCTRL_CMD_LINK_DESTROY, channelid, client); cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); cfpkt_addbdy(pkt, channelid); init_info(cfpkt_info(pkt), cfctrl); @@ -392,6 +387,38 @@ void cfctrl_getstartreason_req(struct cflayer *layer) } +void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) +{ + struct cfctrl_request_info *p, *req; + struct cfctrl *ctrl = container_obj(layr); + spin_lock(&ctrl->info_list_lock); + + if (ctrl->first_req == NULL) { + spin_unlock(&ctrl->info_list_lock); + return; + } + + if (ctrl->first_req->client_layer == adap_layer) { + + req = ctrl->first_req; + ctrl->first_req = ctrl->first_req->next; + kfree(req); + } + + p = ctrl->first_req; + while (p != NULL && p->next != NULL) { + if (p->next->client_layer == adap_layer) { + + req = p->next; + p->next = p->next->next; + kfree(p->next); + } + p = p->next; + } + + spin_unlock(&ctrl->info_list_lock); +} + static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) { u8 cmdrsp; @@ -409,11 +436,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) { - if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) { - pr_info("CAIF: %s() CAIF Protocol error:" - "Response bit not set\n", __func__); - goto error; - } + if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) + cmdrsp |= CFCTRL_ERR_BIT; } switch (cmd) { @@ -451,12 +475,16 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) switch (serv) { case CFCTRL_SRV_VEI: case CFCTRL_SRV_DBG: + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); break; case CFCTRL_SRV_VIDEO: cfpkt_extr_head(pkt, &tmp, 1); linkparam.u.video.connid = tmp; + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); break; @@ -465,6 +493,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.datagram.connid = le32_to_cpu(tmp32); + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); break; @@ -483,6 +513,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) *cp++ = tmp; *cp = '\0'; + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); @@ -519,6 +551,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cfpkt_extr_head(pkt, &tmp, 1); *cp++ = tmp; } + if (CFCTRL_ERR_BIT & cmdrsp) + break; /* Link ID */ cfpkt_extr_head(pkt, &linkid, 1); /* Length */ @@ -560,13 +594,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) break; case CFCTRL_CMD_LINK_DESTROY: cfpkt_extr_head(pkt, &linkid, 1); - rsp.cmd = cmd; - rsp.channel_id = linkid; - req = cfctrl_remove_req(cfctrl, &rsp); - cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid, - req ? req->client_layer : NULL); - if (req != NULL) - kfree(req); + cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); break; case CFCTRL_CMD_LINK_ERR: pr_err("CAIF: %s(): Frame Error Indication received\n", @@ -608,7 +636,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, case CAIF_CTRLCMD_FLOW_OFF_IND: spin_lock(&this->info_list_lock); if (this->first_req != NULL) { - pr_warning("CAIF: %s(): Received flow off in " + pr_debug("CAIF: %s(): Received flow off in " "control layer", __func__); } spin_unlock(&this->info_list_lock); @@ -633,6 +661,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt) if (!ctrl->loop_linkused[linkid]) goto found; spin_unlock(&ctrl->loop_linkid_lock); + pr_err("CAIF: %s(): Out of link-ids\n", __func__); return -EINVAL; found: if (!ctrl->loop_linkused[linkid]) diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 6fb9f9e..7372f27 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -62,6 +62,7 @@ int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid) { struct cfmuxl *muxl = container_obj(layr); spin_lock(&muxl->receive_lock); + cfsrvl_get(up); list_add(&up->node, &muxl->srvl_list); spin_unlock(&muxl->receive_lock); return 0; @@ -172,8 +173,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) struct cfmuxl *muxl = container_obj(layr); spin_lock(&muxl->receive_lock); up = get_up(muxl, id); + if (up == NULL) + return NULL; memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); list_del(&up->node); + cfsrvl_put(up); spin_unlock(&muxl->receive_lock); return up; } @@ -203,8 +207,9 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt) */ return /* CFGLU_EPROT; */ 0; } - + cfsrvl_get(up); ret = up->receive(up, pkt); + cfsrvl_put(up); return ret; } diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index d470c51..aff31f3 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -158,6 +158,13 @@ void cfsrvl_init(struct cfsrvl *service, service->layer.ctrlcmd = cfservl_ctrlcmd; service->layer.modemcmd = cfservl_modemcmd; service->dev_info = *dev_info; + kref_init(&service->ref); +} + +void cfsrvl_release(struct kref *kref) +{ + struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); + kfree(service); } bool cfsrvl_ready(struct cfsrvl *service, int *err) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index f622ff1..610966a 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -22,10 +22,10 @@ #include <net/caif/cfpkt.h> #include <net/caif/caif_dev.h> -#define CAIF_CONNECT_TIMEOUT 30 +/* GPRS PDP connection has MTU to 1500 */ #define SIZE_MTU 1500 -#define SIZE_MTU_MAX 4080 -#define SIZE_MTU_MIN 68 +/* 5 sec. connect timeout */ +#define CONNECT_TIMEOUT (5 * HZ) #define CAIF_NET_DEFAULT_QUEUE_LEN 500 #undef pr_debug @@ -37,6 +37,13 @@ static LIST_HEAD(chnl_net_list); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("caif"); +enum caif_states { + CAIF_CONNECTED = 1, + CAIF_CONNECTING, + CAIF_DISCONNECTED, + CAIF_SHUTDOWN +}; + struct chnl_net { struct cflayer chnl; struct net_device_stats stats; @@ -47,7 +54,7 @@ struct chnl_net { wait_queue_head_t netmgmt_wq; /* Flow status to remember and control the transmission. */ bool flowenabled; - bool pending_close; + enum caif_states state; }; static void robust_list_del(struct list_head *delete_node) @@ -58,15 +65,16 @@ static void robust_list_del(struct list_head *delete_node) list_for_each_safe(list_node, n, &chnl_net_list) { if (list_node == delete_node) { list_del(list_node); - break; + return; } } + WARN_ON(1); } static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; - struct chnl_net *priv = NULL; + struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); int pktlen; int err = 0; @@ -91,7 +99,6 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) else skb->ip_summed = CHECKSUM_NONE; - /* FIXME: Drivers should call this in tasklet context. */ if (in_interrupt()) netif_rx(skb); else @@ -117,23 +124,25 @@ static void close_work(struct work_struct *work) struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; - rtnl_lock(); + /* May be called with or without RTNL lock held */ + int islocked = rtnl_is_locked(); + if (!islocked) + rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); - if (!dev->pending_close) - continue; - list_del(list_node); - delete_device(dev); + if (dev->state == CAIF_SHUTDOWN) + dev_close(dev->netdev); } - rtnl_unlock(); + if (!islocked) + rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, int phyid) { - struct chnl_net *priv; - pr_debug("CAIF: %s(): NET flowctrl func called flow: %s.\n", + struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); + pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", __func__, flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : @@ -143,21 +152,31 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND"); - priv = container_of(layr, struct chnl_net, chnl); + switch (flow) { case CAIF_CTRLCMD_FLOW_OFF_IND: + priv->flowenabled = false; + netif_stop_queue(priv->netdev); + break; case CAIF_CTRLCMD_DEINIT_RSP: + priv->state = CAIF_DISCONNECTED; + break; case CAIF_CTRLCMD_INIT_FAIL_RSP: + priv->state = CAIF_DISCONNECTED; + wake_up_interruptible(&priv->netmgmt_wq); + break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: - priv->flowenabled = false; + priv->state = CAIF_SHUTDOWN; netif_tx_disable(priv->netdev); - pr_warning("CAIF: %s(): done\n", __func__); - priv->pending_close = 1; schedule_work(&close_worker); break; case CAIF_CTRLCMD_FLOW_ON_IND: + priv->flowenabled = true; + netif_wake_queue(priv->netdev); + break; case CAIF_CTRLCMD_INIT_RSP: + priv->state = CAIF_CONNECTED; priv->flowenabled = true; netif_wake_queue(priv->netdev); wake_up_interruptible(&priv->netmgmt_wq); @@ -194,9 +213,6 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev) pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb); - pr_debug("CAIF: %s(): transmit inst %s %d,%p\n", - __func__, dev->name, priv->chnl.dn->id, &priv->chnl.dn); - /* Send the packet down the stack. */ result = priv->chnl.dn->transmit(priv->chnl.dn, pkt); if (result) { @@ -217,61 +233,59 @@ static int chnl_net_open(struct net_device *dev) struct chnl_net *priv = NULL; int result = -1; ASSERT_RTNL(); - priv = netdev_priv(dev); - pr_debug("CAIF: %s(): dev name: %s\n", __func__, priv->name); - if (!priv) { pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); return -ENODEV; } - result = caif_connect_client(&priv->conn_req, &priv->chnl); - if (result != 0) { - pr_debug("CAIF: %s(): err: " - "Unable to register and open device, Err:%d\n", - __func__, - result); - return -ENODEV; + + if (priv->state != CAIF_CONNECTING) { + priv->state = CAIF_CONNECTING; + result = caif_connect_client(&priv->conn_req, &priv->chnl); + if (result != 0) { + priv->state = CAIF_DISCONNECTED; + pr_debug("CAIF: %s(): err: " + "Unable to register and open device," + " Err:%d\n", + __func__, + result); + return result; + } } - result = wait_event_interruptible(priv->netmgmt_wq, priv->flowenabled); + + result = wait_event_interruptible_timeout(priv->netmgmt_wq, + priv->state != CAIF_CONNECTING, + CONNECT_TIMEOUT); if (result == -ERESTARTSYS) { pr_debug("CAIF: %s(): wait_event_interruptible" " woken by a signal\n", __func__); return -ERESTARTSYS; - } else - pr_debug("CAIF: %s(): Flow on recieved\n", __func__); + } + if (result == 0) { + pr_debug("CAIF: %s(): connect timeout\n", __func__); + caif_disconnect_client(&priv->chnl); + priv->state = CAIF_DISCONNECTED; + pr_debug("CAIF: %s(): state disconnected\n", __func__); + return -ETIMEDOUT; + } + if (priv->state != CAIF_CONNECTED) { + pr_debug("CAIF: %s(): connect failed\n", __func__); + return -ECONNREFUSED; + } + pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); return 0; } static int chnl_net_stop(struct net_device *dev) { struct chnl_net *priv; - int result = -1; + ASSERT_RTNL(); priv = netdev_priv(dev); - - result = caif_disconnect_client(&priv->chnl); - if (result != 0) { - pr_debug("CAIF: %s(): chnl_net_stop: err: " - "Unable to STOP device, Err:%d\n", - __func__, result); - return -EBUSY; - } - result = wait_event_interruptible(priv->netmgmt_wq, - !priv->flowenabled); - - if (result == -ERESTARTSYS) { - pr_debug("CAIF: %s(): wait_event_interruptible woken by" - " signal, signal_pending(current) = %d\n", - __func__, - signal_pending(current)); - } else { - pr_debug("CAIF: %s(): disconnect received\n", __func__); - - } - + priv->state = CAIF_DISCONNECTED; + caif_disconnect_client(&priv->chnl); return 0; } @@ -377,6 +391,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); + dev_net_set(caifdev->netdev, src_net); + ret = register_netdevice(dev); if (ret) pr_warning("CAIF: %s(): device rtml registration failed\n", diff --git a/net/core/datagram.c b/net/core/datagram.c index 2dccd4e..e0097531 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) error = 0; *timeo_p = schedule_timeout(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return error; interrupted: error = sock_intr_errno(*timeo_p); @@ -229,9 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + + lock_sock_bh(sk); + skb_orphan(skb); + sk_mem_reclaim_partial(sk); + unlock_sock_bh(sk); + + /* skb is now orphaned, can be freed outside of locked section */ + __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); @@ -726,7 +735,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/core/dev.c b/net/core/dev.c index b31d5d6..32611c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1557,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q) local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - q->next_sched = sd->output_queue; - sd->output_queue = q; + q->next_sched = NULL; + *sd->output_queue_tailp = q; + sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -1902,13 +1903,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); - if (netif_needs_gso(dev, skb)) { - if (unlikely(dev_gso_segment(skb))) - goto out_kfree_skb; - if (skb->next) - goto gso; - } - /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -1917,6 +1911,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); skb_orphan_try(skb); + + if (netif_needs_gso(dev, skb)) { + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + if (skb->next) + goto gso; + } + rc = ops->ndo_start_xmit(skb, dev); if (rc == NETDEV_TX_OK) txq_trans_update(txq); @@ -1937,7 +1939,6 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); - skb_orphan_try(nskb); rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2015,8 +2016,12 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); - if (sk && rcu_dereference_check(sk->sk_dst_cache, 1)) - sk_tx_queue_set(sk, queue_index); + if (sk) { + struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + } } } @@ -2200,7 +2205,13 @@ int netdev_max_backlog __read_mostly = 1000; int netdev_budget __read_mostly = 300; int weight_p __read_mostly = 64; /* old backlog weight */ -DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; +/* Called with irq disabled */ +static inline void ____napi_schedule(struct softnet_data *sd, + struct napi_struct *napi) +{ + list_add_tail(&napi->poll_list, &sd->poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); +} #ifdef CONFIG_RPS @@ -2225,7 +2236,11 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, int cpu = -1; u8 ip_proto; u16 tcpu; - u32 addr1, addr2, ports, ihl; + u32 addr1, addr2, ihl; + union { + u32 v32; + u16 v16[2]; + } ports; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -2271,7 +2286,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, default: goto done; } - ports = 0; switch (ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -2281,25 +2295,20 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, case IPPROTO_SCTP: case IPPROTO_UDPLITE: if (pskb_may_pull(skb, (ihl * 4) + 4)) { - __be16 *hports = (__be16 *) (skb->data + (ihl * 4)); - u32 sport, dport; - - sport = (__force u16) hports[0]; - dport = (__force u16) hports[1]; - if (dport < sport) - swap(sport, dport); - ports = (sport << 16) + dport; + ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); + if (ports.v16[1] < ports.v16[0]) + swap(ports.v16[0], ports.v16[1]); + break; } - break; - default: + ports.v32 = 0; break; } /* get a consistent hash (same value on both flow directions) */ if (addr2 < addr1) swap(addr1, addr2); - skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd); + skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); if (!skb->rxhash) skb->rxhash = 1; @@ -2362,8 +2371,8 @@ static void rps_trigger_softirq(void *data) { struct softnet_data *sd = data; - __napi_schedule(&sd->backlog); - __get_cpu_var(netdev_rx_stat).received_rps++; + ____napi_schedule(sd, &sd->backlog); + sd->received_rps++; } #endif /* CONFIG_RPS */ @@ -2402,15 +2411,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); - __get_cpu_var(netdev_rx_stat).total++; rps_lock(sd); - if (sd->input_pkt_queue.qlen <= netdev_max_backlog) { - if (sd->input_pkt_queue.qlen) { + if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { + if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); #ifdef CONFIG_RPS - *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen; + *qtail = sd->input_queue_head + + skb_queue_len(&sd->input_pkt_queue); #endif rps_unlock(sd); local_irq_restore(flags); @@ -2420,14 +2429,14 @@ enqueue: /* Schedule NAPI for backlog device */ if (napi_schedule_prep(&sd->backlog)) { if (!rps_ipi_queued(sd)) - __napi_schedule(&sd->backlog); + ____napi_schedule(sd, &sd->backlog); } goto enqueue; } + sd->dropped++; rps_unlock(sd); - __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); kfree_skb(skb); @@ -2527,6 +2536,7 @@ static void net_tx_action(struct softirq_action *h) local_irq_disable(); head = sd->output_queue; sd->output_queue = NULL; + sd->output_queue_tailp = &sd->output_queue; local_irq_enable(); while (head) { @@ -2801,7 +2811,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb->dev = master; } - __get_cpu_var(netdev_rx_stat).total++; + __get_cpu_var(softnet_data).processed++; skb_reset_network_header(skb); skb_reset_transport_header(skb); @@ -2930,13 +2940,21 @@ static void flush_backlog(void *arg) struct sk_buff *skb, *tmp; rps_lock(sd); - skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) + skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev == dev) { __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); - input_queue_head_incr(sd); + input_queue_head_add(sd, 1); } + } rps_unlock(sd); + + skb_queue_walk_safe(&sd->process_queue, skb, tmp) { + if (skb->dev == dev) { + __skb_unlink(skb, &sd->process_queue); + kfree_skb(skb); + } + } } static int napi_gro_complete(struct sk_buff *skb) @@ -3239,30 +3257,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* + * net_rps_action sends any pending IPI's for rps. + * Note: called with local irq disabled, but exits with local irq enabled. + */ +static void net_rps_action_and_irq_enable(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + struct softnet_data *remsd = sd->rps_ipi_list; + + if (remsd) { + sd->rps_ipi_list = NULL; + + local_irq_enable(); + + /* Send pending IPI's to kick RPS processing on remote cpus. */ + while (remsd) { + struct softnet_data *next = remsd->rps_ipi_next; + + if (cpu_online(remsd->cpu)) + __smp_call_function_single(remsd->cpu, + &remsd->csd, 0); + remsd = next; + } + } else +#endif + local_irq_enable(); +} + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); +#ifdef CONFIG_RPS + /* Check if we have pending ipi, its better to send them now, + * not waiting net_rx_action() end. + */ + if (sd->rps_ipi_list) { + local_irq_disable(); + net_rps_action_and_irq_enable(sd); + } +#endif napi->weight = weight_p; - do { + local_irq_disable(); + while (work < quota) { struct sk_buff *skb; + unsigned int qlen; - local_irq_disable(); - rps_lock(sd); - skb = __skb_dequeue(&sd->input_pkt_queue); - if (!skb) { - __napi_complete(napi); - rps_unlock(sd); + while ((skb = __skb_dequeue(&sd->process_queue))) { local_irq_enable(); - break; + __netif_receive_skb(skb); + if (++work >= quota) + return work; + local_irq_disable(); } - input_queue_head_incr(sd); - rps_unlock(sd); - local_irq_enable(); - __netif_receive_skb(skb); - } while (++work < quota); + rps_lock(sd); + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen) { + input_queue_head_add(sd, qlen); + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); + } + if (qlen < quota - work) { + /* + * Inline a custom version of __napi_complete(). + * only current cpu owns and manipulates this napi, + * and NAPI_STATE_SCHED is the only possible flag set on backlog. + * we can use a plain write instead of clear_bit(), + * and we dont need an smp_mb() memory barrier. + */ + list_del(&napi->poll_list); + napi->state = 0; + + quota = work + qlen; + } + rps_unlock(sd); + } + local_irq_enable(); return work; } @@ -3278,8 +3351,7 @@ void __napi_schedule(struct napi_struct *n) unsigned long flags; local_irq_save(flags); - list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + ____napi_schedule(&__get_cpu_var(softnet_data), n); local_irq_restore(flags); } EXPORT_SYMBOL(__napi_schedule); @@ -3350,45 +3422,16 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); -/* - * net_rps_action sends any pending IPI's for rps. - * Note: called with local irq disabled, but exits with local irq enabled. - */ -static void net_rps_action_and_irq_disable(void) -{ -#ifdef CONFIG_RPS - struct softnet_data *sd = &__get_cpu_var(softnet_data); - struct softnet_data *remsd = sd->rps_ipi_list; - - if (remsd) { - sd->rps_ipi_list = NULL; - - local_irq_enable(); - - /* Send pending IPI's to kick RPS processing on remote cpus. */ - while (remsd) { - struct softnet_data *next = remsd->rps_ipi_next; - - if (cpu_online(remsd->cpu)) - __smp_call_function_single(remsd->cpu, - &remsd->csd, 0); - remsd = next; - } - } else -#endif - local_irq_enable(); -} - static void net_rx_action(struct softirq_action *h) { - struct list_head *list = &__get_cpu_var(softnet_data).poll_list; + struct softnet_data *sd = &__get_cpu_var(softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; local_irq_disable(); - while (!list_empty(list)) { + while (!list_empty(&sd->poll_list)) { struct napi_struct *n; int work, weight; @@ -3406,7 +3449,7 @@ static void net_rx_action(struct softirq_action *h) * entries to the tail of this list, and only ->poll() * calls can remove this head entry from the list. */ - n = list_first_entry(list, struct napi_struct, poll_list); + n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); have = netpoll_poll_lock(n); @@ -3441,13 +3484,13 @@ static void net_rx_action(struct softirq_action *h) napi_complete(n); local_irq_disable(); } else - list_move_tail(&n->poll_list, list); + list_move_tail(&n->poll_list, &sd->poll_list); } netpoll_poll_unlock(have); } out: - net_rps_action_and_irq_disable(); + net_rps_action_and_irq_enable(sd); #ifdef CONFIG_NET_DMA /* @@ -3460,7 +3503,7 @@ out: return; softnet_break: - __get_cpu_var(netdev_rx_stat).time_squeeze++; + sd->time_squeeze++; __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } @@ -3661,17 +3704,17 @@ static int dev_seq_show(struct seq_file *seq, void *v) return 0; } -static struct netif_rx_stats *softnet_get_online(loff_t *pos) +static struct softnet_data *softnet_get_online(loff_t *pos) { - struct netif_rx_stats *rc = NULL; + struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { - rc = &per_cpu(netdev_rx_stat, *pos); + sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; - return rc; + return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) @@ -3691,12 +3734,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { - struct netif_rx_stats *s = v; + struct softnet_data *sd = v; seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - s->total, s->dropped, s->time_squeeze, 0, + sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision, s->received_rps); + sd->cpu_collision, sd->received_rps); return 0; } @@ -5584,7 +5627,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; @@ -5605,13 +5647,13 @@ static int dev_cpu_callback(struct notifier_block *nfb, *list_skb = oldsd->completion_queue; oldsd->completion_queue = NULL; - /* Find end of our output_queue. */ - list_net = &sd->output_queue; - while (*list_net) - list_net = &(*list_net)->next_sched; /* Append output queue from offline CPU. */ - *list_net = oldsd->output_queue; - oldsd->output_queue = NULL; + if (oldsd->output_queue) { + *sd->output_queue_tailp = oldsd->output_queue; + sd->output_queue_tailp = oldsd->output_queue_tailp; + oldsd->output_queue = NULL; + oldsd->output_queue_tailp = &oldsd->output_queue; + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -5619,8 +5661,10 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); - input_queue_head_incr(oldsd); + input_queue_head_add(oldsd, 1); } + while ((skb = __skb_dequeue(&oldsd->process_queue))) + netif_rx(skb); return NOTIFY_OK; } @@ -5838,10 +5882,13 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); + memset(sd, 0, sizeof(*sd)); skb_queue_head_init(&sd->input_pkt_queue); + skb_queue_head_init(&sd->process_queue); sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); - + sd->output_queue = NULL; + sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS sd->csd.func = rps_trigger_softirq; sd->csd.info = sd; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1bc6659..42e84e0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -122,7 +122,7 @@ errout: } struct fib_rules_ops * -fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) +fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) { struct fib_rules_ops *ops; int err; diff --git a/net/core/filter.c b/net/core/filter.c index ff943be..da69fb7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -302,6 +302,8 @@ load_b: A = skb->pkt_type; continue; case SKF_AD_IFINDEX: + if (!skb->dev) + return 0; A = skb->dev->ifindex; continue; case SKF_AD_MARK: @@ -310,6 +312,11 @@ load_b: case SKF_AD_QUEUE: A = skb->queue_mapping; continue; + case SKF_AD_HATYPE: + if (!skb->dev) + return 0; + A = skb->dev->type; + continue; case SKF_AD_NLATTR: { struct nlattr *nla; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bd8c471..c988e68 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static void net_generic_release(struct rcu_head *rcu) +{ + struct net_generic *ng; + + ng = container_of(rcu, struct net_generic, rcu); + kfree(ng); +} + +static int net_assign_generic(struct net *net, int id, void *data) +{ + struct net_generic *ng, *old_ng; + + BUG_ON(!mutex_is_locked(&net_mutex)); + BUG_ON(id == 0); + + ng = old_ng = net->gen; + if (old_ng->len >= id) + goto assign; + + ng = kzalloc(sizeof(struct net_generic) + + id * sizeof(void *), GFP_KERNEL); + if (ng == NULL) + return -ENOMEM; + + /* + * Some synchronisation notes: + * + * The net_generic explores the net->gen array inside rcu + * read section. Besides once set the net->gen->ptr[x] + * pointer never changes (see rules in netns/generic.h). + * + * That said, we simply duplicate this array and schedule + * the old copy for kfree after a grace period. + */ + + ng->len = id; + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); + + rcu_assign_pointer(net->gen, ng); + call_rcu(&old_ng->rcu, net_generic_release); +assign: + ng->ptr[id - 1] = data; + return 0; +} + static int ops_init(const struct pernet_operations *ops, struct net *net) { int err; @@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); * addition run the exit method for all existing network * namespaces. */ -void unregister_pernet_subsys(struct pernet_operations *module) +void unregister_pernet_subsys(struct pernet_operations *ops) { mutex_lock(&net_mutex); - unregister_pernet_operations(module); + unregister_pernet_operations(ops); mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); - -static void net_generic_release(struct rcu_head *rcu) -{ - struct net_generic *ng; - - ng = container_of(rcu, struct net_generic, rcu); - kfree(ng); -} - -int net_assign_generic(struct net *net, int id, void *data) -{ - struct net_generic *ng, *old_ng; - - BUG_ON(!mutex_is_locked(&net_mutex)); - BUG_ON(id == 0); - - ng = old_ng = net->gen; - if (old_ng->len >= id) - goto assign; - - ng = kzalloc(sizeof(struct net_generic) + - id * sizeof(void *), GFP_KERNEL); - if (ng == NULL) - return -ENOMEM; - - /* - * Some synchronisation notes: - * - * The net_generic explores the net->gen array inside rcu - * read section. Besides once set the net->gen->ptr[x] - * pointer never changes (see rules in netns/generic.h). - * - * That said, we simply duplicate this array and schedule - * the old copy for kfree after a grace period. - */ - - ng->len = id; - memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); - - rcu_assign_pointer(net->gen, ng); - call_rcu(&old_ng->rcu, net_generic_release); -assign: - ng->ptr[id - 1] = data; - return 0; -} -EXPORT_SYMBOL_GPL(net_assign_generic); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a58f59b..94825b1 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi) } } -void netpoll_poll(struct netpoll *np) +void netpoll_poll_dev(struct net_device *dev) { - struct net_device *dev = np->dev; const struct net_device_ops *ops; if (!dev || !netif_running(dev)) @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) zap_completion_queue(); } +void netpoll_poll(struct netpoll *np) +{ + netpoll_poll_dev(np->dev); +} + static void refill_skbs(void) { struct sk_buff *skb; @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; unsigned long tries; @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { + dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); + dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->netdev_ops->ndo_poll_controller) { + npinfo->netpoll = np; + + if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || + !ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) } if (atomic_dec_and_test(&npinfo->refcnt)) { + const struct net_device_ops *ops; skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); cancel_rearming_delayed_work(&npinfo->tx_work); @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); kfree(npinfo); - np->dev->npinfo = NULL; + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(np->dev); + else + np->dev->npinfo = NULL; } } @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) atomic_dec(&trapped); } +EXPORT_SYMBOL(netpoll_send_skb); EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); EXPORT_SYMBOL(netpoll_print_options); @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); +EXPORT_SYMBOL(netpoll_poll_dev); EXPORT_SYMBOL(netpoll_poll); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 78c85985c..23a71cb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *rtnl_msg_handlers[NPROTO]; +static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -118,7 +118,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) { struct rtnl_link *tab; - if (protocol < NPROTO) + if (protocol <= RTNL_FAMILY_MAX) tab = rtnl_msg_handlers[protocol]; else tab = NULL; @@ -133,7 +133,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) { struct rtnl_link *tab; - if (protocol < NPROTO) + if (protocol <= RTNL_FAMILY_MAX) tab = rtnl_msg_handlers[protocol]; else tab = NULL; @@ -167,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype, struct rtnl_link *tab; int msgindex; - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); tab = rtnl_msg_handlers[protocol]; @@ -219,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype) { int msgindex; - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); if (rtnl_msg_handlers[protocol] == NULL) @@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); kfree(rtnl_msg_handlers[protocol]); rtnl_msg_handlers[protocol] = NULL; @@ -1319,10 +1319,11 @@ replay: err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) { + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); + if (err < 0) goto out; - } err = rtnl_configure_link(dev, ifm); if (err < 0) @@ -1384,7 +1385,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; - for (idx = 1; idx < NPROTO; idx++) { + for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; if (idx < s_idx || idx == PF_PACKET) continue; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bdea0ef..a9b0e1f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { * * Out of line support code for skb_put(). Not user callable. */ -void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, int sz, void *here) { printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " "data:%p tail:%#lx end:%#lx dev:%s\n", @@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -EXPORT_SYMBOL(skb_over_panic); /** * skb_under_panic - private function @@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic); * Out of line support code for skb_push(). Not user callable. */ -void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_under_panic(struct sk_buff *skb, int sz, void *here) { printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " "data:%p tail:%#lx end:%#lx dev:%s\n", @@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -EXPORT_SYMBOL(skb_under_panic); /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the @@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); if (!skb) goto out; + prefetchw(skb); size = SKB_DATA_ALIGN(size); data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), gfp_mask, node); if (!data) goto nodata; + prefetchw(data + size); /* * Only clear those fields we need to clear, not those that we will @@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->tx_flags.flags = 0; - skb_frag_list_init(skb); - memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); if (fclone) { struct sk_buff *child = skb + 1; @@ -507,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) return 0; skb_release_head_state(skb); + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->tx_flags.flags = 0; - skb_frag_list_init(skb); - memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->data = skb->head + NET_SKB_PAD; @@ -1053,7 +1040,7 @@ EXPORT_SYMBOL(skb_push); */ unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) { - return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); + return skb_pull_inline(skb, len); } EXPORT_SYMBOL(skb_pull); diff --git a/net/core/sock.c b/net/core/sock.c index 7effa1e..94c4aff 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; + if (sk_rcvqueues_full(sk, skb)) { + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } if (nested) bh_lock_sock_nested(sk); else @@ -1207,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); - newsk->sk_sleep = NULL; + newsk->sk_wq = NULL; if (newsk->sk_prot->sockets_allocated) percpu_counter_inc(newsk->sk_prot->sockets_allocated); @@ -1395,7 +1399,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) if (signal_pending(current)) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) @@ -1404,7 +1408,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; timeo = schedule_timeout(timeo); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } @@ -1570,11 +1574,11 @@ int sk_wait_data(struct sock *sk, long *timeo) int rc; DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); @@ -1796,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); - read_unlock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_all(&wq->wait); + rcu_read_unlock(); } static void sock_def_error_report(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_poll(sk->sk_sleep, POLLERR); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_readable(struct sock *sk, int len) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ @@ -1838,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_destruct(struct sock *sk) @@ -1885,7 +1901,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); @@ -1893,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) if (sock) { sk->sk_type = sock->type; - sk->sk_sleep = &sock->wait; + sk->sk_wq = sock->wq; sock->sk = sk; } else - sk->sk_sleep = NULL; + sk->sk_wq = NULL; spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); diff --git a/net/core/stream.c b/net/core/stream.c index a37debf..cc196f42b 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -28,15 +28,19 @@ void sk_stream_write_space(struct sock *sk) { struct socket *sock = sk->sk_socket; + struct socket_wq *wq; if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); - if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) + if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); } } @@ -66,13 +70,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, !sk->sk_err && !((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); return 0; @@ -96,13 +100,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) break; } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } } @@ -126,7 +130,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) while (1) { set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; @@ -157,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/dccp/output.c b/net/dccp/output.c index e98b65e..aadbdb5 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss); void dccp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /** @@ -225,7 +227,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) dccp_pr_debug("delayed send by %d msec\n", delay); jiffdelay = msecs_to_jiffies(delay); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; release_sock(sk); @@ -241,7 +243,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); } while ((delay = rc) > 0); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; do_error: diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a0e38d8..b03ecf6 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 55e3b6b..d6b93d1 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -832,7 +832,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CC) @@ -850,9 +850,9 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -873,7 +873,7 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == DN_CC) @@ -891,9 +891,9 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1040,7 +1040,7 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); @@ -1060,9 +1060,9 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return skb == NULL ? ERR_PTR(err) : skb; } @@ -1746,11 +1746,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } skb_queue_walk_safe(queue, skb, n) { @@ -2003,12 +2003,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index af28dcc..48fdf10 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -216,8 +216,8 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static struct fib_rules_ops dn_fib_rules_ops_template = { - .family = FIB_RULES_DECNET, +static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { + .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), .action = dn_fib_rule_action, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 205a1c1..61ec032 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb) default: printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n", - dev->name, (int)eth->h_proto); + dev->name, ntohs(eth->h_proto)); memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; @@ -162,7 +162,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb_reset_mac_header(skb); - skb_pull(skb, ETH_HLEN); + skb_pull_inline(skb, ETH_HLEN); eth = eth_hdr(skb); if (unlikely(is_multicast_ether_addr(eth->h_dest))) { diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index c7da600..93c91b6 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -151,6 +151,9 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + if (!dev) + return -ENODEV; + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c5376c7..c6c43bc 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -419,7 +419,7 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - inet_rps_reset_flow(sk); + sock_rps_reset_flow(sk); /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); @@ -548,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -561,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } @@ -722,7 +722,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) @@ -737,7 +737,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, { struct sock *sk = sock->sk; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) @@ -755,7 +755,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int addr_len = 0; int err; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); @@ -1323,8 +1323,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto out_unlock; - id = ntohl(*(u32 *)&iph->id); - flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + id = ntohl(*(__be32 *)&iph->id); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1337,8 +1337,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if ((iph->protocol ^ iph2->protocol) | (iph->tos ^ iph2->tos) | - (iph->saddr ^ iph2->saddr) | - (iph->daddr ^ iph2->daddr)) { + ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | + ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 3ec84fe..76daeb5 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -245,8 +245,8 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) rt_cache_flush(ops->fro_net, -1); } -static struct fib_rules_ops fib4_rules_ops_template = { - .family = FIB_RULES_IPV4, +static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { + .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), .action = fib4_rule_action, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 59a8387..c98f115 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -209,7 +209,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { struct node *ret = tnode_get_child(tn, i); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline int tnode_child_length(const struct tnode *tn) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429..e0a3e35 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -234,7 +234,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) @@ -253,7 +253,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) if (!timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d979710..2528974 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -120,7 +120,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b0aa054..ce23178 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -954,6 +954,22 @@ e_inval: return -EINVAL; } +/** + * ip_queue_rcv_skb - Queue an skb into sock receive queue + * @sk: socket + * @skb: buffer + * + * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option + * is not set, we drop skb dst entry now, while dst cache line is hot. + */ +int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) + skb_dst_drop(skb); + return sock_queue_rcv_skb(sk, skb); +} +EXPORT_SYMBOL(ip_queue_rcv_skb); + int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1aa498d..f3f1c6b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -128,8 +128,8 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm); +static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -216,8 +216,8 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static struct fib_rules_ops ipmr_rules_ops_template = { - .family = FIB_RULES_IPMR, +static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { + .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), .action = ipmr_rule_action, @@ -831,7 +831,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = (skb_tail_pointer(skb) - (u8 *)nlh); } else { @@ -1772,10 +1772,10 @@ int ip_mr_input(struct sk_buff *skb) vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) { - int err = ipmr_cache_unresolved(mrt, vif, skb); + int err2 = ipmr_cache_unresolved(mrt, vif, skb); read_unlock(&mrt_lock); - return err; + return err2; } read_unlock(&mrt_lock); kfree_skb(skb); @@ -1904,9 +1904,8 @@ drop: } #endif -static int -ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, - struct rtmsg *rtm) +static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; @@ -1994,11 +1993,93 @@ int ipmr_get_route(struct net *net, if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ipmr_fill_mroute(mrt, skb, cache, rtm); + err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); return err; } +static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 pid, u32 seq, struct mfc_cache *c) +{ + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = RTNL_FAMILY_IPMR; + rtm->rtm_dst_len = 32; + rtm->rtm_src_len = 32; + rtm->rtm_tos = 0; + rtm->rtm_table = mrt->id; + NLA_PUT_U32(skb, RTA_TABLE, mrt->id); + rtm->rtm_type = RTN_MULTICAST; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_protocol = RTPROT_UNSPEC; + rtm->rtm_flags = 0; + + NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); + NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); + + if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mr_table *mrt; + struct mfc_cache *mfc; + unsigned int t = 0, s_t; + unsigned int h = 0, s_h; + unsigned int e = 0, s_e; + + s_t = cb->args[0]; + s_h = cb->args[1]; + s_e = cb->args[2]; + + read_lock(&mrt_lock); + ipmr_for_each_table(mrt, net) { + if (t < s_t) + goto next_table; + if (t > s_t) + s_h = 0; + for (h = s_h; h < MFC_LINES; h++) { + list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { + if (e < s_e) + goto next_entry; + if (ipmr_fill_mroute(mrt, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + mfc) < 0) + goto done; +next_entry: + e++; + } + e = s_e = 0; + } + s_h = 0; +next_table: + t++; + } +done: + read_unlock(&mrt_lock); + + cb->args[2] = e; + cb->args[1] = h; + cb->args[0] = t; + + return skb->len; +} + #ifdef CONFIG_PROC_FS /* * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif @@ -2227,9 +2308,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct ipmr_mfc_iter *it = seq->private; const struct mr_table *mrt = it->mrt; - seq_printf(seq, "%08lX %08lX %-3hd", - (unsigned long) mfc->mfc_mcastgrp, - (unsigned long) mfc->mfc_origin, + seq_printf(seq, "%08X %08X %-3hd", + (__force u32) mfc->mfc_mcastgrp, + (__force u32) mfc->mfc_origin, mfc->mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { @@ -2355,6 +2436,7 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbda0d5..2c7a163 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ - if (sock_queue_rcv_skb(sk, skb) < 0) { + if (ip_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb562fd..dea3f92 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; -static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static int rt_chain_length_max __read_mostly = 20; static struct delayed_work expires_work; @@ -258,10 +257,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); (__raw_get_cpu_var(rt_cache_stat).field++) static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, - int genid) + int genid) { - return jhash_3words((__force u32)(__be32)(daddr), - (__force u32)(__be32)(saddr), + return jhash_3words((__force u32)daddr, (__force u32)saddr, idx, genid) & rt_hash_mask; } @@ -378,12 +376,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) struct rtable *r = v; int len; - seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" - "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", + seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" + "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->u.dst.dev ? r->u.dst.dev->name : "*", - (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, + (__force u32)r->rt_dst, + (__force u32)r->rt_gateway, r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (unsigned long)r->rt_src, + r->u.dst.__use, 0, (__force u32)r->rt_src, (dst_metric(&r->u.dst, RTAX_ADVMSS) ? (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), dst_metric(&r->u.dst, RTAX_WINDOW), @@ -685,18 +684,17 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | + return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | + ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | (fl1->iif ^ fl2->iif)) == 0); } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | + return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | + ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ - *(u16 *)&fl2->nl_u.ip4_u.tos) | + (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -919,32 +917,11 @@ void rt_cache_flush_batch(void) rt_do_flush(!in_softirq()); } -/* - * We change rt_genid and let gc do the cleanup - */ -static void rt_secret_rebuild(unsigned long __net) -{ - struct net *net = (struct net *)__net; - rt_cache_invalidate(net); - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); -} - -static void rt_secret_rebuild_oneshot(struct net *net) -{ - del_timer_sync(&net->ipv4.rt_secret_timer); - rt_cache_invalidate(net); - if (ip_rt_secret_interval) - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); -} - static void rt_emergency_hash_rebuild(struct net *net) { - if (net_ratelimit()) { + if (net_ratelimit()) printk(KERN_WARNING "Route hash chain too long!\n"); - printk(KERN_WARNING "Adjust your secret_interval!\n"); - } - - rt_secret_rebuild_oneshot(net); + rt_cache_invalidate(net); } /* @@ -2319,8 +2296,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->u.dst.rt_next)) { - if (((rth->fl.fl4_dst ^ daddr) | - (rth->fl.fl4_src ^ saddr) | + if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | + ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && @@ -3102,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static void rt_secret_reschedule(int old) -{ - struct net *net; - int new = ip_rt_secret_interval; - int diff = new - old; - - if (!diff) - return; - - rtnl_lock(); - for_each_net(net) { - int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); - long time; - - if (!new) - continue; - - if (deleted) { - time = net->ipv4.rt_secret_timer.expires - jiffies; - - if (time <= 0 || (time += diff) <= 0) - time = 0; - } else - time = new; - - mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); - } - rtnl_unlock(); -} - -static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - int old = ip_rt_secret_interval; - int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); - - rt_secret_reschedule(old); - - return ret; -} - static ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", @@ -3252,13 +3187,6 @@ static ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "secret_interval", - .data = &ip_rt_secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = ipv4_sysctl_rt_secret_interval, - }, { } }; @@ -3337,34 +3265,15 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { }; #endif - -static __net_init int rt_secret_timer_init(struct net *net) +static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->ipv4.rt_genid, - (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - - net->ipv4.rt_secret_timer.function = rt_secret_rebuild; - net->ipv4.rt_secret_timer.data = (unsigned long)net; - init_timer_deferrable(&net->ipv4.rt_secret_timer); - - if (ip_rt_secret_interval) { - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); - } + get_random_bytes(&net->ipv4.rt_genid, + sizeof(net->ipv4.rt_genid)); return 0; } -static __net_exit void rt_secret_timer_exit(struct net *net) -{ - del_timer_sync(&net->ipv4.rt_secret_timer); -} - -static __net_initdata struct pernet_operations rt_secret_timer_ops = { - .init = rt_secret_timer_init, - .exit = rt_secret_timer_exit, +static __net_initdata struct pernet_operations rt_genid_ops = { + .init = rt_genid_init, }; @@ -3425,9 +3334,6 @@ int __init ip_rt_init(void) schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (register_pernet_subsys(&rt_secret_timer_ops)) - printk(KERN_ERR "Unable to setup rt_secret_timer\n"); - if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM @@ -3439,6 +3345,7 @@ int __init ip_rt_init(void) #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif + register_pernet_subsys(&rt_genid_ops); return rc; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf6..8ce2974 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -378,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); @@ -2298,7 +2298,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; + u32 elapsed = keepalive_time_elapsed(tp); if (tp->keepalive_time > elapsed) elapsed = tp->keepalive_time - elapsed; else @@ -2721,7 +2721,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct tcphdr *th2; unsigned int len; unsigned int thlen; - unsigned int flags; + __be32 flags; unsigned int mss = 1; unsigned int hlen; unsigned int off; @@ -2771,10 +2771,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: flush = NAPI_GRO_CB(p)->flush; - flush |= flags & TCP_FLAG_CWR; - flush |= (flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= th->ack_seq ^ th2->ack_seq; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); @@ -2795,8 +2795,9 @@ found: out_check_final: flush = len < mss; - flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | - TCP_FLAG_SYN | TCP_FLAG_FIN); + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) pp = head; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ae3ec15..e82162c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4367,6 +4367,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) goto drop; + skb_dst_drop(skb); __skb_pull(skb, th->doff * 4); TCP_ECN_accept_cwr(tp, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad08392..771f814 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1286,8 +1286,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_release; /* Secret recipe starts with IP addresses */ - *mess++ ^= daddr; - *mess++ ^= saddr; + *mess++ ^= (__force u32)daddr; + *mess++ ^= (__force u32)saddr; /* plus variable length Initiator Cookie */ c = (u8 *)mess; @@ -1672,7 +1672,7 @@ process: skb->dev = NULL; - inet_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb->rxhash); bh_lock_sock_nested(sk); ret = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b7d71f..5db3a2c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,7 +861,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { - th->urg_ptr = 0xFFFF; + th->urg_ptr = htons(0xFFFF); th->urg = 1; } } @@ -888,7 +888,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_data_sent(tp, skb, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, + tcp_skb_pcount(skb)); err = icsk->icsk_af_ops->queue_xmit(skb); if (likely(err <= 0)) @@ -2485,7 +2486,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, *tail-- ^= TCP_SKB_CB(skb)->seq + 1; /* recommended */ - *tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ sha_transform((__u32 *)&xvp->cookie_bakery[0], @@ -2503,7 +2504,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), tp, &opts); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c732be0..440a5c6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -517,7 +517,7 @@ static void tcp_keepalive_timer (unsigned long data) struct sock *sk = (struct sock *) data; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __u32 elapsed; + u32 elapsed; /* Only process if socket is not in use. */ bh_lock_sock(sk); @@ -554,7 +554,7 @@ static void tcp_keepalive_timer (unsigned long data) if (tp->packets_out || tcp_send_head(sk)) goto resched; - elapsed = tcp_time_stamp - tp->rcv_tstamp; + elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { if (icsk->icsk_probes_out >= keepalive_probes(tp)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 666b963..4560b29 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -307,13 +307,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, unsigned int port) { - return jhash_1word(saddr, net_hash_mix(net)) ^ port; + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); + udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); @@ -466,14 +466,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, daddr, hnum, dif, hslot2, slot2); if (!result) { - hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); + hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - INADDR_ANY, hnum, dif, + htonl(INADDR_ANY), hnum, dif, hslot2, slot2); } rcu_read_unlock(); @@ -1062,10 +1062,10 @@ static unsigned int first_packet_length(struct sock *sk) spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { - lock_sock(sk); + lock_sock_bh(sk); __skb_queue_purge(&list_kill); sk_mem_reclaim_partial(sk); - release_sock(sk); + unlock_sock_bh(sk); } return res; } @@ -1196,10 +1196,10 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - release_sock(sk); + unlock_sock_bh(sk); if (noblock) return -EAGAIN; @@ -1217,7 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - inet_rps_save_rxhash(sk, 0); + sock_rps_save_rxhash(sk, 0); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1262,9 +1262,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - inet_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb->rxhash); - rc = sock_queue_rcv_skb(sk, skb); + rc = ip_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; } + + if (sk_rcvqueues_full(sk, skb)) + goto drop; + rc = 0; bh_lock_sock(sk); @@ -1620,9 +1624,9 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { - lock_sock(sk); + lock_sock_bh(sk); udp_flush_pending_frames(sk); - release_sock(sk); + unlock_sock_bh(sk); } /* diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7cba884..3984f52 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -588,7 +588,8 @@ static u32 ipv6_addr_hash(const struct in6_addr *addr) * We perform the hash function over the last 64 bits of the address * This will include the IEEE address token on links that support it. */ - return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0) + return jhash_2words((__force u32)addr->s6_addr32[2], + (__force u32)addr->s6_addr32[3], 0) & (IN6_ADDR_HSIZE - 1); } @@ -1345,7 +1346,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct hlist_node *node; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -2958,7 +2959,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { struct hlist_node *n; - hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; @@ -2973,12 +2974,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu(ifa, n, addr_lst) + hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; while (++state->bucket < IN6_ADDR_HSIZE) { - hlist_for_each_entry(ifa, n, + hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) { if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; @@ -2999,7 +3000,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu) + __acquires(rcu_bh) { rcu_read_lock_bh(); return if6_get_idx(seq, *pos); @@ -3015,7 +3016,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(rcu) + __releases(rcu_bh) { rcu_read_unlock_bh(); } @@ -3092,7 +3093,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) unsigned int hash = ipv6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3126,7 +3127,7 @@ static void addrconf_verify(unsigned long foo) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu(ifp, node, + hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[i], addr_lst) { unsigned long age; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3192aa0..d2df314 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk) if ((skb = xchg(&np->pktoptions, NULL)) != NULL) kfree_skb(skb); + if ((skb = xchg(&np->rxpmtu, NULL)) != NULL) + kfree_skb(skb); + /* Free flowlabels */ fl6_free_socklist(sk); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 622dc79..5959230 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -278,6 +278,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) kfree_skb(skb); } +void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *iph; + struct sk_buff *skb; + struct ip6_mtuinfo *mtu_info; + + if (!np->rxopt.bits.rxpmtu) + return; + + skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); + if (!skb) + return; + + skb_put(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + iph = ipv6_hdr(skb); + ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + + mtu_info = IP6CBMTU(skb); + if (!mtu_info) { + kfree_skb(skb); + return; + } + + mtu_info->ip6m_mtu = mtu; + mtu_info->ip6m_addr.sin6_family = AF_INET6; + mtu_info->ip6m_addr.sin6_port = 0; + mtu_info->ip6m_addr.sin6_flowinfo = 0; + mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); + + __skb_pull(skb, skb_tail_pointer(skb) - skb->data); + skb_reset_transport_header(skb); + + skb = xchg(&np->rxpmtu, skb); + kfree_skb(skb); +} + /* * Handle MSG_ERRQUEUE */ @@ -381,6 +420,54 @@ out: return err; } +/* + * Handle IPV6_RECVPATHMTU + */ +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sk_buff *skb; + struct sockaddr_in6 *sin; + struct ip6_mtuinfo mtu_info; + int err; + int copied; + + err = -EAGAIN; + skb = xchg(&np->rxpmtu, NULL); + if (skb == NULL) + goto out; + + copied = skb->len; + if (copied > len) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto out_free_skb; + + sock_recv_timestamp(msg, sk, skb); + + memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info)); + + sin = (struct sockaddr_in6 *)msg->msg_name; + if (sin) { + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; + sin->sin6_port = 0; + sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; + ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr); + } + + put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); + + err = copied; + +out_free_skb: + kfree_skb(skb); +out: + return err; +} int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) @@ -497,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) int datagram_send_ctl(struct net *net, struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit, int *tclass) + int *hlimit, int *tclass, int *dontfrag) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -737,6 +824,25 @@ int datagram_send_ctl(struct net *net, break; } + + case IPV6_DONTFRAG: + { + int df; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + df = *(int *)CMSG_DATA(cmsg); + if (df < 0 || df > 1) + goto exit_f; + + err = 0; + *dontfrag = df; + + break; + } default: LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8124f16..8e44f8f 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -237,8 +237,8 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static struct fib_rules_ops fib6_rules_ops_template = { - .family = FIB_RULES_IPV6, +static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { + .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 12d2fa4..ce79929 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -481,7 +481,7 @@ route_done: len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, (struct rt6_info*)dst, - MSG_DONTWAIT); + MSG_DONTWAIT, np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ip6_flush_pending_frames(sk); @@ -561,7 +561,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, - (struct rt6_info*)dst, MSG_DONTWAIT); + (struct rt6_info*)dst, MSG_DONTWAIT, + np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dc6e0b8..92a122b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -144,7 +144,8 @@ static __inline__ __be32 addr_bit_set(void *token, int fn_bit) * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. */ - return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5]; + return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & + addr[fn_bit >> 5]; } static __inline__ struct fib6_node * node_alloc(void) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 14e2321..1365468 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -360,7 +360,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, + &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7f12e30..5173aca 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -92,7 +92,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } @@ -216,8 +216,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, } kfree_skb(skb); skb = skb2; - if (sk) - skb_set_owner_w(skb, sk); + skb_set_owner_w(skb, sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -623,7 +622,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (!skb->local_df) { + if (!skb->local_df && skb->len > mtu) { skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), @@ -1103,7 +1102,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, - struct rt6_info *rt, unsigned int flags) + struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -1217,15 +1216,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, */ inet->cork.length += length; - if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + if (length > mtu) { + int proto = sk->sk_protocol; + if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ + ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); - if (err) - goto error; - return 0; + if (proto == IPPROTO_UDP && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags); + if (err) + goto error; + return 0; + } } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1160400..bd43f015 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -337,6 +337,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_RECVPATHMTU: + if (optlen < sizeof(int)) + goto e_inval; + np->rxopt.bits.rxpmtu = valbool; + retv = 0; + break; + case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: @@ -451,7 +458,8 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, + &junk); if (retv) goto done; update: @@ -767,6 +775,17 @@ pref_skip_coa: break; } + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + np->min_hopcount = val; + break; + case IPV6_DONTFRAG: + np->dontfrag = valbool; + retv = 0; + break; } release_sock(sk); @@ -1055,6 +1074,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.rxflow; break; + case IPV6_RECVPATHMTU: + val = np->rxopt.bits.rxpmtu; + break; + + case IPV6_PATHMTU: + { + struct dst_entry *dst; + struct ip6_mtuinfo mtuinfo; + + if (len < sizeof(mtuinfo)) + return -EINVAL; + + len = sizeof(mtuinfo); + memset(&mtuinfo, 0, sizeof(mtuinfo)); + + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) + mtuinfo.ip6m_mtu = dst_mtu(dst); + rcu_read_unlock(); + if (!mtuinfo.ip6m_mtu) + return -ENOTCONN; + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &mtuinfo, len)) + return -EFAULT; + + return 0; + break; + } + case IPV6_UNICAST_HOPS: case IPV6_MULTICAST_HOPS: { @@ -1116,6 +1167,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val |= IPV6_PREFER_SRC_HOME; break; + case IPV6_MINHOPCOUNT: + val = np->min_hopcount; + break; + + case IPV6_DONTFRAG: + val = np->dontfrag; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f9d05ce..59f1881 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -44,6 +44,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <net/mld.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -71,54 +72,11 @@ #define MDBG(x) #endif -/* - * These header formats should be in a separate include file, but icmpv6.h - * doesn't have in6_addr defined in all cases, there is no __u128, and no - * other files reference these. - * - * +-DLS 4/14/03 - */ - -/* Multicast Listener Discovery version 2 headers */ - -struct mld2_grec { - __u8 grec_type; - __u8 grec_auxwords; - __be16 grec_nsrcs; - struct in6_addr grec_mca; - struct in6_addr grec_src[0]; -}; - -struct mld2_report { - __u8 type; - __u8 resv1; - __sum16 csum; - __be16 resv2; - __be16 ngrec; - struct mld2_grec grec[0]; -}; - -struct mld2_query { - __u8 type; - __u8 code; - __sum16 csum; - __be16 mrc; - __be16 resv1; - struct in6_addr mca; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 qrv:3, - suppress:1, - resv2:4; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u8 resv2:4, - suppress:1, - qrv:3; -#else -#error "Please fix <asm/byteorder.h>" -#endif - __u8 qqic; - __be16 nsrcs; - struct in6_addr srcs[0]; +/* Ensure that we have struct in6_addr aligned on 32bit word. */ +static void *__mld2_query_bugs[] __attribute__((__unused__)) = { + BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4), + BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4), + BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4) }; static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; @@ -157,14 +115,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, ((idev)->mc_v1_seen && \ time_before(jiffies, (idev)->mc_v1_seen))) -#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) -#define MLDV2_EXP(thresh, nbmant, nbexp, value) \ - ((value) < (thresh) ? (value) : \ - ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \ - (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) - -#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value) - #define IPV6_MLD_MAX_MSF 64 int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; @@ -1161,7 +1111,7 @@ int igmp6_event_query(struct sk_buff *skb) struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; - struct icmp6hdr *hdr; + struct mld_msg *mld; int group_type; int mark = 0; int len; @@ -1182,8 +1132,8 @@ int igmp6_event_query(struct sk_buff *skb) if (idev == NULL) return 0; - hdr = icmp6_hdr(skb); - group = (struct in6_addr *) (hdr + 1); + mld = (struct mld_msg *)icmp6_hdr(skb); + group = &mld->mld_mca; group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && @@ -1197,7 +1147,7 @@ int igmp6_event_query(struct sk_buff *skb) /* MLDv1 router present */ /* Translate milliseconds to jiffies */ - max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000; + max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000; switchback = (idev->mc_qrv + 1) * max_delay; idev->mc_v1_seen = jiffies + switchback; @@ -1216,14 +1166,14 @@ int igmp6_event_query(struct sk_buff *skb) return -EINVAL; } mlh2 = (struct mld2_query *)skb_transport_header(skb); - max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; + max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) max_delay = 1; idev->mc_maxdelay = max_delay; - if (mlh2->qrv) - idev->mc_qrv = mlh2->qrv; + if (mlh2->mld2q_qrv) + idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->nsrcs) { + if (mlh2->mld2q_nsrcs) { in6_dev_put(idev); return -EINVAL; /* no sources allowed */ } @@ -1232,9 +1182,9 @@ int igmp6_event_query(struct sk_buff *skb) return 0; } /* mark sources to include, if group & source-specific */ - if (mlh2->nsrcs != 0) { + if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) { + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { in6_dev_put(idev); return -EINVAL; } @@ -1270,7 +1220,7 @@ int igmp6_event_query(struct sk_buff *skb) ma->mca_flags &= ~MAF_GSQUERY; } if (!(ma->mca_flags & MAF_GSQUERY) || - mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs)) + mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs)) igmp6_group_queried(ma, max_delay); spin_unlock_bh(&ma->mca_lock); break; @@ -1286,9 +1236,8 @@ int igmp6_event_query(struct sk_buff *skb) int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; - struct in6_addr *addrp; struct inet6_dev *idev; - struct icmp6hdr *hdr; + struct mld_msg *mld; int addr_type; /* Our own report looped back. Ignore it. */ @@ -1300,10 +1249,10 @@ int igmp6_event_report(struct sk_buff *skb) skb->pkt_type != PACKET_BROADCAST) return 0; - if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr))) return -EINVAL; - hdr = icmp6_hdr(skb); + mld = (struct mld_msg *)icmp6_hdr(skb); /* Drop reports with not link local source */ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); @@ -1311,8 +1260,6 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - addrp = (struct in6_addr *) (hdr + 1); - idev = in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1323,7 +1270,7 @@ int igmp6_event_report(struct sk_buff *skb) read_lock_bh(&idev->lock); for (ma = idev->mc_list; ma; ma=ma->next) { - if (ipv6_addr_equal(&ma->mca_addr, addrp)) { + if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { spin_lock(&ma->mca_lock); if (del_timer(&ma->mca_timer)) atomic_dec(&ma->mca_refcnt); @@ -1432,11 +1379,11 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); skb_put(skb, sizeof(*pmr)); pmr = (struct mld2_report *)skb_transport_header(skb); - pmr->type = ICMPV6_MLD2_REPORT; - pmr->resv1 = 0; - pmr->csum = 0; - pmr->resv2 = 0; - pmr->ngrec = 0; + pmr->mld2r_type = ICMPV6_MLD2_REPORT; + pmr->mld2r_resv1 = 0; + pmr->mld2r_cksum = 0; + pmr->mld2r_resv2 = 0; + pmr->mld2r_ngrec = 0; return skb; } @@ -1458,9 +1405,10 @@ static void mld_sendpack(struct sk_buff *skb) mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); - pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, - IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), - mldlen, 0)); + pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, + IPPROTO_ICMPV6, + csum_partial(skb_transport_header(skb), + mldlen, 0)); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); @@ -1521,7 +1469,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = 0; pgr->grec_mca = pmc->mca_addr; /* structure copy */ pmr = (struct mld2_report *)skb_transport_header(skb); - pmr->ngrec = htons(ntohs(pmr->ngrec)+1); + pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1); *ppgr = pgr; return skb; } @@ -1557,7 +1505,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, /* EX and TO_EX get a fresh packet, if needed */ if (truncate) { - if (pmr && pmr->ngrec && + if (pmr && pmr->mld2r_ngrec && AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); @@ -1770,9 +1718,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; - struct icmp6hdr *hdr; + struct mld_msg *hdr; const struct in6_addr *snd_addr, *saddr; - struct in6_addr *addrp; struct in6_addr addr_buf; int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, @@ -1820,16 +1767,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); - hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr)); - memset(hdr, 0, sizeof(struct icmp6hdr)); - hdr->icmp6_type = type; + hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg)); + memset(hdr, 0, sizeof(struct mld_msg)); + hdr->mld_type = type; + ipv6_addr_copy(&hdr->mld_mca, addr); - addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr)); - ipv6_addr_copy(addrp, addr); - - hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, len, 0)); + hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len, + IPPROTO_ICMPV6, + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 554b48b..4a4dcbe 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -381,7 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) } /* Charge it to the socket. */ - if (sock_queue_rcv_skb(sk, skb) < 0) { + if (ip_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } @@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; @@ -733,6 +736,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; + int dontfrag = -1; u16 proto; int err; @@ -811,7 +815,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -880,6 +885,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -890,7 +898,7 @@ back_from_confirm: lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - msg->msg_flags); + msg->msg_flags, dontfrag); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2438e8..05ebd78 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -815,7 +815,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, { int flags = 0; - if (rt6_need_strict(&fl->fl6_dst)) + if (fl->oif || rt6_need_strict(&fl->fl6_dst)) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl->fl6_src)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bd5ef7b..6603511 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -353,6 +353,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; + if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && @@ -1018,7 +1023,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); t1 = (struct tcphdr *) skb_push(buff, tot_len); - skb_reset_transport_header(skb); + skb_reset_transport_header(buff); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); @@ -1050,12 +1055,13 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial(t1, tot_len, 0); - memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + buff->ip_summed = CHECKSUM_PARTIAL; + buff->csum = 0; + __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); fl.proto = IPPROTO_TCP; @@ -1234,12 +1240,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; /* Secret recipe starts with IP addresses */ - d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; - d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; @@ -1677,6 +1683,7 @@ ipv6_pktoptions: static int tcp_v6_rcv(struct sk_buff *skb) { struct tcphdr *th; + struct ipv6hdr *hdr; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1703,12 +1710,13 @@ static int tcp_v6_rcv(struct sk_buff *skb) goto bad_packet; th = tcp_hdr(skb); + hdr = ipv6_hdr(skb); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); + TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); @@ -1719,6 +1727,11 @@ process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9082485..3d7a2c0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net, if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word(addr6->s6_addr32[3], mix); + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else - hash = jhash2(addr6->s6_addr32, 4, mix); + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } @@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &err); @@ -421,7 +424,7 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -430,7 +433,7 @@ csum_copy_err: UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - release_sock(sk); + unlock_sock_bh(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; @@ -511,7 +514,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) UDP6_INC_STATS_BH(sock_net(sk), @@ -581,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { + if (sk_rcvqueues_full(sk, skb)) { + kfree_skb(skb1); + goto drop; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); @@ -692,7 +699,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto short_packet; + goto discard; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -756,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* deliver */ + if (sk_rcvqueues_full(sk, skb)) { + sock_put(sk); + goto discard; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); @@ -770,9 +781,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - ulen, skb->len); + saddr, + ntohs(uh->source), + ulen, + skb->len, + daddr, + ntohs(uh->dest)); discard: UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); @@ -919,6 +935,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int ulen = len; int hlimit = -1; int tclass = -1; + int dontfrag = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; @@ -1049,7 +1066,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1120,6 +1138,9 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1143,7 +1164,7 @@ do_append_data: err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8c452fd..4a0e77e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -94,7 +94,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); + xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) return -ENODEV; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2a4efce..79986a6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -347,7 +347,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) self->tx_flow = flow; IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n", __func__); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); break; default: IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __func__); @@ -900,7 +900,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (flags & O_NONBLOCK) goto out; - err = wait_event_interruptible(*(sk->sk_sleep), + err = wait_event_interruptible(*(sk_sleep(sk)), skb_peek(&sk->sk_receive_queue)); if (err) goto out; @@ -1066,7 +1066,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, goto out; err = -ERESTARTSYS; - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (sk->sk_state != TCP_SYN_SENT))) goto out; @@ -1318,7 +1318,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, /* Check if IrTTP is wants us to slow down */ - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { err = -ERESTARTSYS; goto out; @@ -1477,7 +1477,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, if (copied >= target) break; - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* * POSIX 1003.1g mandates this order. @@ -1497,7 +1497,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* Wait process until data arrives */ schedule(); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; @@ -1787,7 +1787,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); lock_kernel(); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); mask = 0; /* Exceptional events? */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c18286a..8be324f 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -59,7 +59,7 @@ do { \ DEFINE_WAIT(__wait); \ long __timeo = timeo; \ ret = 0; \ - prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + prepare_to_wait(sk_sleep(sk), &__wait, TASK_INTERRUPTIBLE); \ while (!(condition)) { \ if (!__timeo) { \ ret = -EAGAIN; \ @@ -76,7 +76,7 @@ do { \ if (ret) \ break; \ } \ - finish_wait(sk->sk_sleep, &__wait); \ + finish_wait(sk_sleep(sk), &__wait); \ } while (0) #define iucv_sock_wait(sk, condition, timeo) \ @@ -305,11 +305,14 @@ static inline int iucv_below_msglim(struct sock *sk) */ static void iucv_sock_wake_msglim(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* Timers */ @@ -795,7 +798,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* Wait for an incoming connection */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = iucv_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -819,7 +822,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -1269,7 +1272,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ecc7aea..1712af1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1617,14 +1617,9 @@ EXPORT_SYMBOL_GPL(l2tp_session_create); static __net_init int l2tp_init_net(struct net *net) { - struct l2tp_net *pn; - int err; + struct l2tp_net *pn = net_generic(net, l2tp_net_id); int hash; - pn = kzalloc(sizeof(*pn), GFP_KERNEL); - if (!pn) - return -ENOMEM; - INIT_LIST_HEAD(&pn->l2tp_tunnel_list); spin_lock_init(&pn->l2tp_tunnel_list_lock); @@ -1633,33 +1628,11 @@ static __net_init int l2tp_init_net(struct net *net) spin_lock_init(&pn->l2tp_session_hlist_lock); - err = net_assign_generic(net, l2tp_net_id, pn); - if (err) - goto out; - return 0; - -out: - kfree(pn); - return err; -} - -static __net_exit void l2tp_exit_net(struct net *net) -{ - struct l2tp_net *pn; - - pn = net_generic(net, l2tp_net_id); - /* - * if someone has cached our net then - * further net_generic call will return NULL - */ - net_assign_generic(net, l2tp_net_id, NULL); - kfree(pn); } static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, - .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index ca1164a..58c6c4c 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -276,43 +276,16 @@ out: static __net_init int l2tp_eth_init_net(struct net *net) { - struct l2tp_eth_net *pn; - int err; - - pn = kzalloc(sizeof(*pn), GFP_KERNEL); - if (!pn) - return -ENOMEM; + struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id); INIT_LIST_HEAD(&pn->l2tp_eth_dev_list); spin_lock_init(&pn->l2tp_eth_lock); - err = net_assign_generic(net, l2tp_eth_net_id, pn); - if (err) - goto out; - return 0; - -out: - kfree(pn); - return err; -} - -static __net_exit void l2tp_eth_exit_net(struct net *net) -{ - struct l2tp_eth_net *pn; - - pn = net_generic(net, l2tp_eth_net_id); - /* - * if someone has cached our net then - * further net_generic call will return NULL - */ - net_assign_generic(net, l2tp_eth_net_id, NULL); - kfree(pn); } static __net_initdata struct pernet_operations l2tp_eth_net_ops = { .init = l2tp_eth_init_net, - .exit = l2tp_eth_exit_net, .id = &l2tp_eth_net_id, .size = sizeof(struct l2tp_eth_net), }; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2db6a9f..023ba82 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -536,7 +536,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) int rc = 0; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) break; rc = -ERESTARTSYS; @@ -547,7 +547,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) break; rc = 0; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } @@ -556,13 +556,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout) DEFINE_WAIT(wait); while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) break; if (signal_pending(current) || !timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeout; } @@ -573,7 +573,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) int rc; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || @@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) if (!timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 9598fdb..6bb9a9a 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -19,8 +19,9 @@ #include "ieee80211_i.h" #include "driver-ops.h" -void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason) +static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason, + bool from_timer) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; @@ -70,10 +71,17 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); - del_timer_sync(&tid_rx->session_timer); + if (!from_timer) + del_timer_sync(&tid_rx->session_timer); kfree(tid_rx); } +void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) +{ + ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, false); +} + /* * After accepting the AddBA Request we activated a timer, * resetting it after each frame that arrives from the originator. @@ -92,8 +100,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - __ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_TIMEOUT); + ___ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_TIMEOUT, true); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 608063f..c163d0a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -184,10 +184,9 @@ static void sta_addba_resp_timer_expired(unsigned long data) HT_AGG_STATE_REQ_STOP_BA_MSK)) != HT_ADDBA_REQUESTED_MSK) { spin_unlock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " - "(or no longer) expecting addBA response there", + "(or no longer) expecting addBA response there\n", tid); #endif return; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7dd7cda..ae37270 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -97,9 +97,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, params->mesh_id_len, params->mesh_id); - if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) - return 0; - if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) rcu_assign_pointer(sdata->u.vlan.sta, NULL); @@ -107,7 +104,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, params && params->use_4addr >= 0) sdata->u.mgd.use_4addr = params->use_4addr; - sdata->u.mntr_flags = *flags; + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) + sdata->u.mntr_flags = *flags; + return 0; } @@ -411,6 +410,17 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, + int idx, struct survey_info *survey) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + if (!local->ops->get_survey) + return -EOPNOTSUPP; + + return drv_get_survey(local, idx, survey); +} + static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_info *sinfo) { @@ -1104,6 +1114,13 @@ static int ieee80211_change_bss(struct wiphy *wiphy, changed |= BSS_CHANGED_BASIC_RATES; } + if (params->ap_isolate >= 0) { + if (params->ap_isolate) + sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + else + sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + } + ieee80211_bss_info_change_notify(sdata, changed); return 0; @@ -1388,11 +1405,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && - timeout == conf->dynamic_ps_timeout) + timeout == conf->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; - conf->dynamic_ps_timeout = timeout; + conf->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ mutex_lock(&sdata->u.mgd.mtx); @@ -1508,6 +1525,7 @@ struct cfg80211_ops mac80211_config_ops = { .change_station = ieee80211_change_station, .get_station = ieee80211_get_station, .dump_station = ieee80211_dump_station, + .dump_survey = ieee80211_dump_survey, #ifdef CONFIG_MAC80211_MESH .add_mpath = ieee80211_add_mpath, .del_mpath = ieee80211_del_mpath, diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6bc9b07..e763f15 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -39,6 +39,13 @@ static const struct file_operations sta_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ } +#define STA_OPS_RW(name) \ +static const struct file_operations sta_ ##name## _ops = { \ + .read = sta_##name##_read, \ + .write = sta_##name##_write, \ + .open = mac80211_open_file_generic, \ +} + #define STA_FILE(name, field, format) \ STA_READ_##format(name, field) \ STA_OPS(name) @@ -156,7 +163,63 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } -STA_OPS(agg_status); + +static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + char _buf[12], *buf = _buf; + struct sta_info *sta = file->private_data; + bool start, tx; + unsigned long tid; + int ret; + + if (count > sizeof(_buf)) + return -EINVAL; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + buf[sizeof(_buf) - 1] = '\0'; + + if (strncmp(buf, "tx ", 3) == 0) { + buf += 3; + tx = true; + } else if (strncmp(buf, "rx ", 3) == 0) { + buf += 3; + tx = false; + } else + return -EINVAL; + + if (strncmp(buf, "start ", 6) == 0) { + buf += 6; + start = true; + if (!tx) + return -EINVAL; + } else if (strncmp(buf, "stop ", 5) == 0) { + buf += 5; + start = false; + } else + return -EINVAL; + + tid = simple_strtoul(buf, NULL, 0); + + if (tid >= STA_TID_NUM) + return -EINVAL; + + if (tx) { + if (start) + ret = ieee80211_start_tx_ba_session(&sta->sta, tid); + else + ret = ieee80211_stop_tx_ba_session(&sta->sta, tid, + WLAN_BACK_RECIPIENT); + } else { + __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); + ret = 0; + } + + return ret ?: count; +} +STA_OPS_RW(agg_status); static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 9179196..ee8b63f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -152,14 +152,15 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, } static inline int drv_hw_scan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { int ret; might_sleep(); - ret = local->ops->hw_scan(&local->hw, req); - trace_drv_hw_scan(local, req, ret); + ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); + trace_drv_hw_scan(local, sdata, req, ret); return ret; } @@ -344,6 +345,15 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, return ret; } +static inline int drv_get_survey(struct ieee80211_local *local, int idx, + struct survey_info *survey) +{ + int ret = -EOPNOTSUPP; + if (local->ops->conf_tx) + ret = local->ops->get_survey(&local->hw, idx, survey); + /* trace_drv_get_survey(local, idx, survey, ret); */ + return ret; +} static inline void drv_rfkill_poll(struct ieee80211_local *local) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index e209cb82..ce734b5 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -363,23 +363,26 @@ TRACE_EVENT(drv_update_tkip_key, TRACE_EVENT(drv_hw_scan, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req, int ret), - TP_ARGS(local, req, ret), + TP_ARGS(local, sdata, req, ret), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " ret:%d", + LOCAL_PR_ARG,VIF_PR_ARG, __entry->ret ) ); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index e6f3b0c..b72ee64 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -92,6 +92,12 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, if (memcmp(ifibss->bssid, bssid, ETH_ALEN)) sta_info_flush(sdata->local, sdata); + /* if merging, indicate to driver that we leave the old IBSS */ + if (sdata->vif.bss_conf.ibss_joined) { + sdata->vif.bss_conf.ibss_joined = false; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS); + } + memcpy(ifibss->bssid, bssid, ETH_ALEN); sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; @@ -171,6 +177,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BSSID; bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; + bss_change |= BSS_CHANGED_IBSS; + sdata->vif.bss_conf.ibss_joined = true; ieee80211_bss_info_change_notify(sdata, bss_change); ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates); @@ -481,7 +489,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " "IBSS networks with same SSID (merge)\n", sdata->name); - ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len); + ieee80211_request_internal_scan(sdata, + ifibss->ssid, ifibss->ssid_len, + ifibss->fixed_channel ? ifibss->channel : NULL); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -588,8 +598,9 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " "join\n", sdata->name); - ieee80211_request_internal_scan(sdata, ifibss->ssid, - ifibss->ssid_len); + ieee80211_request_internal_scan(sdata, + ifibss->ssid, ifibss->ssid_len, + ifibss->fixed_channel ? ifibss->channel : NULL); } else { int interval = IEEE80211_SCAN_INTERVAL; @@ -897,6 +908,12 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.channel = params->channel; sdata->u.ibss.fixed_channel = params->channel_fixed; + /* fix ourselves to that channel now already */ + if (params->channel_fixed) { + sdata->local->oper_channel = params->channel; + sdata->local->oper_channel_type = NL80211_CHAN_NO_HT; + } + if (params->ie) { sdata->u.ibss.ie = kmemdup(params->ie, params->ie_len, GFP_KERNEL); @@ -951,7 +968,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree(sdata->u.ibss.ie); skb = sdata->u.ibss.presp; rcu_assign_pointer(sdata->u.ibss.presp, NULL); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + sdata->vif.bss_conf.ibss_joined = false; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_IBSS); synchronize_rcu(); kfree_skb(skb); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c9712f3..cbaf4981 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1019,7 +1019,8 @@ void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len); + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e8f6e3b..8d4b417 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,6 +140,7 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); + key->conf.ap_addr = sdata->dev->dev_addr; ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 011ee85..bd632e1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -442,7 +442,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) struct ieee80211_local *local = hw_to_local(hw); int result; enum ieee80211_band band; - int channels, i, j, max_bitrates; + int channels, max_bitrates; bool supp_ht; static const u32 cipher_suites[] = { WLAN_CIPHER_SUITE_WEP40, @@ -572,6 +572,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.conf.listen_interval = local->hw.max_listen_interval; + local->hw.conf.dynamic_ps_forced_timeout = -1; + result = sta_info_start(local); if (result < 0) goto fail_sta_info; @@ -606,21 +608,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_led_init(local); - /* alloc internal scan request */ - i = 0; - local->int_scan_req->ssids = &local->scan_ssid; - local->int_scan_req->n_ssids = 1; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!hw->wiphy->bands[band]) - continue; - for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) { - local->int_scan_req->channels[i] = - &hw->wiphy->bands[band]->channels[j]; - i++; - } - } - local->int_scan_req->n_channels = i; - local->network_latency_notifier.notifier_call = ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 35d8502..358226f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -175,6 +175,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, ht_changed = conf_is_ht(&local->hw.conf) != enable_ht || channel_type != local->hw.conf.channel_type; + if (local->tmp_channel) + local->tmp_channel_type = channel_type; local->oper_channel_type = channel_type; if (ht_changed) { @@ -476,6 +478,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) { struct ieee80211_sub_if_data *sdata, *found = NULL; int count = 0; + int timeout; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) { local->ps_sdata = NULL; @@ -509,6 +512,26 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); + timeout = local->hw.conf.dynamic_ps_forced_timeout; + if (timeout < 0) { + /* + * The 2 second value is there for compatibility until + * the PM_QOS_NETWORK_LATENCY is configured with real + * values. + */ + if (latency == 2000000000) + timeout = 100; + else if (latency <= 50000) + timeout = 300; + else if (latency <= 100000) + timeout = 100; + else if (latency <= 500000) + timeout = 50; + else + timeout = 0; + } + local->hw.conf.dynamic_ps_timeout = timeout; + if (beaconint_us > latency) { local->ps_sdata = NULL; } else { @@ -1331,12 +1354,17 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_ps(sdata->local, -1); mutex_unlock(&sdata->local->iflist_mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + /* * We've received a probe response, but are not sure whether * we have or will be receiving any beacons or data, so let's * schedule the timers again, just in case. */ mod_beacon_timer(sdata); + mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 72efbd8..9a08f2c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -81,8 +81,6 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, len += 8; if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) len += 1; - if (local->hw.flags & IEEE80211_HW_NOISE_DBM) - len += 1; if (len & 1) /* padding for RX_FLAGS if necessary */ len++; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index e1a3def..e14c441 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -85,7 +85,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, { struct cfg80211_bss *cbss; struct ieee80211_bss *bss; - int clen; + int clen, srlen; s32 signal = 0; if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) @@ -114,23 +114,24 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } - bss->supp_rates_len = 0; + /* replace old supported rates if we get new values */ + srlen = 0; if (elems->supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + clen = IEEE80211_MAX_SUPP_RATES; if (clen > elems->supp_rates_len) clen = elems->supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, - clen); - bss->supp_rates_len += clen; + memcpy(bss->supp_rates, elems->supp_rates, clen); + srlen += clen; } if (elems->ext_supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + clen = IEEE80211_MAX_SUPP_RATES - srlen; if (clen > elems->ext_supp_rates_len) clen = elems->ext_supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], - elems->ext_supp_rates, clen); - bss->supp_rates_len += clen; + memcpy(bss->supp_rates + srlen, elems->ext_supp_rates, clen); + srlen += clen; } + if (srlen) + bss->supp_rates_len = srlen; bss->wmm_used = elems->wmm_param || elems->wmm_info; bss->uapsd_supported = is_uapsd_supported(elems); @@ -411,7 +412,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { WARN_ON(!ieee80211_prep_hw_scan(local)); - rc = drv_hw_scan(local, local->hw_scan_req); + rc = drv_hw_scan(local, sdata, local->hw_scan_req); } else rc = ieee80211_start_sw_scan(local); @@ -655,7 +656,7 @@ void ieee80211_scan_work(struct work_struct *work) } if (local->hw_scan_req) { - int rc = drv_hw_scan(local, local->hw_scan_req); + int rc = drv_hw_scan(local, sdata, local->hw_scan_req); mutex_unlock(&local->scan_mtx); if (rc) ieee80211_scan_completed(&local->hw, true); @@ -728,10 +729,12 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, } int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, - const u8 *ssid, u8 ssid_len) + const u8 *ssid, u8 ssid_len, + struct ieee80211_channel *chan) { struct ieee80211_local *local = sdata->local; int ret = -EBUSY; + enum nl80211_band band; mutex_lock(&local->scan_mtx); @@ -739,6 +742,30 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) goto unlock; + /* fill internal scan request */ + if (!chan) { + int i, nchan = 0; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!local->hw.wiphy->bands[band]) + continue; + for (i = 0; + i < local->hw.wiphy->bands[band]->n_channels; + i++) { + local->int_scan_req->channels[nchan] = + &local->hw.wiphy->bands[band]->channels[i]; + nchan++; + } + } + + local->int_scan_req->n_channels = nchan; + } else { + local->int_scan_req->channels[0] = chan; + local->int_scan_req->n_channels = 1; + } + + local->int_scan_req->ssids = &local->scan_ssid; + local->int_scan_req->n_ssids = 1; memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); local->int_scan_req->ssids[0].ssid_len = ssid_len; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ff0eb94..7301975 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -575,7 +575,7 @@ static int sta_info_buffer_expired(struct sta_info *sta, } -static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, +static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct sta_info *sta) { unsigned long flags; @@ -583,7 +583,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; if (skb_queue_empty(&sta->ps_tx_buf)) - return; + return false; for (;;) { spin_lock_irqsave(&sta->ps_tx_buf.lock, flags); @@ -608,6 +608,8 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, if (skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); } + + return true; } static int __must_check __sta_info_destroy(struct sta_info *sta) @@ -755,15 +757,20 @@ static void sta_info_cleanup(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; struct sta_info *sta; + bool timer_needed = false; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) - sta_info_cleanup_expire_buffered(local, sta); + if (sta_info_cleanup_expire_buffered(local, sta)) + timer_needed = true; rcu_read_unlock(); if (local->quiescing) return; + if (!timer_needed) + return; + local->sta_cleanup.expires = round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); add_timer(&local->sta_cleanup); @@ -848,8 +855,12 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, struct sta_info *sta, *nxt; /* Just return a random station ... first in list ... */ - for_each_sta_info(hw_to_local(hw), addr, sta, nxt) + for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { + if (!sta->uploaded) + return NULL; return &sta->sta; + } + return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); @@ -857,14 +868,19 @@ EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr) { - struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; if (!vif) return NULL; - sdata = vif_to_sdata(vif); + sta = sta_info_get_bss(vif_to_sdata(vif), addr); + if (!sta) + return NULL; + + if (!sta->uploaded) + return NULL; - return ieee80211_find_sta_by_hw(&sdata->local->hw, addr); + return &sta->sta; } EXPORT_SYMBOL(ieee80211_find_sta); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 11805a3..94613af0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -171,6 +171,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct net_device *prev_dev = NULL; struct sta_info *sta, *tmp; int retry_count = -1, i; + int rates_idx = -1; bool send_to_cooked; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { @@ -178,6 +179,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (i >= hw->max_rates) { info->status.rates[i].idx = -1; info->status.rates[i].count = 0; + } else if (info->status.rates[i].idx >= 0) { + rates_idx = i; } retry_count += info->status.rates[i].count; @@ -206,6 +209,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && + (rates_idx != -1)) + sta->last_tx_rate = info->status.rates[rates_idx]; + if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { u16 tid, ssn; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2cb7726..f3841f4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -429,6 +429,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + struct ieee80211_local *local = tx->local; u32 staflags; if (unlikely(!sta || @@ -476,6 +477,12 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; skb_queue_tail(&sta->ps_tx_buf, tx->skb); + + if (!timer_pending(&local->sta_cleanup)) + mod_timer(&local->sta_cleanup, + round_jiffies(jiffies + + STA_INFO_CLEANUP_INTERVAL)); + return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG @@ -586,7 +593,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) struct ieee80211_hdr *hdr = (void *)tx->skb->data; struct ieee80211_supported_band *sband; struct ieee80211_rate *rate; - int i, len; + int i; + u32 len; bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; u32 sta_flags; @@ -595,7 +603,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; - len = min_t(int, tx->skb->len + FCS_LEN, + len = min_t(u32, tx->skb->len + FCS_LEN, tx->local->hw.wiphy->frag_threshold); /* set up the tx rate control struct we give the RC algo */ diff --git a/net/mac80211/work.c b/net/mac80211/work.c index bdb1d05..3dd0760 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -213,15 +213,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[wk->chan->band]; - /* - * Get all rates supported by the device and the AP as - * some APs don't like getting a superset of their rates - * in the association request (e.g. D-Link DAP 1353 in - * b-only mode)... - */ - rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, - wk->assoc.supp_rates_len, - sband, &rates); + if (wk->assoc.supp_rates_len) { + /* + * Get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. D-Link DAP 1353 in + * b-only mode)... + */ + rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, + wk->assoc.supp_rates_len, + sband, &rates); + } else { + /* + * In case AP not provide any supported rates information + * before association, we send information element(s) with + * all rates that we support. + */ + rates = ~0; + rates_len = sband->n_bitrates; + } skb = alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + /* bit too much but doesn't matter */ diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 8fb0ae6..7ba0693 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -802,7 +802,7 @@ static int sync_thread_backup(void *data) ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); while (!kthread_should_stop()) { - wait_event_interruptible(*tinfo->sock->sk->sk_sleep, + wait_event_interruptible(*sk_sleep(tinfo->sock->sk), !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) || kthread_should_stop()); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa07f04..06cb027 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -739,7 +739,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -752,7 +752,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; } @@ -798,7 +798,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -816,7 +816,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f162d59..2078a27 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2228,8 +2228,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (!net_eq(sock_net(sk), &init_net)) - return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e2a9576..af4d38b 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock) if (signal_pending(tsk)) return sock_intr_errno(timeo); - prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); - finish_wait(&sk->sk_socket->wait, &wait); + finish_wait(sk_sleep(sk), &wait); } return 0; @@ -910,10 +910,10 @@ disabled: goto out; } - prepare_to_wait(&sk->sk_socket->wait, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); - finish_wait(&sk->sk_socket->wait, &wait); + finish_wait(sk_sleep(sk), &wait); if (sk->sk_state != TCP_ESTABLISHED) goto disabled; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 9b4ced6..c33da65 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -46,9 +46,16 @@ struct phonet_net { int phonet_net_id __read_mostly; +static struct phonet_net *phonet_pernet(struct net *net) +{ + BUG_ON(!net); + + return net_generic(net, phonet_net_id); +} + struct phonet_device_list *phonet_device_list(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); return &pnn->pndevs; } @@ -261,7 +268,7 @@ static int phonet_device_autoconf(struct net_device *dev) static void phonet_route_autodel(struct net_device *dev) { - struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_net *pnn = phonet_pernet(dev_net(dev)); unsigned i; DECLARE_BITMAP(deleted, 64); @@ -313,7 +320,7 @@ static struct notifier_block phonet_device_notifier = { /* Per-namespace Phonet devices handling */ static int __net_init phonet_init_net(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) return -ENOMEM; @@ -326,7 +333,7 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); struct net_device *dev; unsigned i; @@ -376,7 +383,7 @@ void phonet_device_exit(void) int phonet_route_add(struct net_device *dev, u8 daddr) { - struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; int err = -EEXIST; @@ -393,7 +400,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr) int phonet_route_del(struct net_device *dev, u8 daddr) { - struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; @@ -413,7 +420,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr) struct net_device *phonet_route_get(struct net *net, u8 daddr) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; @@ -428,7 +435,7 @@ struct net_device *phonet_route_get(struct net *net, u8 daddr) struct net_device *phonet_route_output(struct net *net, u8 daddr) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c785bfd..6e9848bf 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, struct pep_sock *pn = pep_sk(sk); unsigned int mask = 0; - poll_wait(file, &sock->wait, wait); + poll_wait(file, sk_sleep(sk), wait); switch (sk->sk_state) { case TCP_LISTEN: diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 7919a9e..aebfecb 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -158,7 +158,7 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, unsigned int mask = 0; unsigned long flags; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (rs->rs_seen_congestion) poll_wait(file, &rds_poll_waitq, wait); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 5ea82fc..e599ba2 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -133,7 +133,7 @@ static int __init rds_rdma_listen_init(void) ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " "rdma_create_id() returned %d\n", ret); - goto out; + return ret; } sin.sin_family = AF_INET, diff --git a/net/rds/rds.h b/net/rds/rds.h index 4bec6e2..c224b5b 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -492,7 +492,7 @@ void rds_sock_put(struct rds_sock *rs); void rds_wake_sk_sleep(struct rds_sock *rs); static inline void __rds_wake_sk_sleep(struct sock *sk) { - wait_queue_head_t *waitq = sk->sk_sleep; + wait_queue_head_t *waitq = sk_sleep(sk); if (!sock_flag(sk, SOCK_DEAD) && waitq) wake_up(waitq); diff --git a/net/rds/recv.c b/net/rds/recv.c index e2a2b93..795a00b 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -432,7 +432,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, break; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), (!list_empty(&rs->rs_notify_queue) || rs->rs_cong_notify || rds_next_incoming(rs, &inc)), timeo); diff --git a/net/rds/send.c b/net/rds/send.c index 53d6795..9c1c6bc 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -915,7 +915,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4fb711a..8e45e76 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -845,7 +845,7 @@ rose_try_next_neigh: DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -858,7 +858,7 @@ rose_try_next_neigh: err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -911,7 +911,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) @@ -930,7 +930,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c060095b..0b9bb20 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk) static void rxrpc_write_space(struct sock *sk) { _enter("%p", sk); - read_lock(&sk->sk_callback_lock); + rcu_read_lock(); if (rxrpc_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + struct socket_wq *wq = rcu_dereference(sk->sk_wq); + + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* @@ -589,7 +591,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 60c2b94e..0c65013 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -91,7 +91,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* wait for a message to turn up */ release_sock(&rx->sk); - prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, TASK_INTERRUPTIBLE); ret = sock_error(&rx->sk); if (ret) @@ -102,7 +102,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, goto wait_interrupted; timeo = schedule_timeout(timeo); } - finish_wait(rx->sk.sk_sleep, &wait); + finish_wait(sk_sleep(&rx->sk), &wait); lock_sock(&rx->sk); continue; } @@ -356,7 +356,7 @@ csum_copy_error: wait_interrupted: ret = sock_intr_errno(timeo); wait_error: - finish_wait(rx->sk.sk_sleep, &wait); + finish_wait(sk_sleep(&rx->sk), &wait); if (continue_call) rxrpc_put_call(continue_call); if (copied) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index aeddabf..a969b11 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -94,7 +94,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * Another cpu is holding lock, requeue & delay xmits for * some time. */ - __get_cpu_var(netdev_rx_stat).cpu_collision++; + __get_cpu_var(softnet_data).cpu_collision++; ret = dev_requeue_skb(skb, q); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c5a9ac5..c657628 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -123,8 +123,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); - h = iph->daddr; - h2 = iph->saddr ^ iph->protocol; + h = (__force u32)iph->daddr; + h2 = (__force u32)iph->saddr ^ iph->protocol; if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || @@ -138,8 +138,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); - h = iph->daddr.s6_addr32[3]; - h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr; + h = (__force u32)iph->daddr.s6_addr32[3]; + h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || iph->nexthdr == IPPROTO_UDPLITE || @@ -150,7 +150,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) break; } default: - h = (unsigned long)skb_dst(skb) ^ skb->protocol; + h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol; h2 = (unsigned long)skb->sk; } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 58b3e88..126b014e 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -37,6 +37,18 @@ menuconfig IP_SCTP if IP_SCTP +config NET_SCTPPROBE + tristate "SCTP: Association probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to SCTP association + state in response to incoming packets. It is used for debugging + SCTP congestion control algorithms. If you don't understand + what was just said, you don't need it: say N. + + To compile this code as a module, choose M here: the + module will be called sctp_probe. + config SCTP_DBG_MSG bool "SCTP: Debug messages" help diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6b79473..5c30b7a 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_IP_SCTP) += sctp.o +obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ @@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o ssnmap.o auth.o +sctp_probe-y := probe.o + sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df5abbf..3912420 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); - /* Init all variables to a known value. */ - memset(asoc, 0, sizeof(struct sctp_association)); - /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; sctp_endpoint_hold(asoc->ep); @@ -762,7 +759,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, asoc->peer.retran_path = peer; } - if (asoc->peer.active_path == asoc->peer.retran_path) { + if (asoc->peer.active_path == asoc->peer.retran_path && + peer->state != SCTP_UNCONFIRMED) { asoc->peer.retran_path = peer; } @@ -1194,8 +1192,10 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { trans = list_entry(pos, struct sctp_transport, transports); - if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) - sctp_assoc_del_peer(asoc, &trans->ipaddr); + if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) { + sctp_assoc_rm_peer(asoc, trans); + continue; + } if (asoc->state >= SCTP_STATE_ESTABLISHED) sctp_transport_reset(trans); @@ -1318,12 +1318,13 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Keep track of the next transport in case * we don't find any active transport. */ - if (!next) + if (t->state != SCTP_UNCONFIRMED && !next) next = t; } } - asoc->peer.retran_path = t; + if (t) + asoc->peer.retran_path = t; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" " %p addr: ", @@ -1483,7 +1484,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) if (asoc->rwnd >= len) { asoc->rwnd -= len; if (over) { - asoc->rwnd_press = asoc->rwnd; + asoc->rwnd_press += asoc->rwnd; asoc->rwnd = 0; } } else { diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3eab6db..476caaf 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_abandon = 0; + msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); - msg->msg_size = 0; } /* Allocate and initialize datamsg. */ @@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu { sctp_datamsg_hold(msg); chunk->msg = msg; - msg->msg_size += chunk->skb->len; } @@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (msg_len >= first_len) { msg_len -= first_len; whole = 1; + msg->can_delay = 0; } /* How many full sized? How many bytes leftover? */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 905fda5..e10acc0 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sctp_shared_key *null_key; int err; - memset(ep, 0, sizeof(struct sctp_endpoint)); - ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; @@ -144,6 +142,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); diff --git a/net/sctp/output.c b/net/sctp/output.c index fad261d..a646681 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ - if (!chunk->resent) { - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ - - if (!tp->rto_pending) { - chunk->rtt_in_progress = 1; - tp->rto_pending = 1; - } + if (!tp->rto_pending) { + chunk->rtt_in_progress = 1; + tp->rto_pending = 1; } - - chunk->resent = 1; - has_data = 1; } @@ -681,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, * Don't delay large message writes that may have been * fragmeneted into small peices. */ - if ((len < max) && (chunk->msg->msg_size < max)) { + if ((len < max) && chunk->msg->can_delay) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abfc0b8..5d05717 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn); + __u32 *highest_new_tsn); static void sctp_mark_missing(struct sctp_outq *q, struct list_head *transmitted_queue, @@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) /* If it is data, queue it up, otherwise, send it * immediately. */ - if (SCTP_CID_DATA == chunk->chunk_hdr->type) { + if (sctp_chunk_is_data(chunk)) { /* Is it OK to queue data chunks? */ /* From 9. Termination of Association * @@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (fast_rtx && !chunk->fast_retransmit) continue; +redo: /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: + if (!pkt->has_data && !pkt->has_cookie_echo) { + /* If this packet did not contain DATA then + * retransmission did not happen, so do it + * again. We'll ignore the error here since + * control chunks are already freed so there + * is nothing we can do. + */ + sctp_packet_transmit(pkt); + goto redo; + } + /* Send this packet. */ error = sctp_packet_transmit(pkt); @@ -647,14 +659,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - /* Force start T3-rtx timer when fast retransmitting - * the earliest outstanding TSN - */ - if (!timer && fast_rtx && - ntohl(chunk->subh.data_hdr->tsn) == - asoc->ctsn_ack_point + 1) - timer = 2; - q->empty = 0; break; } @@ -854,6 +858,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (status != SCTP_XMIT_OK) { /* put the chunk back */ list_add(&chunk->list, &q->control_chunk_list); + } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + /* PR-SCTP C5) If a FORWARD TSN is sent, the + * sender MUST assure that at least one T3-rtx + * timer is running. + */ + sctp_transport_reset_timers(transport); } break; @@ -906,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport, - start_timer-1); + sctp_transport_reset_timers(transport); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -1040,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport, 0); + sctp_transport_reset_timers(transport); q->empty = 0; @@ -1100,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc, assoc->unack_data = unack_data; } -/* Return the highest new tsn that is acknowledged by the given SACK chunk. */ -static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack, - struct sctp_association *asoc) -{ - struct sctp_transport *transport; - struct sctp_chunk *chunk; - __u32 highest_new_tsn, tsn; - struct list_head *transport_list = &asoc->peer.transport_addr_list; - - highest_new_tsn = ntohl(sack->cum_tsn_ack); - - list_for_each_entry(transport, transport_list, transports) { - list_for_each_entry(chunk, &transport->transmitted, - transmitted_list) { - tsn = ntohl(chunk->subh.data_hdr->tsn); - - if (!chunk->tsn_gap_acked && - TSN_lt(highest_new_tsn, tsn) && - sctp_acked(sack, tsn)) - highest_new_tsn = tsn; - } - } - - return highest_new_tsn; -} - /* This is where we REALLY process a SACK. * * Process the SACK against the outqueue. Mostly, this just frees @@ -1145,6 +1128,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; int gap_ack_blocks; + u8 accum_moved = 0; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; @@ -1193,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) if (gap_ack_blocks) highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); - if (TSN_lt(asoc->highest_sacked, highest_tsn)) { - highest_new_tsn = highest_tsn; + if (TSN_lt(asoc->highest_sacked, highest_tsn)) asoc->highest_sacked = highest_tsn; - } else { - highest_new_tsn = sctp_highest_new_tsn(sack, asoc); - } + highest_new_tsn = sack_ctsn; /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ - sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); + sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1213,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) */ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, - transport, sack, highest_new_tsn); + transport, sack, &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of @@ -1223,16 +1204,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } + /* Move the Cumulative TSN Ack Point if appropriate. */ + if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) { + asoc->ctsn_ack_point = sack_ctsn; + accum_moved = 1; + } + if (gap_ack_blocks) { + + if (asoc->fast_recovery && accum_moved) + highest_new_tsn = highest_tsn; + list_for_each_entry(transport, transport_list, transports) sctp_mark_missing(q, &transport->transmitted, transport, highest_new_tsn, count_of_newacks); } - /* Move the Cumulative TSN Ack Point if appropriate. */ - if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) - asoc->ctsn_ack_point = sack_ctsn; - /* Update unack_data field in the assoc. */ sctp_sack_update_unack_data(asoc, sack); @@ -1315,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn_in_sack) + __u32 *highest_new_tsn_in_sack) { struct list_head *lchunk; struct sctp_chunk *tchunk; @@ -1387,7 +1374,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; @@ -1404,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); @@ -1677,7 +1664,8 @@ static void sctp_mark_missing(struct sctp_outq *q, struct sctp_chunk *chunk; __u32 tsn; char do_fast_retransmit = 0; - struct sctp_transport *primary = q->asoc->peer.primary_path; + struct sctp_association *asoc = q->asoc; + struct sctp_transport *primary = asoc->peer.primary_path; list_for_each_entry(chunk, transmitted_queue, transmitted_list) { diff --git a/net/sctp/probe.c b/net/sctp/probe.c new file mode 100644 index 0000000..db3a42b --- /dev/null +++ b/net/sctp/probe.c @@ -0,0 +1,214 @@ +/* + * sctp_probe - Observe the SCTP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * Modified for SCTP from Stephen Hemminger's code + * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/sctp.h> +#include <linux/proc_fs.h> +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/time.h> +#include <net/net_namespace.h> + +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> + +MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>"); +MODULE_DESCRIPTION("SCTP snooper"); +MODULE_LICENSE("GPL"); + +static int port __read_mostly = 0; +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +static int bufsize __read_mostly = 64 * 1024; +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +static int full __read_mostly = 1; +MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); +module_param(full, int, 0); + +static const char procname[] = "sctpprobe"; + +static struct { + struct kfifo fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timespec tstart; +} sctpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + char tbuf[256]; + + va_start(args, fmt); + len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); + va_end(args); + + kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + wake_up(&sctpw.wait); +} + +static int sctpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(&sctpw.fifo); + getnstimeofday(&sctpw.tstart); + + return 0; +} + +static ssize_t sctpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(sctpw.wait, + kfifo_len(&sctpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static const struct file_operations sctpprobe_fops = { + .owner = THIS_MODULE, + .open = sctpprobe_open, + .read = sctpprobe_read, +}; + +sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_transport *sp; + static __u32 lcwnd = 0; + struct timespec now; + + sp = asoc->peer.primary_path; + + if ((full || sp->cwnd != lcwnd) && + (!port || asoc->peer.port == port || + ep->base.bind_addr.port == port)) { + lcwnd = sp->cwnd; + + getnstimeofday(&now); + now = timespec_sub(now, sctpw.tstart); + + printl("%lu.%06lu ", (unsigned long) now.tv_sec, + (unsigned long) now.tv_nsec / NSEC_PER_USEC); + + printl("%p %5d %5d %5d %8d %5d ", asoc, + ep->base.bind_addr.port, asoc->peer.port, + asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); + + list_for_each_entry(sp, &asoc->peer.transport_addr_list, + transports) { + if (sp == asoc->peer.primary_path) + printl("*"); + + if (sp->ipaddr.sa.sa_family == AF_INET) + printl("%pI4 ", &sp->ipaddr.v4.sin_addr); + else + printl("%pI6 ", &sp->ipaddr.v6.sin6_addr); + + printl("%2u %8u %8u %8u %8u %8u ", + sp->state, sp->cwnd, sp->ssthresh, + sp->flight_size, sp->partial_bytes_acked, + sp->pathmtu); + } + printl("\n"); + } + + jprobe_return(); + return 0; +} + +static struct jprobe sctp_recv_probe = { + .kp = { + .symbol_name = "sctp_sf_eat_sack_6_2", + }, + .entry = jsctp_sf_eat_sack, +}; + +static __init int sctpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&sctpw.wait); + spin_lock_init(&sctpw.lock); + if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) + return ret; + + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, + &sctpprobe_fops)) + goto free_kfifo; + + ret = register_jprobe(&sctp_recv_probe); + if (ret) + goto remove_proc; + + pr_info("SCTP probe registered (port=%d)\n", port); + + return 0; + +remove_proc: + proc_net_remove(&init_net, procname); +free_kfifo: + kfifo_free(&sctpw.fifo); + return ret; +} + +static __exit void sctpprobe_exit(void) +{ + kfifo_free(&sctpw.fifo); + proc_net_remove(&init_net, procname); + unregister_jprobe(&sctp_recv_probe); +} + +module_init(sctpprobe_init); +module_exit(sctpprobe_exit); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 704298f..1827498 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; + fl.fl_ip_dport = daddr->v4.sin_port; fl.proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); fl.oif = asoc->base.sk->sk_bound_dev_if; + fl.fl_ip_sport = htons(asoc->base.bind_addr.port); } - if (saddr) + if (saddr) { fl.fl4_src = saddr->v4.sin_addr.s_addr; + fl.fl_ip_sport = saddr->v4.sin_port; + } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl.fl4_dst, &fl.fl4_src); @@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; + fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { dst = &rt->u.dst; goto out_unlock; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17cb400e..d8261f3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -108,7 +108,7 @@ static const struct sctp_paramhdr prsctp_param = { cpu_to_be16(sizeof(struct sctp_paramhdr)), }; -/* A helper to initialize to initialize an op error inside a +/* A helper to initialize an op error inside a * provided chunk, as most cause codes will be embedded inside an * abort chunk. */ @@ -125,6 +125,29 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); } +/* A helper to initialize an op error inside a + * provided chunk, as most cause codes will be embedded inside an + * abort chunk. Differs from sctp_init_cause in that it won't oops + * if there isn't enough space in the op error chunk + */ +int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, + size_t paylen) +{ + sctp_errhdr_t err; + __u16 len; + + /* Cause code constants are now defined in network order. */ + err.cause = cause_code; + len = sizeof(sctp_errhdr_t) + paylen; + err.length = htons(len); + + if (skb_tailroom(chunk->skb) > len) + return -ENOSPC; + chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, + sizeof(sctp_errhdr_t), + &err); + return 0; +} /* 3.3.2 Initiation (INIT) (1) * * This chunk is used to initiate a SCTP association between two @@ -208,7 +231,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sp = sctp_sk(asoc->base.sk); num_types = sp->pf->supported_addrs(sp, types); - chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); + chunksize = sizeof(init) + addrs_len; + chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (sctp_prsctp_enable) @@ -238,14 +262,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -255,7 +279,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -397,13 +422,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -412,17 +437,25 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); if (!retval) goto nomem_chunk; - /* Per the advice in RFC 2960 6.4, send this reply to - * the source of the INIT packet. + /* RFC 2960 6.4 Multi-homed SCTP Endpoints + * + * An endpoint SHOULD transmit reply chunks (e.g., SACK, + * HEARTBEAT ACK, * etc.) to the same destination transport + * address from which it received the DATA or control chunk + * to which it is replying. + * + * [INIT ACK back to where the INIT came from.] */ retval->transport = chunk->transport; + retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(initack), &initack); retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); @@ -461,18 +494,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; - /* RFC 2960 6.4 Multi-homed SCTP Endpoints - * - * An endpoint SHOULD transmit reply chunks (e.g., SACK, - * HEARTBEAT ACK, * etc.) to the same destination transport - * address from which it received the DATA or control chunk - * to which it is replying. - * - * [INIT ACK back to where the INIT came from.] - */ - if (chunk) - retval->transport = chunk->transport; - nomem_chunk: kfree(cookie); nomem_cookie: @@ -1129,6 +1150,24 @@ nodata: return retval; } +/* Create an Operation Error chunk of a fixed size, + * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) + * This is a helper function to allocate an error chunk for + * for those invalid parameter codes in which we may not want + * to report all the errors, if the incomming chunk is large + */ +static inline struct sctp_chunk *sctp_make_op_error_fixed( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) +{ + size_t size = asoc ? asoc->pathmtu : 0; + + if (!size) + size = SCTP_DEFAULT_MAXSEGMENT; + + return sctp_make_op_error_space(asoc, chunk, size); +} + /* Create an Operation Error chunk. */ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, @@ -1210,7 +1249,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, INIT_LIST_HEAD(&retval->list); retval->skb = skb; retval->asoc = (struct sctp_association *)asoc; - retval->resent = 0; retval->has_tsn = 0; retval->has_ssn = 0; retval->rtt_in_progress = 0; @@ -1371,6 +1409,18 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } +/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient + * space in the chunk + */ +void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, + int len, const void *data) +{ + if (skb_tailroom(chunk->skb) > len) + return sctp_addto_chunk(chunk, len, data); + else + return NULL; +} + /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1974,13 +2024,12 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * returning multiple unknown parameters. */ if (NULL == *errp) - *errp = sctp_make_op_error_space(asoc, chunk, - ntohs(chunk->chunk_hdr->length)); + *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk(*errp, + sctp_addto_chunk_fixed(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); } else { @@ -3315,21 +3364,6 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, sctp_chunk_free(asconf); asoc->addip_last_asconf = NULL; - /* Send the next asconf chunk from the addip chunk queue. */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - asconf = list_entry(entry, struct sctp_chunk, list); - - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } - return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4c5bed9..3b7230e 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -697,11 +697,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, { struct sctp_transport *t; - t = sctp_assoc_choose_alter_transport(asoc, + if (chunk->transport) + t = chunk->transport; + else { + t = sctp_assoc_choose_alter_transport(asoc, asoc->shutdown_last_sent_to); + chunk->transport = t; + } asoc->shutdown_last_sent_to = t; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; - chunk->transport = t; } /* Helper function to change the state of an association. */ @@ -962,6 +966,29 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, } +/* Sent the next ASCONF packet currently stored in the association. + * This happens after the ASCONF_ACK was succeffully processed. + */ +static void sctp_cmd_send_asconf(struct sctp_association *asoc) +{ + /* Send the next asconf chunk from the addip chunk + * queue. + */ + if (!list_empty(&asoc->addip_chunk_list)) { + struct list_head *entry = asoc->addip_chunk_list.next; + struct sctp_chunk *asconf = list_entry(entry, + struct sctp_chunk, list); + list_del_init(entry); + + /* Hold the chunk until an ASCONF_ACK is received. */ + sctp_chunk_hold(asconf); + if (sctp_primitive_ASCONF(asoc, asconf)) + sctp_chunk_free(asconf); + else + asoc->addip_last_asconf = asconf; + } +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real @@ -1617,6 +1644,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, } error = sctp_cmd_send_msg(asoc, cmd->obj.msg); break; + case SCTP_CMD_SEND_NEXT_ASCONF: + sctp_cmd_send_asconf(asoc); + break; default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index abf601a..24b2cd5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3676,8 +3676,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) + asconf_ack)) { + /* Successfully processed ASCONF_ACK. We can + * release the next asconf if we have one. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, + SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; + } abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c194127..ba1add0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3719,12 +3719,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - percpu_counter_inc(&sctp_sockets_allocated); - - /* Set socket backlog limit. */ - sk->sk_backlog.limit = sysctl_sctp_rmem[1]; local_bh_disable(); + percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); @@ -3741,8 +3738,8 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); + percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } @@ -4387,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, transports) { memcpy(&temp, &from->ipaddr, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) @@ -5702,7 +5699,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sctp_sock *sp = sctp_sk(sk); unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); /* A TCP-style listening socket becomes readable when the accept queue * is not empty. @@ -5943,7 +5940,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) int error; DEFINE_WAIT(wait); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -5980,14 +5977,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) sctp_lock_sock(sk); ready: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return 0; interrupted: error = sock_intr_errno(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); *err = error; return error; } @@ -6061,14 +6058,14 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ - if (sock->fasync_list && + if (sock->wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); @@ -6188,6 +6185,19 @@ do_nonblock: goto out; } +void sctp_data_ready(struct sock *sk, int len) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + POLLRDNORM | POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + rcu_read_unlock(); +} + /* If socket sndbuf has changed, wake up all per association waiters. */ void sctp_write_space(struct sock *sk) { @@ -6296,7 +6306,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { @@ -6322,7 +6332,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } @@ -6332,7 +6342,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; sctp_release_sock(sk); @@ -6340,7 +6350,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) sctp_lock_sock(sk); } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index be4d63d..fccf494 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Copy in the address. */ peer->ipaddr = *addr; peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); - peer->asoc = NULL; - - peer->dst = NULL; memset(&peer->saddr, 0, sizeof(union sctp_addr)); /* From 6.3.1 RTO Calculation: @@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rto = msecs_to_jiffies(sctp_rto_initial); - peer->rtt = 0; - peer->rttvar = 0; - peer->srtt = 0; - peer->rto_pending = 0; - peer->hb_sent = 0; - peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; - peer->init_sent_count = 0; - peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; - peer->hbinterval = 0; /* Initialize the default path max_retrans. */ peer->pathmaxrxt = sctp_max_retrans_path; - peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); - peer->T3_rtx_timer.expires = 0; - peer->hb_timer.expires = 0; - setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, @@ -113,15 +97,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); atomic_set(&peer->refcnt, 1); - peer->dead = 0; - - peer->malloced = 0; - - /* Initialize the state information for SFR-CACC */ - peer->cacc.changeover_active = 0; - peer->cacc.cycling_changeover = 0; - peer->cacc.next_tsn_at_change = 0; - peer->cacc.cacc_saw_newack = 0; return peer; } @@ -195,7 +170,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport, int force) +void sctp_transport_reset_timers(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -205,7 +180,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force) * address. */ - if (force || !timer_pending(&transport->T3_rtx_timer)) + if (!timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); @@ -403,15 +378,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) void sctp_transport_raise_cwnd(struct sctp_transport *transport, __u32 sack_ctsn, __u32 bytes_acked) { + struct sctp_association *asoc = transport->asoc; __u32 cwnd, ssthresh, flight_size, pba, pmtu; cwnd = transport->cwnd; flight_size = transport->flight_size; /* See if we need to exit Fast Recovery first */ - if (transport->fast_recovery && - TSN_lte(transport->fast_recovery_exit, sack_ctsn)) - transport->fast_recovery = 0; + if (asoc->fast_recovery && + TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) + asoc->fast_recovery = 0; /* The appropriate cwnd increase algorithm is performed if, and only * if the cumulative TSN whould advanced and the congestion window is @@ -440,7 +416,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, * 2) the destination's path MTU. This upper bound protects * against the ACK-Splitting attack outlined in [SAVAGE99]. */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; if (bytes_acked > pmtu) @@ -491,6 +467,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, void sctp_transport_lower_cwnd(struct sctp_transport *transport, sctp_lower_cwnd_t reason) { + struct sctp_association *asoc = transport->asoc; + switch (reason) { case SCTP_LOWER_CWND_T3_RTX: /* RFC 2960 Section 7.2.3, sctpimpguide @@ -501,11 +479,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); - transport->cwnd = transport->asoc->pathmtu; + 4*asoc->pathmtu); + transport->cwnd = asoc->pathmtu; - /* T3-rtx also clears fast recovery on the transport */ - transport->fast_recovery = 0; + /* T3-rtx also clears fast recovery */ + asoc->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -521,15 +499,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; /* Mark Fast recovery */ - transport->fast_recovery = 1; - transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + asoc->fast_recovery = 1; + asoc->fast_recovery_exit = asoc->next_tsn - 1; transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -549,7 +527,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if (time_after(jiffies, transport->last_time_ecne_reduced + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -565,7 +543,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * interval. */ transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); break; } @@ -650,7 +628,6 @@ void sctp_transport_reset(struct sctp_transport *t) t->error_count = 0; t->rto_pending = 0; t->hb_sent = 0; - t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; diff --git a/net/socket.c b/net/socket.c index 35bc198..dae8c6b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - init_waitqueue_head(&ei->socket.wait); + ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); + if (!ei->socket.wq) { + kmem_cache_free(sock_inode_cachep, ei); + return NULL; + } + init_waitqueue_head(&ei->socket.wq->wait); + ei->socket.wq->fasync_list = NULL; - ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; @@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } + +static void wq_free_rcu(struct rcu_head *head) +{ + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + + kfree(wq); +} + static void sock_destroy_inode(struct inode *inode) { - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) @@ -513,7 +529,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->fasync_list) + if (sock->wq->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -655,13 +671,13 @@ inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff sizeof(__u32), &skb->dropcount); } -void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); } -EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -1067,87 +1083,44 @@ static int sock_close(struct inode *inode, struct file *filp) * 1. fasync_list is modified only under process context socket lock * i.e. under semaphore. * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) - * or under socket lock. - * 3. fasync_list can be used from softirq context, so that - * modification under socket lock have to be enhanced with - * write_lock_bh(&sk->sk_callback_lock). - * --ANK (990710) + * or under socket lock */ static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna = NULL, **prev; - struct socket *sock; - struct sock *sk; - - if (on) { - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if (fna == NULL) - return -ENOMEM; - } - - sock = filp->private_data; + struct socket *sock = filp->private_data; + struct sock *sk = sock->sk; - sk = sock->sk; - if (sk == NULL) { - kfree(fna); + if (sk == NULL) return -EINVAL; - } lock_sock(sk); - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); - - prev = &(sock->fasync_list); - - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) - if (fa->fa_file == filp) - break; - - if (on) { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd = fd; - write_unlock_bh(&sk->sk_callback_lock); + fasync_helper(fd, filp, on, &sock->wq->fasync_list); - kfree(fna); - goto out; - } - fna->fa_file = filp; - fna->fa_fd = fd; - fna->magic = FASYNC_MAGIC; - fna->fa_next = sock->fasync_list; - write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list = fna; + if (!sock->wq->fasync_list) + sock_reset_flag(sk, SOCK_FASYNC); + else sock_set_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - } else { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - *prev = fa->fa_next; - if (!sock->fasync_list) - sock_reset_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - kfree(fa); - } - } -out: - release_sock(sock->sk); + release_sock(sk); return 0; } -/* This function may be called only under socket lock or callback_lock */ +/* This function may be called only under socket lock or callback_lock or rcu_lock */ int sock_wake_async(struct socket *sock, int how, int band) { - if (!sock || !sock->fasync_list) + struct socket_wq *wq; + + if (!sock) return -1; + rcu_read_lock(); + wq = rcu_dereference(sock->wq); + if (!wq || !wq->fasync_list) { + rcu_read_unlock(); + return -1; + } switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1159,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - __kill_fasync(sock->fasync_list, SIGIO, band); + kill_fasync(&wq->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - __kill_fasync(sock->fasync_list, SIGURG, band); + kill_fasync(&wq->fasync_list, SIGURG, band); } + rcu_read_unlock(); return 0; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a29f259..ce0d5b3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -419,8 +419,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -436,10 +436,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } } @@ -757,8 +757,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } /* @@ -777,8 +777,8 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } static void svc_tcp_data_ready(struct sock *sk, int count) @@ -791,8 +791,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -1494,8 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 42f09ad..699ade6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -974,7 +974,7 @@ void xprt_reserve(struct rpc_task *task) static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { - return xprt->xid++; + return (__force __be32)xprt->xid++; } static inline void xprt_init_xid(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fd90eb8..edea15a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -679,7 +679,10 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, int ret; dprintk("svcrdma: Creating RDMA socket\n"); - + if (sa->sa_family != AF_INET) { + dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); + return ERR_PTR(-EAFNOSUPPORT); + } cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) return ERR_PTR(-ENOMEM); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cfb20b8..66e889b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -446,7 +446,7 @@ static unsigned int poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; u32 mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_UNCONNECTED) || @@ -591,7 +591,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), !tport->congested); lock_sock(sk); if (res) @@ -650,7 +650,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!tport->congested || !tport->connected)); lock_sock(sk); if (res) @@ -931,7 +931,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1064,7 +1064,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1271,8 +1271,8 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) tipc_disconnect_port(tipc_sk_port(sk)); } - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); return TIPC_OK; } @@ -1343,8 +1343,8 @@ static void wakeupdispatch(struct tipc_port *tport) { struct sock *sk = (struct sock *)tport->usr_handle; - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /** @@ -1426,7 +1426,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ release_sock(sk); - res = wait_event_interruptible_timeout(*sk->sk_sleep, + res = wait_event_interruptible_timeout(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state != SS_CONNECTING)), sk->sk_rcvtimeo); @@ -1521,7 +1521,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue))); lock_sock(sk); if (res) @@ -1632,8 +1632,8 @@ restart: /* Discard any unreceived messages; wake up sleeping tasks */ discard_rx_queue(sk); - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); res = 0; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d9122e..fef2cc5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk) static void unix_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (unix_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk->sk_sleep); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* When dgram socket disconnects (or changes its peer), we clear its receive @@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion) skpair->sk_err = ECONNRESET; unix_state_unlock(skpair); skpair->sk_state_change(skpair); - read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); - read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; @@ -1142,7 +1143,7 @@ restart: newsk->sk_peercred.pid = task_tgid_vnr(current); current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); newu = unix_sk(newsk); - newsk->sk_sleep = &newu->peer_wait; + newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); /* copy address information from listening to new sock*/ @@ -1736,7 +1737,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) unix_state_lock(sk); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || @@ -1752,7 +1753,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } @@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&other->sk_callback_lock); } if (other) sock_put(other); @@ -1991,7 +1990,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ @@ -2028,7 +2027,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 14c22c3..c8df6fd 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -153,15 +153,6 @@ void unix_notinflight(struct file *fp) } } -static inline struct sk_buff *sock_queue_head(struct sock *sk) -{ - return (struct sk_buff *)&sk->sk_receive_queue; -} - -#define receive_queue_for_each_skb(sk, next, skb) \ - for (skb = sock_queue_head(sk)->next, next = skb->next; \ - skb != sock_queue_head(sk); skb = next, next = skb->next) - static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { @@ -169,7 +160,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff *next; spin_lock(&x->sk_receive_queue.lock); - receive_queue_for_each_skb(x, next, skb) { + skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { /* * Do we have file descriptors ? */ @@ -225,7 +216,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), * and perform a scan on them as well. */ spin_lock(&x->sk_receive_queue.lock); - receive_queue_for_each_skb(x, next, skb) { + skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { u = unix_sk(skb->sk); /* diff --git a/net/wireless/core.c b/net/wireless/core.c index 6ac70c1..37d0e0a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -705,7 +705,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->ps = true; else wdev->ps = false; - wdev->ps_timeout = 100; + /* allow mac80211 to determine the timeout */ + wdev->ps_timeout = -1; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 356a84a..01da83dd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -152,6 +152,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, + [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, }; /* policy for the attributes */ @@ -2442,6 +2443,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.use_cts_prot = -1; params.use_short_preamble = -1; params.use_short_slot_time = -1; + params.ap_isolate = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = @@ -2458,6 +2460,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.basic_rates_len = nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); } + if (info->attrs[NL80211_ATTR_AP_ISOLATE]) + params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); rtnl_lock(); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c273577..8ddf5ae 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -518,12 +518,16 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, ev->type = EVENT_CONNECT_RESULT; if (bssid) memcpy(ev->cr.bssid, bssid, ETH_ALEN); - ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev); - ev->cr.req_ie_len = req_ie_len; - memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len); - ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; - ev->cr.resp_ie_len = resp_ie_len; - memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); + if (req_ie_len) { + ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev); + ev->cr.req_ie_len = req_ie_len; + memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len); + } + if (resp_ie_len) { + ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; + ev->cr.resp_ie_len = resp_ie_len; + memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); + } ev->cr.status = status; spin_lock_irqsave(&wdev->event_lock, flags); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index cbddd0c..296e65e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -402,6 +402,7 @@ static void __x25_destroy_socket(struct sock *sk) /* * Queue the unaccepted socket for death */ + skb->sk->sk_state = TCP_LISTEN; sock_set_flag(skb->sk, SOCK_DEAD); x25_start_heartbeat(skb->sk); x25_sk(skb->sk)->state = X25_STATE_0; @@ -718,7 +719,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) DECLARE_WAITQUEUE(wait, current); int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); rc = -ERESTARTSYS; @@ -738,7 +739,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } @@ -838,7 +839,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) DECLARE_WAITQUEUE(wait, current); int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -858,7 +859,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index b9ef682..9005f6d 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -24,6 +24,7 @@ #include <net/sock.h> #include <linux/if_arp.h> #include <net/x25.h> +#include <net/x25device.h> static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) { @@ -115,19 +116,22 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, } switch (skb->data[0]) { - case 0x00: - skb_pull(skb, 1); - if (x25_receive_data(skb, nb)) { - x25_neigh_put(nb); - goto out; - } - break; - case 0x01: - x25_link_established(nb); - break; - case 0x02: - x25_link_terminated(nb); - break; + + case X25_IFACE_DATA: + skb_pull(skb, 1); + if (x25_receive_data(skb, nb)) { + x25_neigh_put(nb); + goto out; + } + break; + + case X25_IFACE_CONNECT: + x25_link_established(nb); + break; + + case X25_IFACE_DISCONNECT: + x25_link_terminated(nb); + break; } x25_neigh_put(nb); drop: @@ -148,7 +152,7 @@ void x25_establish_link(struct x25_neigh *nb) return; } ptr = skb_put(skb, 1); - *ptr = 0x01; + *ptr = X25_IFACE_CONNECT; break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) @@ -184,7 +188,7 @@ void x25_terminate_link(struct x25_neigh *nb) } ptr = skb_put(skb, 1); - *ptr = 0x02; + *ptr = X25_IFACE_DISCONNECT; skb->protocol = htons(ETH_P_X25); skb->dev = nb->dev; @@ -200,7 +204,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb) switch (nb->dev->type) { case ARPHRD_X25: dptr = skb_push(skb, 1); - *dptr = 0x00; + *dptr = X25_IFACE_DATA; break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index e5195c9..1396572 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -16,7 +16,8 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) { - return ntohl(daddr->a4 + saddr->a4); + u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; + return ntohl((__force __be32)sum); } static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7430ac2..31f4ba4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1732,7 +1732,7 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct dst_entry *dst, *dst_orig = *dst_p, *route; u16 family = dst_orig->ops->family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - int i, err, num_pols, num_xfrms, drop_pols = 0; + int i, err, num_pols, num_xfrms = 0, drop_pols = 0; restart: dst = NULL; |