summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/devinet.c16
-rw-r--r--net/ipv4/inet_connection_sock.c34
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv4/tcp.c59
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/udp.c73
6 files changed, 135 insertions, 58 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e92f1fd..5df2f6a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1077,12 +1077,16 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
ip_mc_up(in_dev);
/* fall through */
case NETDEV_CHANGEADDR:
- if (IN_DEV_ARP_NOTIFY(in_dev))
- arp_send(ARPOP_REQUEST, ETH_P_ARP,
- in_dev->ifa_list->ifa_address,
- dev,
- in_dev->ifa_list->ifa_address,
- NULL, dev->dev_addr, NULL);
+ /* Send gratuitous ARP to notify of link change */
+ if (IN_DEV_ARP_NOTIFY(in_dev)) {
+ struct in_ifaddr *ifa = in_dev->ifa_list;
+
+ if (ifa)
+ arp_send(ARPOP_REQUEST, ETH_P_ARP,
+ ifa->ifa_address, dev,
+ ifa->ifa_address, NULL,
+ dev->dev_addr, NULL);
+ }
break;
case NETDEV_DOWN:
ip_mc_down(in_dev);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4351ca2..537731b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries;
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+/* Decide when to expire the request and when to resend SYN-ACK */
+static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
+ const int max_retries,
+ const u8 rskq_defer_accept,
+ int *expire, int *resend)
+{
+ if (!rskq_defer_accept) {
+ *expire = req->retrans >= thresh;
+ *resend = 1;
+ return;
+ }
+ *expire = req->retrans >= thresh &&
+ (!inet_rsk(req)->acked || req->retrans >= max_retries);
+ /*
+ * Do not resend while waiting for data after ACK,
+ * start to resend on end of deferring period to give
+ * last chance for data or ACK to create established socket.
+ */
+ *resend = !inet_rsk(req)->acked ||
+ req->retrans >= rskq_defer_accept - 1;
+}
+
void inet_csk_reqsk_queue_prune(struct sock *parent,
const unsigned long interval,
const unsigned long timeout,
@@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
reqp=&lopt->syn_table[i];
while ((req = *reqp) != NULL) {
if (time_after_eq(now, req->expires)) {
- if ((req->retrans < thresh ||
- (inet_rsk(req)->acked && req->retrans < max_retries))
- && !req->rsk_ops->rtx_syn_ack(parent, req)) {
+ int expire = 0, resend = 0;
+
+ syn_ack_recalc(req, thresh, max_retries,
+ queue->rskq_defer_accept,
+ &expire, &resend);
+ if (!expire &&
+ (!resend ||
+ !req->rsk_ops->rtx_syn_ack(parent, req) ||
+ inet_rsk(req)->acked)) {
unsigned long timeo;
if (req->retrans++ == 0)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0c0b6e3..e982b5c 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
}
dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
- if (dev) {
+ if (dev)
mreq.imr_ifindex = dev->ifindex;
- dev_put(dev);
- }
} else
- dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
+ dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
err = -EADDRNOTAVAIL;
if (!dev)
break;
+ dev_put(dev);
err = -EINVAL;
if (sk->sk_bound_dev_if &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 64d0af6..90b2e06 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk)
EXPORT_SYMBOL(tcp_enter_memory_pressure);
+/* Convert seconds to retransmits based on initial and max timeout */
+static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
+{
+ u8 res = 0;
+
+ if (seconds > 0) {
+ int period = timeout;
+
+ res = 1;
+ while (seconds > period && res < 255) {
+ res++;
+ timeout <<= 1;
+ if (timeout > rto_max)
+ timeout = rto_max;
+ period += timeout;
+ }
+ }
+ return res;
+}
+
+/* Convert retransmits to seconds based on initial and max timeout */
+static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+{
+ int period = 0;
+
+ if (retrans > 0) {
+ period = timeout;
+ while (--retrans) {
+ timeout <<= 1;
+ if (timeout > rto_max)
+ timeout = rto_max;
+ period += timeout;
+ }
+ }
+ return period;
+}
+
/*
* Wait for a TCP event.
*
@@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto found_ok_skb;
if (tcp_hdr(skb)->fin)
goto found_fin_ok;
- WARN_ON(!(flags & MSG_PEEK));
+ if (WARN_ON(!(flags & MSG_PEEK)))
+ printk(KERN_INFO "recvmsg bug 2: copied %X "
+ "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
}
/* Well, if we have backlog, try to process it now yet. */
@@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_DEFER_ACCEPT:
- icsk->icsk_accept_queue.rskq_defer_accept = 0;
- if (val > 0) {
- /* Translate value in seconds to number of
- * retransmits */
- while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
- val > ((TCP_TIMEOUT_INIT / HZ) <<
- icsk->icsk_accept_queue.rskq_defer_accept))
- icsk->icsk_accept_queue.rskq_defer_accept++;
- icsk->icsk_accept_queue.rskq_defer_accept++;
- }
+ /* Translate value in seconds to number of retransmits */
+ icsk->icsk_accept_queue.rskq_defer_accept =
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
@@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = (val ? : sysctl_tcp_fin_timeout) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
- ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+ val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+ TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624c3c9..4c03598 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!(flg & TCP_FLAG_ACK))
return NULL;
- /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
- if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
+ if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
return NULL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6ec6a8a..d0d436d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -841,6 +841,42 @@ out:
return ret;
}
+
+/**
+ * first_packet_length - return length of first packet in receive queue
+ * @sk: socket
+ *
+ * Drops all bad checksum frames, until a valid one is found.
+ * Returns the length of found skb, or 0 if none is found.
+ */
+static unsigned int first_packet_length(struct sock *sk)
+{
+ struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+ struct sk_buff *skb;
+ unsigned int res;
+
+ __skb_queue_head_init(&list_kill);
+
+ spin_lock_bh(&rcvq->lock);
+ while ((skb = skb_peek(rcvq)) != NULL &&
+ udp_lib_checksum_complete(skb)) {
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ __skb_unlink(skb, rcvq);
+ __skb_queue_tail(&list_kill, skb);
+ }
+ res = skb ? skb->len : 0;
+ spin_unlock_bh(&rcvq->lock);
+
+ if (!skb_queue_empty(&list_kill)) {
+ lock_sock(sk);
+ __skb_queue_purge(&list_kill);
+ sk_mem_reclaim_partial(sk);
+ release_sock(sk);
+ }
+ return res;
+}
+
/*
* IOCTL requests applicable to the UDP protocol
*/
@@ -857,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
case SIOCINQ:
{
- struct sk_buff *skb;
- unsigned long amount;
+ unsigned int amount = first_packet_length(sk);
- amount = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
+ if (amount)
/*
* We will only return the amount
* of this packet since that is all
* that will be read.
*/
- amount = skb->len - sizeof(struct udphdr);
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ amount -= sizeof(struct udphdr);
+
return put_user(amount, (int __user *)arg);
}
@@ -1540,29 +1571,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
- int is_lite = IS_UDPLITE(sk);
/* Check for false positives due to checksum errors */
- if ((mask & POLLRDNORM) &&
- !(file->f_flags & O_NONBLOCK) &&
- !(sk->sk_shutdown & RCV_SHUTDOWN)) {
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
- struct sk_buff *skb;
-
- spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL &&
- udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INERRORS, is_lite);
- __skb_unlink(skb, rcvq);
- kfree_skb(skb);
- }
- spin_unlock_bh(&rcvq->lock);
-
- /* nothing to see, move along */
- if (skb == NULL)
- mask &= ~(POLLIN | POLLRDNORM);
- }
+ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+ !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
+ mask &= ~(POLLIN | POLLRDNORM);
return mask;
OpenPOWER on IntegriCloud