summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c59
1 files changed, 33 insertions, 26 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 197c000..2aa69c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
struct tcp_sock *tp = tcp_sk(sk);
int answ;
+ bool slow;
switch (cmd) {
case SIOCINQ:
if (sk->sk_state == TCP_LISTEN)
return -EINVAL;
- lock_sock(sk);
+ slow = lock_sock_fast(sk);
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else if (sock_flag(sk, SOCK_URGINLINE) ||
@@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
answ--;
} else
answ = tp->urg_seq - tp->copied_seq;
- release_sock(sk);
+ unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
@@ -830,8 +831,8 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
return mss_now;
}
-static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
- size_t psize, int flags)
+static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
{
struct tcp_sock *tp = tcp_sk(sk);
int mss_now, size_goal;
@@ -858,12 +859,9 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
- while (psize > 0) {
+ while (size > 0) {
struct sk_buff *skb = tcp_write_queue_tail(sk);
- struct page *page = pages[poffset / PAGE_SIZE];
int copy, i;
- int offset = poffset % PAGE_SIZE;
- int size = min_t(size_t, psize, PAGE_SIZE - offset);
bool can_coalesce;
if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
@@ -912,8 +910,8 @@ new_segment:
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
copied += copy;
- poffset += copy;
- if (!(psize -= copy))
+ offset += copy;
+ if (!(size -= copy))
goto out;
if (skb->len < size_goal || (flags & MSG_OOB))
@@ -960,7 +958,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
flags);
lock_sock(sk);
- res = do_tcp_sendpages(sk, &page, offset, size, flags);
+ res = do_tcp_sendpages(sk, page, offset, size, flags);
release_sock(sk);
return res;
}
@@ -1212,7 +1210,7 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- if (copied && likely(!tp->repair))
+ if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1223,7 +1221,7 @@ wait_for_memory:
}
out:
- if (copied && likely(!tp->repair))
+ if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle);
release_sock(sk);
return copied + copied_syn;
@@ -1430,12 +1428,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
}
#endif
-static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
+static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
u32 offset;
- skb_queue_walk(&sk->sk_receive_queue, skb) {
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
offset--;
@@ -1443,6 +1441,11 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
*off = offset;
return skb;
}
+ /* This looks weird, but this can happen if TCP collapsing
+ * splitted a fat GRO packet, while we released socket lock
+ * in skb_splice_bits()
+ */
+ sk_eat_skb(sk, skb, false);
}
return NULL;
}
@@ -1484,7 +1487,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
break;
}
used = recv_actor(desc, skb, offset, len);
- if (used < 0) {
+ if (used <= 0) {
if (!copied)
copied = used;
break;
@@ -1493,15 +1496,19 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
copied += used;
offset += used;
}
- /*
- * If recv_actor drops the lock (e.g. TCP splice
+ /* If recv_actor drops the lock (e.g. TCP splice
* receive) the skb pointer might be invalid when
* getting here: tcp_collapse might have deleted it
* while aggregating skbs from the socket queue.
*/
- skb = tcp_recv_skb(sk, seq-1, &offset);
- if (!skb || (offset+1 != skb->len))
+ skb = tcp_recv_skb(sk, seq - 1, &offset);
+ if (!skb)
break;
+ /* TCP coalescing might have appended data to the skb.
+ * Try to splice more frags
+ */
+ if (offset + 1 != skb->len)
+ continue;
}
if (tcp_hdr(skb)->fin) {
sk_eat_skb(sk, skb, false);
@@ -1518,8 +1525,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
- if (copied > 0)
+ if (copied > 0) {
+ tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
+ }
return copied;
}
EXPORT_SYMBOL(tcp_read_sock);
@@ -2303,7 +2312,7 @@ void tcp_sock_destruct(struct sock *sk)
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
- return capable(CAP_NET_ADMIN) &&
+ return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
}
@@ -3589,8 +3598,7 @@ void __init tcp_init(void)
alloc_large_system_hash("TCP established",
sizeof(struct inet_ehash_bucket),
thash_entries,
- (totalram_pages >= 128 * 1024) ?
- 13 : 15,
+ 17, /* one slot per 128 KB of memory */
0,
NULL,
&tcp_hashinfo.ehash_mask,
@@ -3606,8 +3614,7 @@ void __init tcp_init(void)
alloc_large_system_hash("TCP bind",
sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
- (totalram_pages >= 128 * 1024) ?
- 13 : 15,
+ 17, /* one slot per 128 KB of memory */
0,
&tcp_hashinfo.bhash_size,
NULL,
OpenPOWER on IntegriCloud