From 51f3d02b980a338cd291d2bc7629cdfb2568424b Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 5 Nov 2014 16:46:40 -0500
Subject: net: Add and use skb_copy_datagram_msg() helper.

This encapsulates all of the skb_copy_datagram_iovec() callers
with call argument signature "skb, offset, msghdr->msg_iov, length".

When we move to iov_iters in the networking, the iov_iter object will
sit in the msghdr.

Having a helper like this means there will be less places to touch
during that transformation.

Based upon descriptions and patch from Al Viro.

Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39ec0c3..c239f47 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1377,7 +1377,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)

 	/* XXX -- need to support SO_PEEK_OFF */
 	skb_queue_walk(&sk->sk_write_queue, skb) {
-		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 		if (err)
 			break;

@@ -1833,8 +1833,7 @@ do_prequeue:
 		}

 		if (!(flags & MSG_TRUNC)) {
-			err = skb_copy_datagram_iovec(skb, offset,
-						      msg->msg_iov, used);
+			err = skb_copy_datagram_msg(skb, offset, msg, used);
 			if (err) {
 				/* Exception. Bailout! */
 				if (!copied)
--
cgit v1.1

From 7eab8d9e8a722ca07bc785f73e21c3d3418defa6 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 6 Apr 2014 21:51:23 -0400
Subject: new helper: memcpy_to_msg()

Signed-off-by: Al Viro
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c239f47..435443b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1349,7 +1349,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)

 		if (len > 0) {
 			if (!(flags & MSG_TRUNC))
-				err = memcpy_toiovec(msg->msg_iov, &c, 1);
+				err = memcpy_to_msg(msg, &c, 1);
 			len = 1;
 		} else
 			msg->msg_flags |= MSG_TRUNC;
--
cgit v1.1

From f4362a2c9524678f0459cf410403f8595e5cfce5 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 24 Nov 2014 13:26:06 -0500
Subject: switch tcp_sock->ucopy from iovec (ucopy.iov) to msghdr (ucopy.msg)

Signed-off-by: Al Viro
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a36..4a96f37 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1729,7 +1729,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,

 		if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
 			user_recv = current;
 			tp->ucopy.task = user_recv;
-			tp->ucopy.iov = msg->msg_iov;
+			tp->ucopy.msg = msg;
 		}

 		tp->ucopy.len = len;
--
cgit v1.1
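The first two patches above introduce skb_copy_datagram_msg() and memcpy_to_msg().
Their definitions live in headers outside net/ipv4/tcp.c, so they do not appear in
the diffs shown here. Judging from the call sites being converted, at this stage
they are thin wrappers over the old iovec routines, roughly along the following
lines (a sketch of this intermediate step, not the verbatim kernel definitions):

	/* Sketch: msghdr-taking wrappers around the iovec-based copy helpers.
	 * For now they still reach into msg->msg_iov; once the iov_iter lands
	 * in struct msghdr (see the next patch) they can be re-pointed at the
	 * iterator without touching any caller again.
	 */
	static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
						struct msghdr *msg, int size)
	{
		return skb_copy_datagram_iovec(from, offset, msg->msg_iov, size);
	}

	static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
	{
		return memcpy_toiovec(msg->msg_iov, data, len);
	}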
From c0371da6047abd261bc483c744dbc7d81a116172 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 24 Nov 2014 10:42:55 -0500
Subject: put iov_iter into msghdr

Note that the code _using_ ->msg_iter at that point will be very
unhappy with anything other than unshifted iovec-backed iov_iter.
We still need to convert users to proper primitives.

Signed-off-by: Al Viro
---
 net/ipv4/tcp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a96f37..54ba620 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1085,7 +1085,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	struct iovec *iov;
+	const struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int iovlen, flags, err, copied = 0;
@@ -1136,8 +1136,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	mss_now = tcp_send_mss(sk, &size_goal, flags);

 	/* Ok commence sending. */
-	iovlen = msg->msg_iovlen;
-	iov = msg->msg_iov;
+	iovlen = msg->msg_iter.nr_segs;
+	iov = msg->msg_iter.iov;
 	copied = 0;

 	err = -EPIPE;
--
cgit v1.1
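The fragment below sketches what the sending side looks like to a caller once
msg_iter is the canonical description of user buffers. It is illustrative only:
example_send() is a made-up name, and it assumes the iov_iter_init() and
sock_sendmsg() interfaces as they stood in this period, not code from the patch.

	/* Illustrative sketch: callers now describe user memory by initializing
	 * the embedded iterator instead of filling msg_iov/msg_iovlen.
	 * WRITE marks the send direction (the iovec is the data source).
	 */
	static int example_send(struct socket *sock, const struct iovec *iov,
				unsigned long nr_segs, size_t total_len)
	{
		struct msghdr msg = { };

		iov_iter_init(&msg.msg_iter, WRITE, iov, nr_segs, total_len);

		/* tcp_sendmsg() above still unwraps the iterator by hand via
		 * msg_iter.iov / msg_iter.nr_segs; converting it to the generic
		 * copy_from_iter()-style primitives is left for later patches.
		 */
		return sock_sendmsg(sock, &msg, total_len);
	}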
From 605ad7f184b60cfaacbc038aa6c55ee68dee3c89 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Sun, 7 Dec 2014 12:22:18 -0800
Subject: tcp: refine TSO autosizing

Commit 95bd09eb2750 ("tcp: TSO packets automatic sizing") tried to
control TSO size, but did this at the wrong place (sendmsg() time)

At sendmsg() time, we might have a pessimistic view of flow rate,
and we end up building very small skbs (with 2 MSS per skb).

This is bad because :
- It sends small TSO packets even in Slow Start where rate quickly
  increases.
- It tends to make socket write queue very big, increasing tcp_ack()
  processing time, but also increasing memory needs, not necessarily
  accounted for, as fast clones overhead is currently ignored.
- Lower GRO efficiency and more ACK packets.

Servers with a lot of small lived connections suffer from this.

Lets instead fill skbs as much as possible (64KB of payload), but split
them at xmit time, when we have a precise idea of the flow rate.
skb split is actually quite efficient.

Patch looks bigger than necessary, because TCP Small Queue decision now
has to take place after the eventual split.

As Neal suggested, introduce a new tcp_tso_autosize() helper, so that
tcp_tso_should_defer() can be synchronized on same goal.

Rename tp->xmit_size_goal_segs to tp->gso_segs, as this variable
contains number of mss that we can put in GSO packet, and is not
related to the autosizing goal anymore.

Tested:

40 ms rtt link

nstat >/dev/null
netperf -H remote -l -2000000 -- -s 1000000
nstat | egrep "IpInReceives|IpOutRequests|TcpOutSegs|IpExtOutOctets"

Before patch :

Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/s

 87380 2000000 2000000    0.36       44.22
IpInReceives                    600                0.0
IpOutRequests                   599                0.0
TcpOutSegs                      1397               0.0
IpExtOutOctets                  2033249            0.0

After patch :

Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380 2000000 2000000    0.36       44.27
IpInReceives                    221                0.0
IpOutRequests                   232                0.0
TcpOutSegs                      1397               0.0
IpExtOutOctets                  2013953            0.0

Signed-off-by: Eric Dumazet
Signed-off-by: Neal Cardwell
Acked-by: Yuchung Cheng
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 60 ++++++++++++++++++++--------------------------------------
 1 file changed, 21 insertions(+), 39 deletions(-)

(limited to 'net/ipv4/tcp.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a36..427aee3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,47 +835,29 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 xmit_size_goal, old_size_goal;
-
-	xmit_size_goal = mss_now;
-
-	if (large_allowed && sk_can_gso(sk)) {
-		u32 gso_size, hlen;
-
-		/* Maybe we should/could use sk->sk_prot->max_header here ? */
-		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-		       inet_csk(sk)->icsk_ext_hdr_len +
-		       tp->tcp_header_len;
-
-		/* Goal is to send at least one packet per ms,
-		 * not one big TSO packet every 100 ms.
-		 * This preserves ACK clocking and is consistent
-		 * with tcp_tso_should_defer() heuristic.
-		 */
-		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
-		gso_size = max_t(u32, gso_size,
-				 sysctl_tcp_min_tso_segs * mss_now);
-
-		xmit_size_goal = min_t(u32, gso_size,
-				       sk->sk_gso_max_size - 1 - hlen);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-
-		/* We try hard to avoid divides here */
-		old_size_goal = tp->xmit_size_goal_segs * mss_now;
-
-		if (likely(old_size_goal <= xmit_size_goal &&
-			   old_size_goal + mss_now > xmit_size_goal)) {
-			xmit_size_goal = old_size_goal;
-		} else {
-			tp->xmit_size_goal_segs =
-				min_t(u16, xmit_size_goal / mss_now,
-				      sk->sk_gso_max_segs);
-			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
-		}
+	u32 new_size_goal, size_goal, hlen;
+
+	if (!large_allowed || !sk_can_gso(sk))
+		return mss_now;
+
+	/* Maybe we should/could use sk->sk_prot->max_header here ? */
+	hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+	       inet_csk(sk)->icsk_ext_hdr_len +
+	       tp->tcp_header_len;
+
+	new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+
+	/* We try hard to avoid divides here */
+	size_goal = tp->gso_segs * mss_now;
+	if (unlikely(new_size_goal < size_goal ||
+		     new_size_goal >= size_goal + mss_now)) {
+		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
+				     sk->sk_gso_max_segs);
+		size_goal = tp->gso_segs * mss_now;
 	}

-	return max(xmit_size_goal, mss_now);
+	return max(size_goal, mss_now);
 }

 static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
--
cgit v1.1
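The tcp_tso_autosize() helper mentioned in the changelog lives in
net/ipv4/tcp_output.c, so it is not visible in the tcp.c hunk above. A sketch of
the pacing-based sizing it is described as performing, reconstructed from the
changelog rather than quoted from the patch, could look like this:

	/* Sketch: decide how many segments to put in one TSO/GSO skb at xmit
	 * time.  Per the changelog, the goal is to send at least one packet
	 * per ms at the current pacing rate instead of one big burst every
	 * 100 ms, which preserves ACK clocking.
	 */
	static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
	{
		u32 bytes, segs;

		/* Roughly one millisecond worth of payload at the pacing rate
		 * (>> 10 approximates a divide by 1000), capped by the largest
		 * GSO packet the device accepts.
		 */
		bytes = min(sk->sk_pacing_rate >> 10,
			    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);

		segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);

		return min_t(u32, segs, sk->sk_gso_max_segs);
	}

With the split moved to transmit time, tcp_xmit_size_goal() above only has to
bound the 64KB fill target, while the per-flow-rate decision is made against
this helper's answer when the skb is actually sent.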