diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 94 |
1 files changed, 77 insertions, 17 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 896e9df..c7adcb5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -640,7 +640,7 @@ static unsigned int tcp_synack_options(struct request_sock *req, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp(skb); + opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; opts->tsecr = req->ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -1514,6 +1514,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (sysctl_tcp_slow_start_after_idle && (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) tcp_cwnd_application_limited(sk); + + /* The following conditions together indicate the starvation + * is caused by insufficient sender buffer: + * 1) just sent some data (see tcp_write_xmit) + * 2) not cwnd limited (this else condition) + * 3) no more data to send (null tcp_send_head ) + * 4) application is hitting buffer limit (SOCK_NOSPACE) + */ + if (!tcp_send_head(sk) && sk->sk_socket && + test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && + (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); } } @@ -2081,6 +2093,47 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, return false; } +static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) +{ + const u32 now = tcp_time_stamp; + + if (tp->chrono_type > TCP_CHRONO_UNSPEC) + tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; + tp->chrono_start = now; + tp->chrono_type = new; +} + +void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* If there are multiple conditions worthy of tracking in a + * chronograph then the highest priority enum takes precedence + * over the other conditions. So that if something "more interesting" + * starts happening, stop the previous chrono and start a new one. + */ + if (type > tp->chrono_type) + tcp_chrono_set(tp, type); +} + +void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + + /* There are multiple conditions worthy of tracking in a + * chronograph, so that the highest priority enum takes + * precedence over the other conditions (see tcp_chrono_start). + * If a condition stops, we only stop chrono tracking if + * it's the "most interesting" or current chrono we are + * tracking and starts busy chrono if we have pending data. + */ + if (tcp_write_queue_empty(sk)) + tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); + else if (type == tp->chrono_type) + tcp_chrono_set(tp, TCP_CHRONO_BUSY); +} + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -2103,7 +2156,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unsigned int tso_segs, sent_pkts; int cwnd_quota; int result; - bool is_cwnd_limited = false; + bool is_cwnd_limited = false, is_rwnd_limited = false; u32 max_segs; sent_pkts = 0; @@ -2140,8 +2193,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) + if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) { + is_rwnd_limited = true; break; + } if (tso_segs == 1) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, @@ -2186,6 +2241,11 @@ repair: break; } + if (is_rwnd_limited) + tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED); + else + tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); + if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; @@ -2514,7 +2574,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, } /* Collapses two adjacent SKB's during retransmission. */ -static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) +static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); @@ -2525,13 +2585,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); + if (next_skb_size) { + if (next_skb_size <= skb_availroom(skb)) + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), + next_skb_size); + else if (!skb_shift(skb, next_skb, next_skb_size)) + return false; + } tcp_highest_sack_combine(sk, next_skb, skb); tcp_unlink_write_queue(next_skb, sk); - skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), - next_skb_size); - if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; @@ -2560,6 +2624,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_skb_collapse_tstamp(skb, next_skb); sk_wmem_free_skb(sk, next_skb); + return true; } /* Check if coalescing SKBs is legal. */ @@ -2567,14 +2632,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) return false; - /* TODO: SACK collapsing could be used to remove this condition */ - if (skb_shinfo(skb)->nr_frags != 0) - return false; if (skb_cloned(skb)) return false; if (skb == tcp_send_head(sk)) return false; - /* Some heurestics for collapsing over SACK'd could be invented */ + /* Some heuristics for collapsing over SACK'd could be invented */ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) return false; @@ -2612,16 +2674,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (space < 0) break; - /* Punt if not enough space exists in the first SKB for - * the data in the second - */ - if (skb->len > skb_availroom(to)) - break; if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) break; - tcp_collapse_retrans(sk, to); + if (!tcp_collapse_retrans(sk, to)) + break; } } @@ -3300,6 +3358,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) fo->copied = space; tcp_connect_queue_skb(sk, syn_data); + if (syn_data->len) + tcp_chrono_start(sk, TCP_CHRONO_BUSY); err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); |