diff options
author | Eric Dumazet <edumazet@google.com> | 2016-04-29 14:16:47 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-02 17:02:25 -0400 |
commit | c10d9310edf5aa4a676991139d1a43ec7d87e56b (patch) | |
tree | 8ba2a9ec1a83011976369f96481220cd7945eab6 /net/ipv4/tcp_input.c | |
parent | 5e59c83f2335d634946e1a4fe7bc1e921930f132 (diff) | |
download | op-kernel-dev-c10d9310edf5aa4a676991139d1a43ec7d87e56b.zip op-kernel-dev-c10d9310edf5aa4a676991139d1a43ec7d87e56b.tar.gz |
tcp: do not assume TCP code is non preemptible
We want to make the TCP stack preemptible, as draining the prequeue
and backlog queues can take a lot of time.
Many SNMP updates were assuming that BH (and preemption) was disabled.
We need to convert some __NET_INC_STATS() calls to NET_INC_STATS()
and some __TCP_INC_STATS() calls to TCP_INC_STATS().
Before using this_cpu_ptr(net->ipv4.tcp_sk) in tcp_v4_send_reset()
and tcp_v4_send_ack(), we add an explicit preempt-disabled section.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 96 |
1 files changed, 48 insertions, 48 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1fb19c9..ac85fb4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -869,7 +869,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, else mib_idx = LINUX_MIB_TCPSACKREORDER; - __NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -1062,7 +1062,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { dup_sack = true; tcp_dsack_seen(tp); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); @@ -1071,7 +1071,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, !before(start_seq_0, start_seq_1)) { dup_sack = true; tcp_dsack_seen(tp); - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV); } } @@ -1289,7 +1289,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED); return false; } @@ -1314,7 +1314,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); return true; } @@ -1473,7 +1473,7 @@ noop: return skb; fallback: - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); + NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); return NULL; } @@ -1661,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 
mib_idx = LINUX_MIB_TCPSACKDISCARD; } - __NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -1913,7 +1913,7 @@ void tcp_enter_loss(struct sock *sk) skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; } @@ -2399,7 +2399,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) else mib_idx = LINUX_MIB_TCPFULLUNDO; - __NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2421,7 +2421,7 @@ static bool tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); return true; } return false; @@ -2436,9 +2436,9 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; if (frto_undo || tcp_is_sack(tp)) @@ -2563,7 +2563,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; icsk->icsk_mtup.probe_size = 0; - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); } static void tcp_mtup_probe_success(struct sock *sk) @@ -2583,7 +2583,7 @@ static void tcp_mtup_probe_success(struct sock *sk) icsk->icsk_mtup.search_low = 
icsk->icsk_mtup.probe_size; icsk->icsk_mtup.probe_size = 0; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); } /* Do a simple retransmit without using the backoff mechanisms in @@ -2647,7 +2647,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) else mib_idx = LINUX_MIB_TCPSACKRECOVERY; - __NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; tcp_init_undo(tp); @@ -2740,7 +2740,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked) DBGUNDO(sk, "partial recovery"); tcp_undo_cwnd_reduction(sk, true); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); tcp_try_keep_open(sk); return true; } @@ -3434,7 +3434,7 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { - __NET_INC_STATS(net, mib_idx); + NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! 
*/ } } @@ -3467,7 +3467,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) challenge_count = 0; } if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } } @@ -3516,7 +3516,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); tcp_try_keep_open(sk); - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBERECOVERY); } else if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP | FLAG_DATA_SACKED))) { @@ -3621,14 +3621,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { u32 ack_ev_flags = CA_ACK_SLOWPATH; if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); @@ -4131,7 +4131,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) else mib_idx = LINUX_MIB_TCPDSACKOFOSENT; - __NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; @@ -4155,7 +4155,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { @@ -4305,7 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk, atomic_add(delta, &sk->sk_rmem_alloc); sk_mem_charge(sk, delta); - 
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; @@ -4393,7 +4393,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_ecn_check_ce(tp, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); tcp_drop(sk, skb); return; } @@ -4402,7 +4402,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tp->pred_flags = 0; inet_csk_schedule_ack(sk); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -4457,7 +4457,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); @@ -4496,7 +4496,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } @@ -4661,7 +4661,7 @@ queue_and_out: if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { /* A retransmit, 2nd most common case. Force an immediate ack. 
*/ - __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: @@ -4707,7 +4707,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, __skb_unlink(skb, list); __kfree_skb(skb); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } @@ -4866,7 +4866,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) bool res = false; if (!skb_queue_empty(&tp->out_of_order_queue)) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); __skb_queue_purge(&tp->out_of_order_queue); /* Reset SACK state. A conforming SACK implementation will @@ -4895,7 +4895,7 @@ static int tcp_prune_queue(struct sock *sk) SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); @@ -4925,7 +4925,7 @@ static int tcp_prune_queue(struct sock *sk) * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. */ - __NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. 
*/ tp->pred_flags = 0; @@ -5184,7 +5184,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDPAWS, &tp->last_oow_ack_time)) @@ -5236,8 +5236,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (th->syn) { syn_challenge: if (syn_inerr) - __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5352,7 +5352,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_data_snd_check(sk); return; } else { /* Header too small */ - __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; } } else { @@ -5380,7 +5380,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __skb_pull(skb, tcp_header_len); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); eaten = 1; } @@ -5403,7 +5403,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ eaten = tcp_queue_rcv(sk, skb, tcp_header_len, @@ -5460,8 +5460,8 @@ step5: return; csum_error: - __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); - __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); discard: tcp_drop(sk, skb); @@ -5553,13 +5553,13 
@@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, break; } tcp_rearm_rto(sk); - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; } tp->syn_data_acked = tp->syn_data; if (tp->syn_data_acked) - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); tcp_fastopen_add_skb(sk, synack); @@ -5595,7 +5595,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - __NET_INC_STATS(sock_net(sk), + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; } @@ -5965,7 +5965,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { tcp_done(sk); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; } @@ -6022,7 +6022,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; } @@ -6224,7 +6224,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, * timeout. 
*/ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; } @@ -6271,7 +6271,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (dst && strict && !tcp_peer_is_proven(req, dst, true, tmp_opt.saw_tstamp)) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } } |