summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/inet_fragment.c5
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_tunnel.c53
-rw-r--r--net/ipv4/ip_tunnel_core.c47
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/tcp.c8
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_output.c45
11 files changed, 109 insertions, 76 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ecd2c3f..8c54870 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
- /* Note : following gso_segment() might change skb->encapsulation */
- udpfrag = !skb->encapsulation && proto == IPPROTO_UDP;
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
@@ -1502,9 +1505,9 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
bhptr = per_cpu_ptr(mib[0], cpu);
syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
do {
- start = u64_stats_fetch_begin_bh(syncp);
+ start = u64_stats_fetch_begin_irq(syncp);
v = *(((u64 *) bhptr) + offt);
- } while (u64_stats_fetch_retry_bh(syncp, start));
+ } while (u64_stats_fetch_retry_irq(syncp, start));
res += v;
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bb075fc..3b01959 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
}
work = frag_mem_limit(nf) - nf->low_thresh;
- while (work > 0) {
+ while (work > 0 || force) {
spin_lock(&nf->lru_lock);
if (list_empty(&nf->lru_list)) {
@@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
+ inet_frag_lru_add(nf, qp);
spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
- inet_frag_lru_add(nf, qp);
+
return qp;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e85445b..1a0755f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
to->ipvs_property = from->ipvs_property;
#endif
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 4fff644..66aaf50 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -93,13 +93,14 @@ static void tunnel_dst_reset(struct ip_tunnel *t)
tunnel_dst_set(t, NULL);
}
-static void tunnel_dst_reset_all(struct ip_tunnel *t)
+void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
int i;
for_each_possible_cpu(i)
__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
+EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
@@ -119,52 +120,6 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
return (struct rtable *)dst;
}
-/* Often modified stats are per cpu, other are shared (netdev->stats) */
-struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
- tot->multicast = dev->stats.multicast;
-
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- tot->collisions = dev->stats.collisions;
-
- return tot;
-}
-EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
-
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
{
@@ -763,7 +718,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu)
dev->mtu = mtu;
}
- tunnel_dst_reset_all(t);
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
}
@@ -1091,7 +1046,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(netdev_priv(dev));
- tunnel_dst_reset_all(tunnel);
+ ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6156f4e..b86f0a3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -108,7 +108,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
nf_reset(skb);
secpath_reset(skb);
skb_clear_hash_if_not_l4(skb);
- skb_dst_drop(skb);
skb->vlan_tci = 0;
skb_set_queue_mapping(skb, 0);
skb->pkt_type = PACKET_HOST;
@@ -148,3 +147,49 @@ error:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+
+/* Often modified stats are per cpu, other are shared (netdev->stats) */
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *tstats =
+ per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast;
+
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ tot->collisions = dev->stats.collisions;
+
+ return tot;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c3e0ade..7ebd6e3 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
if (IS_ERR(dst))
- return PTR_ERR(dst);;
+ return PTR_ERR(dst);
skb_dst_set(skb, dst);
}
#endif
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index d551e31..7c67667 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
}
if (map.from == map.to)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7374905..4bd6d52 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
}
}
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
int err, flags;
@@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
if (unlikely(tp->fastopen_req == NULL))
return -ENOBUFS;
tp->fastopen_req->data = msg;
+ tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
- *size = tp->fastopen_req->copied;
+ *copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
}
@@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flags = msg->msg_flags;
if (flags & MSG_FASTOPEN) {
- err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+ err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
else if (err)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index f49351e..2b9464c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size &&
- left < sk->sk_gso_max_segs)
+ left < tp->xmit_size_goal_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6e48093..e1661f4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1944,8 +1944,9 @@ void tcp_enter_loss(struct sock *sk, int how)
if (skb == tcp_send_head(sk))
break;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
tp->undo_marker = 0;
+
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
@@ -3066,6 +3067,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
flag |= FLAG_RETRANS_DATA_ACKED;
} else {
last_ackt = skb->skb_mstamp;
+ WARN_ON_ONCE(last_ackt.v64 == 0);
if (!first_ackt.v64)
first_ackt = last_ackt;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index aaa68f5..e0a7b33 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -87,8 +87,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
tcp_rearm_rto(sk);
}
- NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
- tcp_skb_pcount(skb));
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
+ tcp_skb_pcount(skb));
}
/* SND.NXT, if window was not shrunk.
@@ -780,6 +780,17 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
+ /* Here begins the tricky part :
+ * We are called from release_sock() with :
+ * 1) BH disabled
+ * 2) sk_lock.slock spinlock held
+ * 3) socket owned by us (sk->sk_lock.owned == 1)
+ *
+ * But following code is meant to be called from BH handlers,
+ * so we should keep BH disabled, but early release socket ownership
+ */
+ sock_release_ownership(sk);
+
if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
__sock_put(sk);
@@ -873,8 +884,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
fclone->fclone == SKB_FCLONE_CLONE))
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
@@ -882,6 +893,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
+ /* Our usage of tstamp should remain private */
+ skb->tstamp.tv64 = 0;
}
inet = inet_sk(sk);
@@ -2358,6 +2371,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned int cur_mss;
+ int err;
/* Inconslusive MTU probe */
if (icsk->icsk_mtup.probe_size) {
@@ -2421,11 +2435,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
GFP_ATOMIC);
- return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
} else {
- return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+
+ if (likely(!err))
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ return err;
}
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2930,7 +2948,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
- syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+ space = min_t(size_t, space, fo->size);
+
+ /* limit to order-0 allocations */
+ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+ syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
sk->sk_allocation);
if (syn_data == NULL)
goto fallback;
@@ -2960,6 +2983,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
+ /* syn_data is about to be sent, we need to take current time stamps
+ * for the packets that are in write queue : SYN packet and DATA
+ */
+ skb_mstamp_get(&syn->skb_mstamp);
+ data->skb_mstamp = syn->skb_mstamp;
+
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
tp->syn_data = (fo->copied > 0);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
OpenPOWER on IntegriCloud