summaryrefslogtreecommitdiffstats
path: root/net/core/skbuff.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-05 12:31:59 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-05 12:31:59 -0700
commit5518b69b76680a4f2df96b1deca260059db0c2de (patch)
treef33cd1519c8efb4590500f2f9617400be233238c /net/core/skbuff.c
parent8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73 (diff)
parent0e72582270c07850b92cac351c8b97d4f9c123b9 (diff)
downloadop-kernel-dev-5518b69b76680a4f2df96b1deca260059db0c2de.zip
op-kernel-dev-5518b69b76680a4f2df96b1deca260059db0c2de.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Reasonably busy this cycle, but perhaps not as busy as in the 4.12 merge window: 1) Several optimizations for UDP processing under high load from Paolo Abeni. 2) Support pacing internally in TCP when using the sch_fq packet scheduler for this is not practical. From Eric Dumazet. 3) Support mutliple filter chains per qdisc, from Jiri Pirko. 4) Move to 1ms TCP timestamp clock, from Eric Dumazet. 5) Add batch dequeueing to vhost_net, from Jason Wang. 6) Flesh out more completely SCTP checksum offload support, from Davide Caratti. 7) More plumbing of extended netlink ACKs, from David Ahern, Pablo Neira Ayuso, and Matthias Schiffer. 8) Add devlink support to nfp driver, from Simon Horman. 9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa Prabhu. 10) Add stack depth tracking to BPF verifier and use this information in the various eBPF JITs. From Alexei Starovoitov. 11) Support XDP on qed device VFs, from Yuval Mintz. 12) Introduce BPF PROG ID for better introspection of installed BPF programs. From Martin KaFai Lau. 13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann. 14) For loads, allow narrower accesses in bpf verifier checking, from Yonghong Song. 15) Support MIPS in the BPF selftests and samples infrastructure, the MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David Daney. 16) Support kernel based TLS, from Dave Watson and others. 17) Remove completely DST garbage collection, from Wei Wang. 18) Allow installing TCP MD5 rules using prefixes, from Ivan Delalande. 19) Add XDP support to Intel i40e driver, from Björn Töpel 20) Add support for TC flower offload in nfp driver, from Simon Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub Kicinski, and Bert van Leeuwen. 21) IPSEC offloading support in mlx5, from Ilan Tayari. 22) Add HW PTP support to macb driver, from Rafal Ozieblo. 23) Networking refcount_t conversions, From Elena Reshetova. 24) Add sock_ops support to BPF, from Lawrence Brako. This is useful for tuning the TCP sockopt settings of a group of applications, currently via CGROUPs" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits) net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap cxgb4: Support for get_ts_info ethtool method cxgb4: Add PTP Hardware Clock (PHC) support cxgb4: time stamping interface for PTP nfp: default to chained metadata prepend format nfp: remove legacy MAC address lookup nfp: improve order of interfaces in breakout mode net: macb: remove extraneous return when MACB_EXT_DESC is defined bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case bpf: fix return in load_bpf_file mpls: fix rtm policy in mpls_getroute net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t net, ax25: convert ax25_route.refcount from atomic_t to refcount_t net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t ...
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--net/core/skbuff.c188
1 files changed, 124 insertions, 64 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1be7c0..8b11341 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->head = NULL;
skb->truesize = sizeof(struct sk_buff);
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb->mac_header = (typeof(skb->mac_header))~0U;
out:
@@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
/* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
skb->pfmemalloc = pfmemalloc;
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
@@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
- atomic_set(&fclones->fclone_ref, 1);
+ refcount_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
}
@@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->truesize = SKB_TRUESIZE(size);
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
@@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb)
* This test would have no chance to be true for the clone,
* while here, branch prediction will be good.
*/
- if (atomic_read(&fclones->fclone_ref) == 1)
+ if (refcount_read(&fclones->fclone_ref) == 1)
goto fastpath;
break;
@@ -637,18 +637,16 @@ static void kfree_skbmem(struct sk_buff *skb)
fclones = container_of(skb, struct sk_buff_fclones, skb2);
break;
}
- if (!atomic_dec_and_test(&fclones->fclone_ref))
+ if (!refcount_dec_and_test(&fclones->fclone_ref))
return;
fastpath:
kmem_cache_free(skbuff_fclone_cache, fclones);
}
-static void skb_release_head_state(struct sk_buff *skb)
+void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
-#ifdef CONFIG_XFRM
- secpath_put(skb->sp);
-#endif
+ secpath_reset(skb);
if (skb->destructor) {
WARN_ON(in_irq());
skb->destructor(skb);
@@ -694,12 +692,9 @@ EXPORT_SYMBOL(__kfree_skb);
*/
void kfree_skb(struct sk_buff *skb)
{
- if (unlikely(!skb))
- return;
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ if (!skb_unref(skb))
return;
+
trace_kfree_skb(skb, __builtin_return_address(0));
__kfree_skb(skb);
}
@@ -746,17 +741,32 @@ EXPORT_SYMBOL(skb_tx_error);
*/
void consume_skb(struct sk_buff *skb)
{
- if (unlikely(!skb))
- return;
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ if (!skb_unref(skb))
return;
+
trace_consume_skb(skb);
__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
+/**
+ * consume_stateless_skb - free an skbuff, assuming it is stateless
+ * @skb: buffer to free
+ *
+ * Works like consume_skb(), but this variant assumes that all the head
+ * states have been already dropped.
+ */
+void consume_stateless_skb(struct sk_buff *skb)
+{
+ if (!skb_unref(skb))
+ return;
+
+ trace_consume_skb(skb);
+ if (likely(skb->head))
+ skb_release_data(skb);
+ kfree_skbmem(skb);
+}
+
void __kfree_skb_flush(void)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -807,10 +817,9 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ if (!skb_unref(skb))
return;
+
/* if reaching here SKB is ready to free */
trace_consume_skb(skb);
@@ -906,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
C(head_frag);
C(data);
C(truesize);
- atomic_set(&n->users, 1);
+ refcount_set(&n->users, 1);
atomic_inc(&(skb_shinfo(skb)->dataref));
skb->cloned = 1;
@@ -1018,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
return NULL;
if (skb->fclone == SKB_FCLONE_ORIG &&
- atomic_read(&fclones->fclone_ref) == 1) {
+ refcount_read(&fclones->fclone_ref) == 1) {
n = &fclones->skb2;
- atomic_set(&fclones->fclone_ref, 2);
+ refcount_set(&fclones->fclone_ref, 2);
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -1412,7 +1421,7 @@ EXPORT_SYMBOL(skb_pad);
* returned.
*/
-unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
{
if (tail != skb) {
skb->data_len += len;
@@ -1431,9 +1440,9 @@ EXPORT_SYMBOL_GPL(pskb_put);
* exceed the total buffer size the kernel will panic. A pointer to the
* first byte of the extra data is returned.
*/
-unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+void *skb_put(struct sk_buff *skb, unsigned int len)
{
- unsigned char *tmp = skb_tail_pointer(skb);
+ void *tmp = skb_tail_pointer(skb);
SKB_LINEAR_ASSERT(skb);
skb->tail += len;
skb->len += len;
@@ -1452,7 +1461,7 @@ EXPORT_SYMBOL(skb_put);
* start. If this would exceed the total buffer headroom the kernel will
* panic. A pointer to the first byte of the extra data is returned.
*/
-unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+void *skb_push(struct sk_buff *skb, unsigned int len)
{
skb->data -= len;
skb->len += len;
@@ -1472,7 +1481,7 @@ EXPORT_SYMBOL(skb_push);
* is returned. Once the data has been pulled future pushes will overwrite
* the old data.
*/
-unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+void *skb_pull(struct sk_buff *skb, unsigned int len)
{
return skb_pull_inline(skb, len);
}
@@ -1607,7 +1616,7 @@ EXPORT_SYMBOL(___pskb_trim);
*
* It is pretty complicated. Luckily, it is called only in exceptional cases.
*/
-unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+void *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
/* If skb has not enough free space at tail, get new one
* plus 128 bytes for future expansions. If we have enough
@@ -2243,6 +2252,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
+{
+ net_warn_ratelimited(
+ "%s: attempt to compute crc32c without libcrc32c.ko\n",
+ __func__);
+ return 0;
+}
+
+static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
+ int offset, int len)
+{
+ net_warn_ratelimited(
+ "%s: attempt to compute crc32c without libcrc32c.ko\n",
+ __func__);
+ return 0;
+}
+
+static const struct skb_checksum_ops default_crc32c_ops = {
+ .update = warn_crc32c_csum_update,
+ .combine = warn_crc32c_csum_combine,
+};
+
+const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
+ &default_crc32c_ops;
+EXPORT_SYMBOL(crc32c_csum_stub);
+
/**
* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
* @from: source buffer
@@ -2620,7 +2655,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
- skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+ skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
@@ -2988,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
get_page(pfrag->page);
skb->truesize += copy;
- atomic_add(copy, &sk->sk_wmem_alloc);
+ refcount_add(copy, &sk->sk_wmem_alloc);
skb->len += copy;
skb->data_len += copy;
offset += copy;
@@ -3029,7 +3065,7 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags);
* that the checksum difference is zero (e.g., a valid IP header)
* or you are setting ip_summed to CHECKSUM_NONE.
*/
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
unsigned char *data = skb->data;
@@ -3235,8 +3271,8 @@ normal:
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
- SKBTX_SHARED_FRAG;
+ skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
while (pos < offset + len) {
if (i >= nfrags) {
@@ -3482,24 +3518,18 @@ void __init skb_init(void)
NULL);
}
-/**
- * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
- * @skb: Socket buffer containing the buffers to be mapped
- * @sg: The scatter-gather list to map into
- * @offset: The offset into the buffer's contents to start mapping
- * @len: Length of buffer space to be mapped
- *
- * Fill the specified scatter-gather list with mappings/pointers into a
- * region of the buffer space attached to a socket buffer.
- */
static int
-__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
+ unsigned int recursion_level)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
int elt = 0;
+ if (unlikely(recursion_level >= 24))
+ return -EMSGSIZE;
+
if (copy > 0) {
if (copy > len)
copy = len;
@@ -3518,6 +3548,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+ return -EMSGSIZE;
if (copy > len)
copy = len;
@@ -3532,16 +3564,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
}
skb_walk_frags(skb, frag_iter) {
- int end;
+ int end, ret;
WARN_ON(start > offset + len);
end = start + frag_iter->len;
if ((copy = end - offset) > 0) {
+ if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+ return -EMSGSIZE;
+
if (copy > len)
copy = len;
- elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
- copy);
+ ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
+ copy, recursion_level + 1);
+ if (unlikely(ret < 0))
+ return ret;
+ elt += ret;
if ((len -= copy) == 0)
return elt;
offset += copy;
@@ -3552,6 +3590,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+/**
+ * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ * @skb: Socket buffer containing the buffers to be mapped
+ * @sg: The scatter-gather list to map into
+ * @offset: The offset into the buffer's contents to start mapping
+ * @len: Length of buffer space to be mapped
+ *
+ * Fill the specified scatter-gather list with mappings/pointers into a
+ * region of the buffer space attached to a socket buffer. Returns either
+ * the number of scatterlist items used, or -EMSGSIZE if the contents
+ * could not fit.
+ */
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+ int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
+
+ if (nsg <= 0)
+ return nsg;
+
+ sg_mark_end(&sg[nsg - 1]);
+
+ return nsg;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
* sglist without mark the sg which contain last skb data as the end.
* So the caller can mannipulate sg list as will when padding new data after
@@ -3574,19 +3637,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
int offset, int len)
{
- return __skb_to_sgvec(skb, sg, offset, len);
+ return __skb_to_sgvec(skb, sg, offset, len, 0);
}
EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
-int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
- int nsg = __skb_to_sgvec(skb, sg, offset, len);
- sg_mark_end(&sg[nsg - 1]);
-
- return nsg;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
/**
* skb_cow_data - Check that a socket buffer's data buffers are writable
@@ -3789,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb)
struct sock *sk = skb->sk;
struct sk_buff *clone;
- if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -3860,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
*/
- if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
@@ -3878,6 +3933,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
+ if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+ return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
@@ -3899,7 +3958,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
return;
if (tsonly) {
- skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
+ skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags &
+ SKBTX_ANY_TSTAMP;
skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
}
@@ -3937,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
*/
- if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
err = sock_queue_err_skb(sk, skb);
sock_put(sk);
}
OpenPOWER on IntegriCloud