summary | refs | log | tree | commit | diff | stats
path: root/include/net
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net>, 2018-06-05 12:42:19 -0400
committer: David S. Miller <davem@davemloft.net>, 2018-06-05 12:42:19 -0400
commit: fd129f8941cf2309def29b5c8a23b62faff0c9d0 (patch)
tree: 6ad8afbb59eaf14cfa9f0c4bad498254e6ff1e66 /include/net
parent: a6fa9087fc280bba8a045d11d9b5d86cbf9a3a83 (diff)
parent: 9fa06104a235f64d6a2bf3012cc9966e8e4be5eb (diff)
download: op-kernel-dev-fd129f8941cf2309def29b5c8a23b62faff0c9d0.zip
download: op-kernel-dev-fd129f8941cf2309def29b5c8a23b62faff0c9d0.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-06-05 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Add a new BPF hook for sendmsg similar to existing hooks for bind and connect: "This allows to override source IP (including the case when it's set via cmsg(3)) and destination IP:port for unconnected UDP (slow path). TCP and connected UDP (fast path) are not affected. This makes UDP support complete, that is, connected UDP is handled by connect hooks, unconnected by sendmsg ones.", from Andrey. 2) Rework of the AF_XDP API to allow extending it in the future to a type-writer model if necessary. In this mode a memory window is passed to hardware and multiple frames might be filled into that window instead of just one, as is the case in the current fixed frame-size model. With the new changes made this can be supported without having to add a new descriptor format. Also, core bits for the zero-copy support for AF_XDP have been merged as agreed upon, where i40e bits will be routed via Jeff later on. Various improvements to documentation and sample programs are included as well, all from Björn and Magnus. 3) Given BPF's flexibility, a new program type has been added to implement infrared decoders. Quote: "The kernel IR decoders support the most widely used IR protocols, but there are many protocols which are not supported. [...] There is a 'long tail' of unsupported IR protocols, for which lircd is needed to decode the IR. IR encoding is done in such a way that some simple circuit can decode it; therefore, BPF is ideal. [...] user-space can define a decoder in BPF, attach it to the rc device through the lirc chardev.", from Sean.
4) Several improvements and fixes to BPF core, among others, dumping map and prog IDs into fdinfo, which is a straightforward way to correlate BPF objects used by applications, removing an indirect call and therefore retpoline in all map lookup/update/delete calls by invoking the callback directly for 64-bit archs, adding a new bpf_skb_cgroup_id() BPF helper for tc BPF programs to have an efficient way of looking up cgroup v2 id for policy or other use cases. Fixes to make sure we zero tunnel/xfrm state that hasn't been filled, to allow context access wrt pt_regs in 32-bit archs for tracing, and last but not least various test cases for fixes that landed in bpf earlier, from Daniel. 5) Get rid of the ndo_xdp_flush API and extend the ndo_xdp_xmit with a XDP_XMIT_FLUSH flag instead, which avoids one indirect call as flushing is now merged directly into ndo_xdp_xmit(), from Jesper. 6) Add a new bpf_get_current_cgroup_id() helper that can be used in tracing to retrieve the cgroup id from the current process in order to allow for e.g. aggregation of container-level events, from Yonghong. 7) Two follow-up fixes for BTF to reject invalid input values and, related to that, two test cases for BPF kselftests, from Martin. 8) Various API improvements to the bpf_fib_lookup() helper, that is, dropping MPLS bits which are not fully hashed out yet, rejecting invalid helper flags, returning error for unsupported address families as well as renaming flowlabel to flowinfo, from David. 9) Various fixes and improvements to sockmap BPF kselftests, in particular in proper error detection and data verification, from Prashant. 10) Two arm32 BPF JIT improvements. One is to fix the imm range check with regard to whether the immediate fits into 24 bits, and the other is a naming cleanup to make functions related to rsh handling consistent with those handling lsh, from Wang. 11) Two compile warning fixes in BPF, one for BTF and one to silence a gcc false positive in stack_map_get_build_id_offset(), from Arnd.
12) Add the missing seg6.h header to the tools include infrastructure in order to fix compilation of BPF kselftests, from Mathieu. 13) Several formatting cleanups in the BPF UAPI helper description that also fix an error during rst2man compilation, from Quentin. 14) Hide an unused variable in sk_msg_convert_ctx_access() when IPv6 is not built into the kernel, from Yue. 15) Remove a useless double assignment in dev_map_enqueue(), from Colin. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r-- include/net/xdp.h 14
-rw-r--r-- include/net/xdp_sock.h 44
2 files changed, 57 insertions, 1 deletions
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 7ad7792..2deea71 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -37,9 +37,14 @@ enum xdp_mem_type {
MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */
MEM_TYPE_PAGE_POOL,
+ MEM_TYPE_ZERO_COPY,
MEM_TYPE_MAX,
};
+/* XDP flags for ndo_xdp_xmit */
+#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
+#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
+
struct xdp_mem_info {
u32 type; /* enum xdp_mem_type, but known size type */
u32 id;
@@ -47,6 +52,10 @@ struct xdp_mem_info {
struct page_pool;
+struct zero_copy_allocator {
+ void (*free)(struct zero_copy_allocator *zca, unsigned long handle);
+};
+
struct xdp_rxq_info {
struct net_device *dev;
u32 queue_index;
@@ -59,6 +68,7 @@ struct xdp_buff {
void *data_end;
void *data_meta;
void *data_hard_start;
+ unsigned long handle;
struct xdp_rxq_info *rxq;
};
@@ -82,6 +92,10 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
int metasize;
int headroom;
+ /* TODO: implement clone, copy, use "native" MEM_TYPE */
+ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
+ return NULL;
+
/* Assure headroom is available for storing info */
headroom = xdp->data - xdp->data_hard_start;
metasize = xdp->data - xdp->data_meta;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 7a647c5..9fe472f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -6,12 +6,46 @@
#ifndef _LINUX_XDP_SOCK_H
#define _LINUX_XDP_SOCK_H
+#include <linux/workqueue.h>
+#include <linux/if_xdp.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
#include <net/sock.h>
struct net_device;
struct xsk_queue;
-struct xdp_umem;
+
+struct xdp_umem_props {
+ u64 chunk_mask;
+ u64 size;
+};
+
+struct xdp_umem_page {
+ void *addr;
+ dma_addr_t dma;
+};
+
+struct xdp_umem {
+ struct xsk_queue *fq;
+ struct xsk_queue *cq;
+ struct xdp_umem_page *pages;
+ struct xdp_umem_props props;
+ u32 headroom;
+ u32 chunk_size_nohr;
+ struct user_struct *user;
+ struct pid *pid;
+ unsigned long address;
+ refcount_t users;
+ struct work_struct work;
+ struct page **pgs;
+ u32 npgs;
+ struct net_device *dev;
+ u16 queue_id;
+ bool zc;
+ spinlock_t xsk_list_lock;
+ struct list_head xsk_list;
+};
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
@@ -22,6 +56,8 @@ struct xdp_sock {
struct list_head flush_node;
u16 queue_id;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
+ struct list_head list;
+ bool zc;
/* Protects multiple processes in the control path */
struct mutex mutex;
u64 rx_dropped;
@@ -33,6 +69,12 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+/* Used from netdev driver */
+u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_discard_addr(struct xdp_umem *umem);
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
+void xsk_umem_consume_tx_done(struct xdp_umem *umem);
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
OpenPOWER on IntegriCloud