summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2018-06-03 08:11:35 -0700
committerAlexei Starovoitov <ast@kernel.org>2018-06-03 08:11:36 -0700
commitea9916ea3ed98d0a1f67f5cbe8ed8ae28e37f8c8 (patch)
tree98d1d3475a3b6ca67e1f6eaa65fb4774fa1950cd
parent69b450789136f70005f8d36315d875158ea430cf (diff)
parentc1ece6b245bd12a57124da78abafbf8a511394d6 (diff)
downloadop-kernel-dev-ea9916ea3ed98d0a1f67f5cbe8ed8ae28e37f8c8.zip
op-kernel-dev-ea9916ea3ed98d0a1f67f5cbe8ed8ae28e37f8c8.tar.gz
Merge branch 'ndo_xdp_xmit-cleanup'
Jesper Dangaard Brouer says: ==================== As I mentioned in merge commit 10f678683e4 ("Merge branch 'xdp_xmit-bulking'") I plan to change the API for ndo_xdp_xmit once more, by adding a flags argument, which is done in this patchset. I know it is late in the cycle (currently at rc7), but it would be nice to avoid changing NDOs over several kernel releases, as it is annoying to vendors and distro backporters, but it is not strictly UAPI so it is allowed (according to Alexei). The end-goal is getting rid of the ndo_xdp_flush operation, as it will make it possible for drivers to implement a TXQ synchronization mechanism that is not necessarily derived from the CPU id (smp_processor_id). This patchset removes all callers of the ndo_xdp_flush operation, but it doesn't take the last step of removing it from all drivers. This can be done later, or I can update the patchset on request. Micro-benchmarks only show a very small performance improvement, for map-redirect around ~2 ns, and for non-map redirect ~7 ns. I've not benchmarked this with CONFIG_RETPOLINE, but the performance benefit should be more visible given we end-up removing an indirect call. --- V2: Updated based on feedback from Song Liu <songliubraving@fb.com> ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c14
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c23
-rw-r--r--drivers/net/tun.c25
-rw-r--r--drivers/net/virtio_net.c9
-rw-r--r--include/linux/netdevice.h7
-rw-r--r--include/net/xdp.h4
-rw-r--r--kernel/bpf/devmap.c19
-rw-r--r--net/core/filter.c3
9 files changed, 72 insertions, 35 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9b698c5..5f01e4c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3670,11 +3670,13 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
* For error cases, a negative errno code is returned and no-frames
* are transmitted (caller must handle freeing frames).
**/
-int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
+ struct i40e_ring *xdp_ring;
int drops = 0;
int i;
@@ -3684,17 +3686,25 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ xdp_ring = vsi->xdp_rings[queue_index];
+
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
- err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+ err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
if (err != I40E_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ i40e_xdp_ring_update_tail(xdp_ring);
+
return n - drops;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index eb8804b..820f76d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -487,7 +487,8 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
void i40e_xdp_flush(struct net_device *dev);
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 031d65c..4fd77c9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10022,8 +10022,17 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
+{
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
- struct xdp_frame **frames)
+ struct xdp_frame **frames, u32 flags)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
@@ -10033,6 +10042,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
@@ -10051,6 +10063,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
}
}
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ ixgbe_xdp_ring_update_tail(ring);
+
return n - drops;
}
@@ -10069,11 +10084,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
if (unlikely(!ring))
return;
- /* Force memory writes to complete before letting h/w know there
- * are new descriptors to fetch.
- */
- wmb();
- writel(ring->next_to_use, ring->tail);
+ ixgbe_xdp_ring_update_tail(ring);
return;
}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2265d2c..d82a05f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1285,7 +1285,16 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};
-static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
+static void __tun_xdp_flush_tfile(struct tun_file *tfile)
+{
+ /* Notify and wake up reader process */
+ if (tfile->flags & TUN_FASYNC)
+ kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+}
+
+static int tun_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
{
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile;
@@ -1294,6 +1303,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
int cnt = n;
int i;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
@@ -1321,6 +1333,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
}
spin_unlock(&tfile->tx_ring.producer_lock);
+ if (flags & XDP_XMIT_FLUSH)
+ __tun_xdp_flush_tfile(tfile);
+
rcu_read_unlock();
return cnt - drops;
}
@@ -1332,7 +1347,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame))
return -EOVERFLOW;
- return tun_xdp_xmit(dev, 1, &frame);
+ return tun_xdp_xmit(dev, 1, &frame, 0);
}
static void tun_xdp_flush(struct net_device *dev)
@@ -1349,11 +1364,7 @@ static void tun_xdp_flush(struct net_device *dev)
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
- /* Notify and wake up reader process */
- if (tfile->flags & TUN_FASYNC)
- kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
- tfile->socket.sk->sk_data_ready(tfile->socket.sk);
-
+ __tun_xdp_flush_tfile(tfile);
out:
rcu_read_unlock();
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b2647dd..62ba8aa 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -468,7 +468,7 @@ static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
}
static int virtnet_xdp_xmit(struct net_device *dev,
- int n, struct xdp_frame **frames)
+ int n, struct xdp_frame **frames, u32 flags)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
@@ -481,6 +481,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
int err;
int i;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
@@ -504,6 +507,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
drops++;
}
}
+
+ if (flags & XDP_XMIT_FLUSH)
+ virtqueue_kick(sq->vq);
+
return n - drops;
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8452f72..7f17785 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1185,13 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp,
+ * u32 flags);
* This function is used to submit @n XDP packets for transmit on a
* netdevice. Returns number of frames successfully transmitted, frames
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
- * TODO: Consider add flag to allow sending flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1380,7 +1380,8 @@ struct net_device_ops {
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
- struct xdp_frame **xdp);
+ struct xdp_frame **xdp,
+ u32 flags);
void (*ndo_xdp_flush)(struct net_device *dev);
};
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 7ad7792..a3b71a4 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -40,6 +40,10 @@ enum xdp_mem_type {
MEM_TYPE_MAX,
};
+/* XDP flags for ndo_xdp_xmit */
+#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
+#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
+
struct xdp_mem_info {
u32 type; /* enum xdp_mem_type, but known size type */
u32 id;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 1fe3fe6..a7cc7b3 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
}
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
- struct xdp_bulk_queue *bq)
+ struct xdp_bulk_queue *bq, u32 flags)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
@@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
prefetch(xdpf);
}
- sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
+ sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
if (sent < 0) {
err = sent;
sent = 0;
@@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
- struct net_device *netdev;
/* This is possible if the dev entry is removed by user space
* between xdp redirect and flush op.
@@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map)
__clear_bit(bit, bitmap);
bq = this_cpu_ptr(dev->bulkq);
- bq_xmit_all(dev, bq);
- netdev = dev->dev;
- if (likely(netdev->netdev_ops->ndo_xdp_flush))
- netdev->netdev_ops->ndo_xdp_flush(netdev);
+ bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
}
}
@@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
- bq_xmit_all(obj, bq);
+ bq_xmit_all(obj, bq, 0);
/* Ingress dev_rx will be the same for all xdp_frame's in
* bulk_queue, because bq stored per-CPU and must be flushed
@@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
- if (dev->dev->netdev_ops->ndo_xdp_flush) {
- struct net_device *fl = dev->dev;
+ if (dev->dev->netdev_ops->ndo_xdp_xmit) {
struct xdp_bulk_queue *bq;
unsigned long *bitmap;
@@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
__clear_bit(dev->bit, bitmap);
bq = per_cpu_ptr(dev->bulkq, cpu);
- bq_xmit_all(dev, bq);
-
- fl->netdev_ops->ndo_xdp_flush(dev->dev);
+ bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
}
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 28e8647..a72ea9f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3056,10 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
if (unlikely(!xdpf))
return -EOVERFLOW;
- sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+ sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
if (sent <= 0)
return sent;
- dev->netdev_ops->ndo_xdp_flush(dev);
return 0;
}
OpenPOWER on IntegriCloud