author      royger <royger@FreeBSD.org>     2016-01-20 15:02:43 +0000
committer   royger <royger@FreeBSD.org>     2016-01-20 15:02:43 +0000
commit      cbb7a2ba43e89bd1dd976a536f34a21a65ffd6cf (patch)
tree        2f65240c41fd3b58d6ac79288786c153cb4b2f61 /sys/dev/xen
parent      fb8428bdbe588876c84ae468f107c3a8fd5639fc (diff)
xen-netfront: add multiqueue support
Add support for multiple TX and RX queue pairs. The default is 4 queue pairs,
but this can be changed through the hw.xn.num_queues tunable/sysctl node.
Also heavily refactor the netfront driver: break out a number of helper
functions and per-queue structures, and use taskqueue threads to handle TX and RX.
Remove some dead code and fix several bugs along the way.
Submitted by: Wei Liu <wei.liu2@citrix.com>
Reviewed by: royger
Sponsored by: Citrix Systems R&D
Relnotes: Yes
Differential Revision: https://reviews.freebsd.org/D4193
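
For readers skimming the change, a minimal sketch of the queue-selection idea behind the new multiqueue transmit path: xn_txq_mq_start() in the diff below maps each packet to one of the TX/RX queue pairs using the mbuf flow id when the stack provides one, and falls back to the current CPU otherwise. The sketch is illustrative only; the ex_* names and structures are hypothetical, and the committed code additionally tries to take the queue lock and transmit inline before deferring to the queue's taskqueue thread.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/mbuf.h>
    #include <sys/buf_ring.h>
    #include <sys/pcpu.h>
    #include <sys/taskqueue.h>
    #include <net/if.h>
    #include <net/if_var.h>

    /* Hypothetical per-queue and per-device state (cf. netfront_txq/netfront_info). */
    struct ex_txq {
            struct buf_ring         *br;            /* software transmit ring */
            struct taskqueue        *tq;            /* per-queue worker thread */
            struct task              deferred;      /* drains br on that thread */
    };

    struct ex_softc {
            u_int            nqueues;               /* number of TX/RX pairs */
            struct ex_txq   *txq;
    };

    /* if_transmit handler: pick a queue pair and hand the packet to it. */
    static int
    ex_transmit(struct ifnet *ifp, struct mbuf *m)
    {
            struct ex_softc *sc = ifp->if_softc;
            struct ex_txq *txq;
            u_int qid;
            int error;

            if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                    qid = m->m_pkthdr.flowid % sc->nqueues; /* keep a flow on one queue */
            else
                    qid = curcpu % sc->nqueues;             /* spread unrelated traffic */
            txq = &sc->txq[qid];

            /* Enqueue and let the queue's taskqueue thread place the request
             * on the shared ring and notify the backend. */
            error = drbr_enqueue(ifp, txq->br, m);
            if (error == 0)
                    taskqueue_enqueue(txq->tq, &txq->deferred);
            return (error);
    }

In the committed driver the default of four pairs comes from xn_num_queues and is clamped to the backend's advertised multi-queue-max-queues value in talk_to_backend().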
Diffstat (limited to 'sys/dev/xen')
-rw-r--r--  sys/dev/xen/netfront/netfront.c | 1640
1 file changed, 979 insertions, 661 deletions
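
The "use threads to handle TX and RX" part of the change replaces the old single interrupt handler with a filter that merely schedules per-queue taskqueue work. Below is a trimmed, hypothetical sketch of that handoff; the ex_* names are illustrative, and the real code lives in xn_intr(), xn_rxq_tq_intr() and setup_rxqs() in the diff that follows.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/bus.h>
    #include <sys/kernel.h>
    #include <sys/malloc.h>
    #include <sys/priority.h>
    #include <sys/taskqueue.h>

    /* Hypothetical per-RX-queue state (cf. struct netfront_rxq). */
    struct ex_rxq {
            struct taskqueue        *tq;
            struct task              intrtask;
            int                      id;
    };

    /* Runs in the queue's dedicated thread: process ring responses,
     * refill receive buffers, pass packets up the stack. */
    static void
    ex_rxq_task(void *arg, int pending __unused)
    {
            struct ex_rxq *rxq = arg;

            (void)rxq;      /* real work happens here, under the RX lock */
    }

    /* Interrupt filter: stay minimal and defer to the taskqueue thread. */
    static int
    ex_intr(void *arg)
    {
            struct ex_rxq *rxq = arg;

            taskqueue_enqueue(rxq->tq, &rxq->intrtask);
            return (FILTER_HANDLED);
    }

    /* Per-queue setup, roughly mirroring setup_rxqs(): one thread per queue. */
    static int
    ex_rxq_setup(device_t dev, struct ex_rxq *rxq)
    {
            TASK_INIT(&rxq->intrtask, 0, ex_rxq_task, rxq);
            rxq->tq = taskqueue_create_fast("ex_rxq", M_WAITOK,
                taskqueue_thread_enqueue, &rxq->tq);
            return (taskqueue_start_threads(&rxq->tq, 1, PI_NET,
                "%s rxq %d", device_get_nameunit(dev), rxq->id));
    }

Because split event channels are not yet supported, the patch binds a single event channel per queue pair, and xn_intr() kicks both the RX and the TX task for that pair.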
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index cef6370..e940d93 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2004-2006 Kip Macy + * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,15 +40,14 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/socket.h> #include <sys/sysctl.h> +#include <sys/taskqueue.h> #include <net/if.h> #include <net/if_var.h> #include <net/if_arp.h> #include <net/ethernet.h> #include <net/if_media.h> - #include <net/bpf.h> - #include <net/if_types.h> #include <netinet/in.h> @@ -86,6 +86,12 @@ __FBSDID("$FreeBSD$"); static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); +/* + * Number of pairs of queues. + */ +static unsigned long xn_num_queues = 4; +TUNABLE_ULONG("hw.xn.num_queues", &xn_num_queues); + /** * \brief The maximum allowed data fragments in a single transmit * request. @@ -100,149 +106,167 @@ TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); #define net_ratelimit() 0 +struct netfront_rxq; +struct netfront_txq; struct netfront_info; struct netfront_rx_info; -static void xn_txeof(struct netfront_info *); -static void xn_rxeof(struct netfront_info *); -static void network_alloc_rx_buffers(struct netfront_info *); +static void xn_txeof(struct netfront_txq *); +static void xn_rxeof(struct netfront_rxq *); +static void xn_alloc_rx_buffers(struct netfront_rxq *); -static void xn_tick_locked(struct netfront_info *); -static void xn_tick(void *); +static void xn_release_rx_bufs(struct netfront_rxq *); +static void xn_release_tx_bufs(struct netfront_txq *); -static void xn_intr(void *); +static void xn_rxq_intr(void *); +static void xn_txq_intr(void *); +static int xn_intr(void *); static inline int xn_count_frags(struct mbuf *m); -static int xn_assemble_tx_request(struct netfront_info *sc, - struct mbuf *m_head); -static void xn_start_locked(struct ifnet *); -static void xn_start(struct ifnet *); -static int xn_ioctl(struct ifnet *, u_long, caddr_t); +static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *); +static int xn_ioctl(struct ifnet *, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); -static int xn_configure_features(struct netfront_info *np); -#ifdef notyet -static void xn_watchdog(struct ifnet *); -#endif - -#ifdef notyet -static void netfront_closing(device_t dev); -#endif +static int xn_configure_features(struct netfront_info *np); static void netif_free(struct netfront_info *info); static int netfront_detach(device_t dev); +static int xn_txq_mq_start_locked(struct netfront_txq *, struct mbuf *); +static int xn_txq_mq_start(struct ifnet *, struct mbuf *); + static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); -static int setup_device(device_t dev, struct netfront_info *info); -static void free_ring(int *ref, void *ring_ptr_ref); - -static int xn_ifmedia_upd(struct ifnet *ifp); +static int setup_device(device_t dev, struct netfront_info *info, + unsigned long); +static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); -/* Xenolinux helper functions */ -int 
network_connect(struct netfront_info *); - -static void xn_free_rx_ring(struct netfront_info *); - -static void xn_free_tx_ring(struct netfront_info *); +int xn_connect(struct netfront_info *); -static int xennet_get_responses(struct netfront_info *np, - struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, - struct mbuf **list); +static int xn_get_responses(struct netfront_rxq *, + struct netfront_rx_info *, RING_IDX, RING_IDX *, + struct mbuf **); #define virt_to_mfn(x) (vtophys(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) -/* - * Mbuf pointers. We need these to keep track of the virtual addresses - * of our mbuf chains since we can only convert from virtual to physical, - * not the other way around. The size must track the free index arrays. - */ -struct xn_chain_data { - struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; - int xn_tx_chain_cnt; - struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; +struct xn_rx_stats +{ + u_long rx_packets; /* total packets received */ + u_long rx_bytes; /* total bytes received */ + u_long rx_errors; /* bad packets received */ }; -struct netfront_stats +struct xn_tx_stats { - u_long rx_packets; /* total packets received */ - u_long tx_packets; /* total packets transmitted */ - u_long rx_bytes; /* total bytes received */ - u_long tx_bytes; /* total bytes transmitted */ - u_long rx_errors; /* bad packets received */ - u_long tx_errors; /* packet transmit problems */ + u_long tx_packets; /* total packets transmitted */ + u_long tx_bytes; /* total bytes transmitted */ + u_long tx_errors; /* packet transmit problems */ }; -struct netfront_info { - struct ifnet *xn_ifp; - struct lro_ctrl xn_lro; +#define XN_QUEUE_NAME_LEN 8 /* xn{t,r}x_%u, allow for two digits */ +struct netfront_rxq { + struct netfront_info *info; + u_int id; + char name[XN_QUEUE_NAME_LEN]; + struct mtx lock; + + int ring_ref; + netif_rx_front_ring_t ring; + xen_intr_handle_t xen_intr_handle; + + grant_ref_t gref_head; + grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; + + struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; + struct mbufq batch; /* batch queue */ + int target; + + xen_pfn_t pfn_array[NET_RX_RING_SIZE]; + + struct lro_ctrl lro; + + struct taskqueue *tq; + struct task intrtask; + + struct xn_rx_stats stats; +}; + +struct netfront_txq { + struct netfront_info *info; + u_int id; + char name[XN_QUEUE_NAME_LEN]; + struct mtx lock; + + int ring_ref; + netif_tx_front_ring_t ring; + xen_intr_handle_t xen_intr_handle; + + grant_ref_t gref_head; + grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; + + struct mbuf *mbufs[NET_TX_RING_SIZE + 1]; + int mbufs_cnt; + struct buf_ring *br; + + struct taskqueue *tq; + struct task intrtask; + struct task defrtask; + + bool full; + + struct xn_tx_stats stats; +}; - struct netfront_stats stats; - u_int tx_full; +struct netfront_info { + struct ifnet *xn_ifp; - netif_tx_front_ring_t tx; - netif_rx_front_ring_t rx; + struct mtx sc_lock; - struct mtx tx_lock; - struct mtx rx_lock; - struct mtx sc_lock; + u_int num_queues; + struct netfront_rxq *rxq; + struct netfront_txq *txq; - xen_intr_handle_t xen_intr_handle; - u_int carrier; - u_int maxfrags; + u_int carrier; + u_int maxfrags; /* Receive-ring batched refills. 
*/ #define RX_MIN_TARGET 32 #define RX_MAX_TARGET NET_RX_RING_SIZE - int rx_min_target; - int rx_max_target; - int rx_target; - - grant_ref_t gref_tx_head; - grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; - grant_ref_t gref_rx_head; - grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; + int rx_min_target; + int rx_max_target; device_t xbdev; - int tx_ring_ref; - int rx_ring_ref; uint8_t mac[ETHER_ADDR_LEN]; - struct xn_chain_data xn_cdata; /* mbufs */ - struct mbufq xn_rx_batch; /* batch queue */ int xn_if_flags; - struct callout xn_stat_ch; - xen_pfn_t rx_pfn_array[NET_RX_RING_SIZE]; struct ifmedia sc_media; bool xn_resume; }; -#define rx_mbufs xn_cdata.xn_rx_chain -#define tx_mbufs xn_cdata.xn_tx_chain +struct netfront_rx_info { + struct netif_rx_response rx; + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; -#define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) -#define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) +#define XN_RX_LOCK(_q) mtx_lock(&(_q)->lock) +#define XN_RX_UNLOCK(_q) mtx_unlock(&(_q)->lock) -#define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) -#define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) +#define XN_TX_LOCK(_q) mtx_lock(&(_q)->lock) +#define XN_TX_TRYLOCK(_q) mtx_trylock(&(_q)->lock) +#define XN_TX_UNLOCK(_q) mtx_unlock(&(_q)->lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); -#define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); -#define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); - -struct netfront_rx_info { - struct netif_rx_response rx; - struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; -}; +#define XN_RX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); +#define XN_TX_LOCK_ASSERT(_q) mtx_assert(&(_q)->lock, MA_OWNED); #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) @@ -253,6 +277,7 @@ struct netfront_rx_info { static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { + KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; @@ -272,30 +297,33 @@ get_id_from_freelist(struct mbuf **list) } static inline int -xennet_rxidx(RING_IDX idx) +xn_rxidx(RING_IDX idx) { + return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * -xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri) +xn_get_rx_mbuf(struct netfront_rxq *rxq, RING_IDX ri) { - int i = xennet_rxidx(ri); + int i; struct mbuf *m; - m = np->rx_mbufs[i]; - np->rx_mbufs[i] = NULL; + i = xn_rxidx(ri); + m = rxq->mbufs[i]; + rxq->mbufs[i] = NULL; return (m); } static inline grant_ref_t -xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) +xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) { - int i = xennet_rxidx(ri); - grant_ref_t ref = np->grant_rx_ref[i]; + int i = xn_rxidx(ri); + grant_ref_t ref = rxq->grant_ref[i]; + KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); - np->grant_rx_ref[i] = GRANT_REF_INVALID; - return ref; + rxq->grant_ref[i] = GRANT_REF_INVALID; + return (ref); } #define IPRINTK(fmt, args...) 
\ @@ -392,7 +420,7 @@ netfront_attach(device_t dev) int err; err = create_netdev(dev); - if (err) { + if (err != 0) { xenbus_dev_fatal(dev, err, "creating netdev"); return (err); } @@ -402,19 +430,29 @@ netfront_attach(device_t dev) OID_AUTO, "enable_lro", CTLFLAG_RW, &xn_enable_lro, 0, "Large Receive Offload"); + SYSCTL_ADD_ULONG(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "num_queues", CTLFLAG_RD, + &xn_num_queues, "Number of pairs of queues"); + return (0); } static int netfront_suspend(device_t dev) { - struct netfront_info *info = device_get_softc(dev); + struct netfront_info *np = device_get_softc(dev); + u_int i; - XN_RX_LOCK(info); - XN_TX_LOCK(info); - netfront_carrier_off(info); - XN_TX_UNLOCK(info); - XN_RX_UNLOCK(info); + for (i = 0; i < np->num_queues; i++) { + XN_RX_LOCK(&np->rxq[i]); + XN_TX_LOCK(&np->txq[i]); + } + netfront_carrier_off(np); + for (i = 0; i < np->num_queues; i++) { + XN_RX_UNLOCK(&np->rxq[i]); + XN_TX_UNLOCK(&np->txq[i]); + } return (0); } @@ -434,6 +472,61 @@ netfront_resume(device_t dev) return (0); } +static int +write_queue_xenstore_keys(device_t dev, + struct netfront_rxq *rxq, + struct netfront_txq *txq, + struct xs_transaction *xst, bool hierarchy) +{ + int err; + const char *message; + const char *node = xenbus_get_node(dev); + char *path; + size_t path_size; + + KASSERT(rxq->id == txq->id, ("Mismatch between RX and TX queue ids")); + /* Split event channel support is not yet there. */ + KASSERT(rxq->xen_intr_handle == txq->xen_intr_handle, + ("Split event channels are not supported")); + + if (hierarchy) { + path_size = strlen(node) + 10; + path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); + snprintf(path, path_size, "%s/queue-%u", node, rxq->id); + } else { + path_size = strlen(node) + 1; + path = malloc(path_size, M_DEVBUF, M_WAITOK|M_ZERO); + snprintf(path, path_size, "%s", node); + } + + err = xs_printf(*xst, path, "tx-ring-ref","%u", txq->ring_ref); + if (err != 0) { + message = "writing tx ring-ref"; + goto error; + } + err = xs_printf(*xst, path, "rx-ring-ref","%u", rxq->ring_ref); + if (err != 0) { + message = "writing rx ring-ref"; + goto error; + } + err = xs_printf(*xst, path, "event-channel", "%u", + xen_intr_port(rxq->xen_intr_handle)); + if (err != 0) { + message = "writing event-channel"; + goto error; + } + + free(path, M_DEVBUF); + + return (0); + +error: + free(path, M_DEVBUF); + xenbus_dev_fatal(dev, err, "%s", message); + + return (err); +} + /* Common code used when first setting up, and when resuming. */ static int talk_to_backend(device_t dev, struct netfront_info *info) @@ -442,134 +535,427 @@ talk_to_backend(device_t dev, struct netfront_info *info) struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; + unsigned long num_queues, max_queues = 0; + unsigned int i; err = xen_net_read_mac(dev, info->mac); - if (err) { + if (err != 0) { xenbus_dev_fatal(dev, err, "parsing %s/mac", node); goto out; } - /* Create shared ring, alloc event channel. 
*/ - err = setup_device(dev, info); - if (err) + err = xs_scanf(XST_NIL, xenbus_get_otherend_path(info->xbdev), + "multi-queue-max-queues", NULL, "%lu", &max_queues); + if (err != 0) + max_queues = 1; + num_queues = xn_num_queues; + if (num_queues > max_queues) + num_queues = max_queues; + + err = setup_device(dev, info, num_queues); + if (err != 0) goto out; again: err = xs_transaction_start(&xst); - if (err) { + if (err != 0) { xenbus_dev_fatal(dev, err, "starting transaction"); - goto destroy_ring; - } - err = xs_printf(xst, node, "tx-ring-ref","%u", - info->tx_ring_ref); - if (err) { - message = "writing tx ring-ref"; - goto abort_transaction; - } - err = xs_printf(xst, node, "rx-ring-ref","%u", - info->rx_ring_ref); - if (err) { - message = "writing rx ring-ref"; - goto abort_transaction; + goto free; } - err = xs_printf(xst, node, - "event-channel", "%u", - xen_intr_port(info->xen_intr_handle)); - if (err) { - message = "writing event-channel"; - goto abort_transaction; + + if (info->num_queues == 1) { + err = write_queue_xenstore_keys(dev, &info->rxq[0], + &info->txq[0], &xst, false); + if (err != 0) + goto abort_transaction_no_def_error; + } else { + err = xs_printf(xst, node, "multi-queue-num-queues", + "%u", info->num_queues); + if (err != 0) { + message = "writing multi-queue-num-queues"; + goto abort_transaction; + } + + for (i = 0; i < info->num_queues; i++) { + err = write_queue_xenstore_keys(dev, &info->rxq[i], + &info->txq[i], &xst, true); + if (err != 0) + goto abort_transaction_no_def_error; + } } + err = xs_printf(xst, node, "request-rx-copy", "%u", 1); - if (err) { + if (err != 0) { message = "writing request-rx-copy"; goto abort_transaction; } err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); - if (err) { + if (err != 0) { message = "writing feature-rx-notify"; goto abort_transaction; } err = xs_printf(xst, node, "feature-sg", "%d", 1); - if (err) { + if (err != 0) { message = "writing feature-sg"; goto abort_transaction; } err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); - if (err) { + if (err != 0) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } err = xs_transaction_end(xst, 0); - if (err) { + if (err != 0) { if (err == EAGAIN) goto again; xenbus_dev_fatal(dev, err, "completing transaction"); - goto destroy_ring; + goto free; } return 0; abort_transaction: - xs_transaction_end(xst, 1); xenbus_dev_fatal(dev, err, "%s", message); - destroy_ring: + abort_transaction_no_def_error: + xs_transaction_end(xst, 1); + free: netif_free(info); out: - return err; + return (err); +} + +static void +xn_rxq_tq_intr(void *xrxq, int pending) +{ + struct netfront_rxq *rxq = xrxq; + + XN_RX_LOCK(rxq); + xn_rxeof(rxq); + XN_RX_UNLOCK(rxq); +} + +static void +xn_txq_start(struct netfront_txq *txq) +{ + struct netfront_info *np = txq->info; + struct ifnet *ifp = np->xn_ifp; + + XN_TX_LOCK_ASSERT(txq); + if (!drbr_empty(ifp, txq->br)) + xn_txq_mq_start_locked(txq, NULL); +} + +static void +xn_txq_tq_intr(void *xtxq, int pending) +{ + struct netfront_txq *txq = xtxq; + + XN_TX_LOCK(txq); + if (RING_HAS_UNCONSUMED_RESPONSES(&txq->ring)) + xn_txeof(txq); + xn_txq_start(txq); + XN_TX_UNLOCK(txq); +} + +static void +xn_txq_tq_deferred(void *xtxq, int pending) +{ + struct netfront_txq *txq = xtxq; + + XN_TX_LOCK(txq); + xn_txq_start(txq); + XN_TX_UNLOCK(txq); +} + +static void +disconnect_rxq(struct netfront_rxq *rxq) +{ + + xn_release_rx_bufs(rxq); + gnttab_free_grant_references(rxq->gref_head); + gnttab_end_foreign_access_ref(rxq->ring_ref); + /* + * No split 
event channel support at the moment, handle will + * be unbound in tx. So no need to call xen_intr_unbind here, + * but we do want to reset the handler to 0. + */ + rxq->xen_intr_handle = 0; +} + +static void +destroy_rxq(struct netfront_rxq *rxq) +{ + + free(rxq->ring.sring, M_DEVBUF); + taskqueue_drain_all(rxq->tq); + taskqueue_free(rxq->tq); +} + +static void +destroy_rxqs(struct netfront_info *np) +{ + int i; + + for (i = 0; i < np->num_queues; i++) + destroy_rxq(&np->rxq[i]); + + free(np->rxq, M_DEVBUF); + np->rxq = NULL; } static int -setup_device(device_t dev, struct netfront_info *info) +setup_rxqs(device_t dev, struct netfront_info *info, + unsigned long num_queues) { - netif_tx_sring_t *txs; - netif_rx_sring_t *rxs; + int q, i; int error; + netif_rx_sring_t *rxs; + struct netfront_rxq *rxq; + + info->rxq = malloc(sizeof(struct netfront_rxq) * num_queues, + M_DEVBUF, M_WAITOK|M_ZERO); + + for (q = 0; q < num_queues; q++) { + rxq = &info->rxq[q]; + + rxq->id = q; + rxq->info = info; + rxq->target = RX_MIN_TARGET; + rxq->ring_ref = GRANT_REF_INVALID; + rxq->ring.sring = NULL; + snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); + mtx_init(&rxq->lock, rxq->name, "netfront receive lock", + MTX_DEF); + + for (i = 0; i <= NET_RX_RING_SIZE; i++) { + rxq->mbufs[i] = NULL; + rxq->grant_ref[i] = GRANT_REF_INVALID; + } + + mbufq_init(&rxq->batch, INT_MAX); + + /* Start resources allocation */ - info->tx_ring_ref = GRANT_REF_INVALID; - info->rx_ring_ref = GRANT_REF_INVALID; - info->rx.sring = NULL; - info->tx.sring = NULL; + if (gnttab_alloc_grant_references(RX_MAX_TARGET, + &rxq->gref_head) != 0) { + device_printf(dev, "allocating rx gref"); + error = ENOMEM; + goto fail; + } + + rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, + M_WAITOK|M_ZERO); + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&rxq->ring, rxs, PAGE_SIZE); + + error = xenbus_grant_ring(dev, virt_to_mfn(rxs), + &rxq->ring_ref); + if (error != 0) { + device_printf(dev, "granting rx ring page"); + goto fail_grant_ring; + } - txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); - if (!txs) { - error = ENOMEM; - xenbus_dev_fatal(dev, error, "allocating tx ring page"); - goto fail; + TASK_INIT(&rxq->intrtask, 0, xn_rxq_tq_intr, rxq); + rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK, + taskqueue_thread_enqueue, &rxq->tq); + + error = taskqueue_start_threads(&rxq->tq, 1, PI_NET, + "%s rxq %d", device_get_nameunit(dev), rxq->id); + if (error != 0) { + device_printf(dev, "failed to start rx taskq %d\n", + rxq->id); + goto fail_start_thread; + } } - SHARED_RING_INIT(txs); - FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); - error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref); - if (error) - goto fail; - - rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); - if (!rxs) { - error = ENOMEM; - xenbus_dev_fatal(dev, error, "allocating rx ring page"); - goto fail; + + return (0); + +fail_start_thread: + gnttab_end_foreign_access_ref(rxq->ring_ref); + taskqueue_drain_all(rxq->tq); + taskqueue_free(rxq->tq); +fail_grant_ring: + gnttab_free_grant_references(rxq->gref_head); + free(rxq->ring.sring, M_DEVBUF); +fail: + for (; q >= 0; q--) { + disconnect_rxq(&info->rxq[q]); + destroy_rxq(&info->rxq[q]); } - SHARED_RING_INIT(rxs); - FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); - error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref); - if (error) - goto fail; + free(info->rxq, M_DEVBUF); + return (error); +} + +static void +disconnect_txq(struct netfront_txq *txq) +{ + + 
xn_release_tx_bufs(txq); + gnttab_free_grant_references(txq->gref_head); + gnttab_end_foreign_access_ref(txq->ring_ref); + xen_intr_unbind(&txq->xen_intr_handle); +} + +static void +destroy_txq(struct netfront_txq *txq) +{ + + free(txq->ring.sring, M_DEVBUF); + buf_ring_free(txq->br, M_DEVBUF); + taskqueue_drain_all(txq->tq); + taskqueue_free(txq->tq); +} + +static void +destroy_txqs(struct netfront_info *np) +{ + int i; + + for (i = 0; i < np->num_queues; i++) + destroy_txq(&np->txq[i]); + + free(np->txq, M_DEVBUF); + np->txq = NULL; +} + +static int +setup_txqs(device_t dev, struct netfront_info *info, + unsigned long num_queues) +{ + int q, i; + int error; + netif_tx_sring_t *txs; + struct netfront_txq *txq; + + info->txq = malloc(sizeof(struct netfront_txq) * num_queues, + M_DEVBUF, M_WAITOK|M_ZERO); + + for (q = 0; q < num_queues; q++) { + txq = &info->txq[q]; + + txq->id = q; + txq->info = info; + + txq->ring_ref = GRANT_REF_INVALID; + txq->ring.sring = NULL; + + snprintf(txq->name, XN_QUEUE_NAME_LEN, "xntx_%u", q); + + mtx_init(&txq->lock, txq->name, "netfront transmit lock", + MTX_DEF); + + for (i = 0; i <= NET_TX_RING_SIZE; i++) { + txq->mbufs[i] = (void *) ((u_long) i+1); + txq->grant_ref[i] = GRANT_REF_INVALID; + } + txq->mbufs[NET_TX_RING_SIZE] = (void *)0; + + /* Start resources allocation. */ + + if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, + &txq->gref_head) != 0) { + device_printf(dev, "failed to allocate tx grant refs\n"); + error = ENOMEM; + goto fail; + } + + txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, + M_WAITOK|M_ZERO); + SHARED_RING_INIT(txs); + FRONT_RING_INIT(&txq->ring, txs, PAGE_SIZE); + + error = xenbus_grant_ring(dev, virt_to_mfn(txs), + &txq->ring_ref); + if (error != 0) { + device_printf(dev, "failed to grant tx ring\n"); + goto fail_grant_ring; + } + + txq->br = buf_ring_alloc(NET_TX_RING_SIZE, M_DEVBUF, + M_WAITOK, &txq->lock); + TASK_INIT(&txq->defrtask, 0, xn_txq_tq_deferred, txq); + TASK_INIT(&txq->intrtask, 0, xn_txq_tq_intr, txq); + + txq->tq = taskqueue_create_fast(txq->name, M_WAITOK, + taskqueue_thread_enqueue, &txq->tq); - error = xen_intr_alloc_and_bind_local_port(dev, - xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info, - INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle); + error = taskqueue_start_threads(&txq->tq, 1, PI_NET, + "%s txq %d", device_get_nameunit(dev), txq->id); + if (error != 0) { + device_printf(dev, "failed to start tx taskq %d\n", + txq->id); + goto fail_start_thread; + } + + error = xen_intr_alloc_and_bind_local_port(dev, + xenbus_get_otherend_id(dev), xn_intr, /* handler */ NULL, + &info->txq[q], + INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, + &txq->xen_intr_handle); - if (error) { - xenbus_dev_fatal(dev, error, - "xen_intr_alloc_and_bind_local_port failed"); - goto fail; + if (error != 0) { + device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); + goto fail_bind_port; + } } return (0); - fail: - netif_free(info); +fail_bind_port: + taskqueue_drain_all(txq->tq); +fail_start_thread: + gnttab_free_grant_references(txq->gref_head); + free(txq->ring.sring, M_DEVBUF); + gnttab_end_foreign_access_ref(txq->ring_ref); + buf_ring_free(txq->br, M_DEVBUF); + taskqueue_free(txq->tq); +fail_grant_ring: + gnttab_free_grant_references(txq->gref_head); + free(txq->ring.sring, M_DEVBUF); +fail: + for (; q >= 0; q--) { + disconnect_txq(&info->txq[q]); + destroy_txq(&info->txq[q]); + } + + free(info->txq, M_DEVBUF); + return (error); +} + +static int +setup_device(device_t dev, struct 
netfront_info *info, + unsigned long num_queues) +{ + int error; + int q; + + if (info->txq) + destroy_txqs(info); + + if (info->rxq) + destroy_rxqs(info); + + info->num_queues = 0; + + error = setup_rxqs(dev, info, num_queues); + if (error != 0) + goto out; + error = setup_txqs(dev, info, num_queues); + if (error != 0) + goto out; + + info->num_queues = num_queues; + + /* No split event channel at the moment. */ + for (q = 0; q < num_queues; q++) + info->rxq[q].xen_intr_handle = info->txq[q].xen_intr_handle; + + return (0); + +out: + KASSERT(error != 0, ("Error path taken without providing an error code")); return (error); } @@ -614,7 +1000,7 @@ netfront_backend_changed(device_t dev, XenbusState newstate) case XenbusStateInitWait: if (xenbus_get_state(dev) != XenbusStateInitialising) break; - if (network_connect(sc) != 0) + if (xn_connect(sc) != 0) break; xenbus_set_state(dev, XenbusStateConnected); break; @@ -629,42 +1015,6 @@ netfront_backend_changed(device_t dev, XenbusState newstate) } } -static void -xn_free_rx_ring(struct netfront_info *sc) -{ -#if 0 - int i; - - for (i = 0; i < NET_RX_RING_SIZE; i++) { - if (sc->xn_cdata.rx_mbufs[i] != NULL) { - m_freem(sc->rx_mbufs[i]); - sc->rx_mbufs[i] = NULL; - } - } - - sc->rx.rsp_cons = 0; - sc->xn_rx_if->req_prod = 0; - sc->xn_rx_if->event = sc->rx.rsp_cons ; -#endif -} - -static void -xn_free_tx_ring(struct netfront_info *sc) -{ -#if 0 - int i; - - for (i = 0; i < NET_TX_RING_SIZE; i++) { - if (sc->tx_mbufs[i] != NULL) { - m_freem(sc->tx_mbufs[i]); - sc->xn_cdata.xn_tx_chain[i] = NULL; - } - } - - return; -#endif -} - /** * \brief Verify that there is sufficient space in the Tx ring * buffer for a maximally sized request to be enqueued. @@ -673,20 +1023,21 @@ xn_free_tx_ring(struct netfront_info *sc) * fragment, plus up to 2 entries for "options" (e.g. TSO). 
*/ static inline int -xn_tx_slot_available(struct netfront_info *np) +xn_tx_slot_available(struct netfront_txq *txq) { - return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2)); + + return (RING_FREE_REQUESTS(&txq->ring) > (MAX_TX_REQ_FRAGS + 2)); } static void -netif_release_tx_bufs(struct netfront_info *np) +xn_release_tx_bufs(struct netfront_txq *txq) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; - m = np->tx_mbufs[i]; + m = txq->mbufs[i]; /* * We assume that no kernel addresses are @@ -696,13 +1047,13 @@ netif_release_tx_bufs(struct netfront_info *np) */ if (((uintptr_t)m) <= NET_TX_RING_SIZE) continue; - gnttab_end_foreign_access_ref(np->grant_tx_ref[i]); - gnttab_release_grant_reference(&np->gref_tx_head, - np->grant_tx_ref[i]); - np->grant_tx_ref[i] = GRANT_REF_INVALID; - add_id_to_freelist(np->tx_mbufs, i); - np->xn_cdata.xn_tx_chain_cnt--; - if (np->xn_cdata.xn_tx_chain_cnt < 0) { + gnttab_end_foreign_access_ref(txq->grant_ref[i]); + gnttab_release_grant_reference(&txq->gref_head, + txq->grant_ref[i]); + txq->grant_ref[i] = GRANT_REF_INVALID; + add_id_to_freelist(txq->mbufs, i); + txq->mbufs_cnt--; + if (txq->mbufs_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } m_free(m); @@ -710,9 +1061,10 @@ netif_release_tx_bufs(struct netfront_info *np) } static void -network_alloc_rx_buffers(struct netfront_info *sc) +xn_alloc_rx_buffers(struct netfront_rxq *rxq) { - int otherend_id = xenbus_get_otherend_id(sc->xbdev); + struct netfront_info *np = rxq->info; + int otherend_id = xenbus_get_otherend_id(np->xbdev); unsigned short id; struct mbuf *m_new; int i, batch_target, notify; @@ -722,9 +1074,9 @@ network_alloc_rx_buffers(struct netfront_info *sc) vm_offset_t vaddr; u_long pfn; - req_prod = sc->rx.req_prod_pvt; + req_prod = rxq->ring.req_prod_pvt; - if (__predict_false(sc->carrier == 0)) + if (__predict_false(np->carrier == 0)) return; /* @@ -736,21 +1088,19 @@ network_alloc_rx_buffers(struct netfront_info *sc) * Here we attempt to maintain rx_target buffers in flight, counting * buffers that we have yet to process in the receive ring. */ - batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); - for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { + batch_target = rxq->target - (req_prod - rxq->ring.rsp_cons); + for (i = mbufq_len(&rxq->batch); i < batch_target; i++) { m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m_new == NULL) { if (i != 0) goto refill; - /* - * XXX set timer - */ + /* XXX set timer */ break; } m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; /* queue the mbufs allocated */ - (void )mbufq_enqueue(&sc->xn_rx_batch, m_new); + mbufq_enqueue(&rxq->batch, m_new); } /* @@ -759,8 +1109,8 @@ network_alloc_rx_buffers(struct netfront_info *sc) * of submission worthwhile. Otherwise wait for more mbufs and * request entries to become available. */ - if (i < (sc->rx_target/2)) { - if (req_prod >sc->rx.sring->req_prod) + if (i < (rxq->target/2)) { + if (req_prod > rxq->ring.sring->req_prod) goto push; return; } @@ -771,44 +1121,44 @@ network_alloc_rx_buffers(struct netfront_info *sc) * low" as having less than a fourth of our target buffers free * at the time we refilled the queue. 
*/ - if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) { - sc->rx_target *= 2; - if (sc->rx_target > sc->rx_max_target) - sc->rx_target = sc->rx_max_target; + if ((req_prod - rxq->ring.sring->rsp_prod) < (rxq->target / 4)) { + rxq->target *= 2; + if (rxq->target > np->rx_max_target) + rxq->target = np->rx_max_target; } refill: for (i = 0; ; i++) { - if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) + if ((m_new = mbufq_dequeue(&rxq->batch)) == NULL) break; m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); - id = xennet_rxidx(req_prod + i); + id = xn_rxidx(req_prod + i); - KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain")); - sc->rx_mbufs[id] = m_new; + KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); + rxq->mbufs[id] = m_new; - ref = gnttab_claim_grant_reference(&sc->gref_rx_head); + ref = gnttab_claim_grant_reference(&rxq->gref_head); KASSERT(ref != GNTTAB_LIST_END, ("reserved grant references exhuasted")); - sc->grant_rx_ref[id] = ref; + rxq->grant_ref[id] = ref; vaddr = mtod(m_new, vm_offset_t); pfn = vtophys(vaddr) >> PAGE_SHIFT; - req = RING_GET_REQUEST(&sc->rx, req_prod + i); + req = RING_GET_REQUEST(&rxq->ring, req_prod + i); gnttab_grant_foreign_access_ref(ref, otherend_id, pfn, 0); req->id = id; req->gref = ref; - sc->rx_pfn_array[i] = + rxq->pfn_array[i] = vtophys(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; } KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ - KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed")); + KASSERT(mbufq_len(&rxq->batch) == 0, ("not all mbufs processed")); /* * We may have allocated buffers which have entries outstanding * in the page * update queue -- make sure we flush those first! @@ -816,19 +1166,44 @@ refill: wmb(); /* Above is a suitable barrier to ensure backend will see requests. */ - sc->rx.req_prod_pvt = req_prod + i; + rxq->ring.req_prod_pvt = req_prod + i; push: - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); if (notify) - xen_intr_signal(sc->xen_intr_handle); + xen_intr_signal(rxq->xen_intr_handle); } static void -xn_rxeof(struct netfront_info *np) +xn_release_rx_bufs(struct netfront_rxq *rxq) +{ + int i, ref; + struct mbuf *m; + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + m = rxq->mbufs[i]; + + if (m == NULL) + continue; + + ref = rxq->grant_ref[i]; + if (ref == GRANT_REF_INVALID) + continue; + + gnttab_end_foreign_access_ref(ref); + gnttab_release_grant_reference(&rxq->gref_head, ref); + rxq->mbufs[i] = NULL; + rxq->grant_ref[i] = GRANT_REF_INVALID; + m_freem(m); + } +} + +static void +xn_rxeof(struct netfront_rxq *rxq) { struct ifnet *ifp; + struct netfront_info *np = rxq->info; #if (defined(INET) || defined(INET6)) - struct lro_ctrl *lro = &np->xn_lro; + struct lro_ctrl *lro = &rxq->lro; struct lro_entry *queued; #endif struct netfront_rx_info rinfo; @@ -836,35 +1211,35 @@ xn_rxeof(struct netfront_info *np) struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct mbuf *m; - struct mbufq rxq, errq; + struct mbufq mbufq_rxq, mbufq_errq; int err, work_to_do; do { - XN_RX_LOCK_ASSERT(np); + XN_RX_LOCK_ASSERT(rxq); if (!netfront_carrier_ok(np)) return; /* XXX: there should be some sane limit. 
*/ - mbufq_init(&errq, INT_MAX); - mbufq_init(&rxq, INT_MAX); + mbufq_init(&mbufq_errq, INT_MAX); + mbufq_init(&mbufq_rxq, INT_MAX); ifp = np->xn_ifp; - rp = np->rx.sring->rsp_prod; + rp = rxq->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - i = np->rx.rsp_cons; + i = rxq->ring.rsp_cons; while ((i != rp)) { - memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memcpy(rx, RING_GET_RESPONSE(&rxq->ring, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; - err = xennet_get_responses(np, &rinfo, rp, &i, &m); + err = xn_get_responses(rxq, &rinfo, rp, &i, &m); if (__predict_false(err)) { if (m) - (void )mbufq_enqueue(&errq, m); - np->stats.rx_errors++; + (void )mbufq_enqueue(&mbufq_errq, m); + rxq->stats.rx_errors++; continue; } @@ -882,26 +1257,24 @@ xn_rxeof(struct netfront_info *np) m->m_pkthdr.csum_data = 0xffff; } - np->stats.rx_packets++; - np->stats.rx_bytes += m->m_pkthdr.len; + rxq->stats.rx_packets++; + rxq->stats.rx_bytes += m->m_pkthdr.len; - (void )mbufq_enqueue(&rxq, m); - np->rx.rsp_cons = i; + (void )mbufq_enqueue(&mbufq_rxq, m); + rxq->ring.rsp_cons = i; } - mbufq_drain(&errq); + mbufq_drain(&mbufq_errq); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ - while ((m = mbufq_dequeue(&rxq)) != NULL) { + while ((m = mbufq_dequeue(&mbufq_rxq)) != NULL) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); - /* - * Do we really need to drop the rx lock? - */ - XN_RX_UNLOCK(np); + /* XXX: Do we really need to drop the rx lock? */ + XN_RX_UNLOCK(rxq); #if (defined(INET) || defined(INET6)) /* Use LRO if possible */ if ((ifp->if_capenable & IFCAP_LRO) == 0 || @@ -915,10 +1288,11 @@ xn_rxeof(struct netfront_info *np) #else (*ifp->if_input)(ifp, m); #endif - XN_RX_LOCK(np); + + XN_RX_LOCK(rxq); } - np->rx.rsp_cons = i; + rxq->ring.rsp_cons = i; #if (defined(INET) || defined(INET6)) /* @@ -931,30 +1305,23 @@ xn_rxeof(struct netfront_info *np) } #endif -#if 0 - /* If we get a callback with very few responses, reduce fill target. */ - /* NB. Note exponential increase, linear decrease. */ - if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > - ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) - np->rx_target = np->rx_min_target; -#endif - - network_alloc_rx_buffers(np); + xn_alloc_rx_buffers(rxq); - RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do); + RING_FINAL_CHECK_FOR_RESPONSES(&rxq->ring, work_to_do); } while (work_to_do); } static void -xn_txeof(struct netfront_info *np) +xn_txeof(struct netfront_txq *txq) { RING_IDX i, prod; unsigned short id; struct ifnet *ifp; netif_tx_response_t *txr; struct mbuf *m; + struct netfront_info *np = txq->info; - XN_TX_LOCK_ASSERT(np); + XN_TX_LOCK_ASSERT(txq); if (!netfront_carrier_ok(np)) return; @@ -962,11 +1329,11 @@ xn_txeof(struct netfront_info *np) ifp = np->xn_ifp; do { - prod = np->tx.sring->rsp_prod; + prod = txq->ring.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. 
*/ - for (i = np->tx.rsp_cons; i != prod; i++) { - txr = RING_GET_RESPONSE(&np->tx, i); + for (i = txq->ring.rsp_cons; i != prod; i++) { + txr = RING_GET_RESPONSE(&txq->ring, i); if (txr->status == NETIF_RSP_NULL) continue; @@ -975,8 +1342,8 @@ xn_txeof(struct netfront_info *np) __func__, txr->status); } id = txr->id; - m = np->tx_mbufs[id]; - KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); + m = txq->mbufs[id]; + KASSERT(m != NULL, ("mbuf not found in chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); @@ -989,24 +1356,23 @@ xn_txeof(struct netfront_info *np) if (!m->m_next) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (__predict_false(gnttab_query_foreign_access( - np->grant_tx_ref[id]) != 0)) { + txq->grant_ref[id]) != 0)) { panic("%s: grant id %u still in use by the " "backend", __func__, id); } - gnttab_end_foreign_access_ref( - np->grant_tx_ref[id]); + gnttab_end_foreign_access_ref(txq->grant_ref[id]); gnttab_release_grant_reference( - &np->gref_tx_head, np->grant_tx_ref[id]); - np->grant_tx_ref[id] = GRANT_REF_INVALID; + &txq->gref_head, txq->grant_ref[id]); + txq->grant_ref[id] = GRANT_REF_INVALID; - np->tx_mbufs[id] = NULL; - add_id_to_freelist(np->tx_mbufs, id); - np->xn_cdata.xn_tx_chain_cnt--; + txq->mbufs[id] = NULL; + add_id_to_freelist(txq->mbufs, id); + txq->mbufs_cnt--; m_free(m); - /* Only mark the queue active if we've freed up at least one slot to try */ + /* Only mark the txq active if we've freed up at least one slot to try */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } - np->tx.rsp_cons = prod; + txq->ring.rsp_cons = prod; /* * Set a new event, then check for race with update of @@ -1017,65 +1383,66 @@ xn_txeof(struct netfront_info *np) * cases notification from Xen is likely to be the only kick * that we'll get. 
*/ - np->tx.sring->rsp_event = - prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; + txq->ring.sring->rsp_event = + prod + ((txq->ring.sring->req_prod - prod) >> 1) + 1; mb(); - } while (prod != np->tx.sring->rsp_prod); + } while (prod != txq->ring.sring->rsp_prod); - if (np->tx_full && - ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { - np->tx_full = 0; -#if 0 - if (np->user_state == UST_OPEN) - netif_wake_queue(dev); -#endif + if (txq->full && + ((txq->ring.sring->req_prod - prod) < NET_TX_RING_SIZE)) { + txq->full = false; + taskqueue_enqueue(txq->tq, &txq->intrtask); } } + static void -xn_intr(void *xsc) +xn_rxq_intr(void *xrxq) { - struct netfront_info *np = xsc; - struct ifnet *ifp = np->xn_ifp; + struct netfront_rxq *rxq = xrxq; -#if 0 - if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && - likely(netfront_carrier_ok(np)) && - ifp->if_drv_flags & IFF_DRV_RUNNING)) - return; -#endif - if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) { - XN_TX_LOCK(np); - xn_txeof(np); - XN_TX_UNLOCK(np); - } + taskqueue_enqueue_fast(rxq->tq, &rxq->intrtask); +} - XN_RX_LOCK(np); - xn_rxeof(np); - XN_RX_UNLOCK(np); +static void +xn_txq_intr(void *xtxq) +{ + struct netfront_txq *txq = xtxq; - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - xn_start(ifp); + taskqueue_enqueue_fast(txq->tq, &txq->intrtask); +} + +static int +xn_intr(void *xsc) +{ + struct netfront_txq *txq = xsc; + struct netfront_info *np = txq->info; + struct netfront_rxq *rxq = &np->rxq[txq->id]; + + /* kick both tx and rx */ + xn_rxq_intr(rxq); + xn_txq_intr(txq); + + return (FILTER_HANDLED); } static void -xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, - grant_ref_t ref) +xn_move_rx_slot(struct netfront_rxq *rxq, struct mbuf *m, + grant_ref_t ref) { - int new = xennet_rxidx(np->rx.req_prod_pvt); - - KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL")); - np->rx_mbufs[new] = m; - np->grant_rx_ref[new] = ref; - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; - np->rx.req_prod_pvt++; + int new = xn_rxidx(rxq->ring.req_prod_pvt); + + KASSERT(rxq->mbufs[new] == NULL, ("mbufs != NULL")); + rxq->mbufs[new] = m; + rxq->grant_ref[new] = ref; + RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->id = new; + RING_GET_REQUEST(&rxq->ring, rxq->ring.req_prod_pvt)->gref = ref; + rxq->ring.req_prod_pvt++; } static int -xennet_get_extras(struct netfront_info *np, +xn_get_extras(struct netfront_rxq *rxq, struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) { struct netif_extra_info *extra; @@ -1087,55 +1454,46 @@ xennet_get_extras(struct netfront_info *np, grant_ref_t ref; if (__predict_false(*cons + 1 == rp)) { -#if 0 - if (net_ratelimit()) - WPRINTK("Missing extra info\n"); -#endif err = EINVAL; break; } extra = (struct netif_extra_info *) - RING_GET_RESPONSE(&np->rx, ++(*cons)); + RING_GET_RESPONSE(&rxq->ring, ++(*cons)); if (__predict_false(!extra->type || extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { -#if 0 - if (net_ratelimit()) - WPRINTK("Invalid extra type: %d\n", - extra->type); -#endif err = EINVAL; } else { memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); } - m = xennet_get_rx_mbuf(np, *cons); - ref = xennet_get_rx_ref(np, *cons); - xennet_move_rx_slot(np, m, ref); + m = xn_get_rx_mbuf(rxq, *cons); + ref = xn_get_rx_ref(rxq, *cons); + xn_move_rx_slot(rxq, m, ref); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); return err; } static int -xennet_get_responses(struct netfront_info *np, - struct 
netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, - struct mbuf **list) +xn_get_responses(struct netfront_rxq *rxq, + struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, + struct mbuf **list) { struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; struct mbuf *m, *m0, *m_prev; - grant_ref_t ref = xennet_get_rx_ref(np, *cons); + grant_ref_t ref = xn_get_rx_ref(rxq, *cons); RING_IDX ref_cons = *cons; int frags = 1; int err = 0; u_long ret; - m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons); + m0 = m = m_prev = xn_get_rx_mbuf(rxq, *cons); if (rx->flags & NETRXF_extra_info) { - err = xennet_get_extras(np, extras, rp, cons); + err = xn_get_extras(rxq, extras, rp, cons); } if (m0 != NULL) { @@ -1151,12 +1509,7 @@ xennet_get_responses(struct netfront_info *np, if (__predict_false(rx->status < 0 || rx->offset + rx->status > PAGE_SIZE)) { -#if 0 - if (net_ratelimit()) - WPRINTK("rx->offset: %x, size: %u\n", - rx->offset, rx->status); -#endif - xennet_move_rx_slot(np, m, ref); + xn_move_rx_slot(rxq, m, ref); if (m0 == m) m0 = NULL; m = NULL; @@ -1170,12 +1523,7 @@ xennet_get_responses(struct netfront_info *np, * situation to the system controller to reboot the backed. */ if (ref == GRANT_REF_INVALID) { - -#if 0 - if (net_ratelimit()) - WPRINTK("Bad rx response id %d.\n", rx->id); -#endif - printf("%s: Bad rx response id %d.\n", __func__,rx->id); + printf("%s: Bad rx response id %d.\n", __func__, rx->id); err = EINVAL; goto next; } @@ -1183,7 +1531,7 @@ xennet_get_responses(struct netfront_info *np, ret = gnttab_end_foreign_access_ref(ref); KASSERT(ret, ("Unable to end access to grant references")); - gnttab_release_grant_reference(&np->gref_rx_head, ref); + gnttab_release_grant_reference(&rxq->gref_head, ref); next: if (m == NULL) @@ -1211,8 +1559,8 @@ next_skip_queue: */ m_prev = m; - rx = RING_GET_RESPONSE(&np->rx, *cons + frags); - m = xennet_get_rx_mbuf(np, *cons + frags); + rx = RING_GET_RESPONSE(&rxq->ring, *cons + frags); + m = xn_get_rx_mbuf(rxq, *cons + frags); /* * m_prev == NULL can happen if rx->status < 0 or if @@ -1228,7 +1576,7 @@ next_skip_queue: if (m0 == NULL) m0 = m; m->m_next = NULL; - ref = xennet_get_rx_ref(np, *cons + frags); + ref = xn_get_rx_ref(rxq, *cons + frags); ref_cons = *cons + frags; frags++; } @@ -1238,26 +1586,6 @@ next_skip_queue: return (err); } -static void -xn_tick_locked(struct netfront_info *sc) -{ - XN_RX_LOCK_ASSERT(sc); - callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); - - /* XXX placeholder for printing debug information */ -} - -static void -xn_tick(void *xsc) -{ - struct netfront_info *sc; - - sc = xsc; - XN_RX_LOCK(sc); - xn_tick_locked(sc); - XN_RX_UNLOCK(sc); -} - /** * \brief Count the number of fragments in an mbuf chain. * @@ -1279,15 +1607,14 @@ xn_count_frags(struct mbuf *m) * it onto the transmit ring. */ static int -xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) +xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) { - struct ifnet *ifp; struct mbuf *m; + struct netfront_info *np = txq->info; + struct ifnet *ifp = np->xn_ifp; u_int nfrags; int otherend_id; - ifp = sc->xn_ifp; - /** * Defragment the mbuf if necessary. */ @@ -1302,7 +1629,7 @@ xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of * the Linux network stack. 
*/ - if (nfrags > sc->maxfrags) { + if (nfrags > np->maxfrags) { m = m_defrag(m_head, M_NOWAIT); if (!m) { /* @@ -1344,11 +1671,11 @@ xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) * have enough slots in the ring to handle a packet of maximum * size, and that our packet is less than the maximum size. Keep * it in here as an assert for now just to make certain that - * xn_tx_chain_cnt is accurate. + * chain_cnt is accurate. */ - KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE, - ("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " - "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt, + KASSERT((txq->mbufs_cnt + nfrags) <= NET_TX_RING_SIZE, + ("%s: chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " + "(%d)!", __func__, (int) txq->mbufs_cnt, (int) nfrags, (int) NET_TX_RING_SIZE)); /* @@ -1357,30 +1684,30 @@ xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) * of fragments or hit the end of the mbuf chain. */ m = m_head; - otherend_id = xenbus_get_otherend_id(sc->xbdev); + otherend_id = xenbus_get_otherend_id(np->xbdev); for (m = m_head; m; m = m->m_next) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? */ - tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt); - id = get_id_from_freelist(sc->tx_mbufs); + tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); + id = get_id_from_freelist(txq->mbufs); if (id == 0) panic("%s: was allocated the freelist head!\n", __func__); - sc->xn_cdata.xn_tx_chain_cnt++; - if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE) + txq->mbufs_cnt++; + if (txq->mbufs_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); - sc->tx_mbufs[id] = m; + txq->mbufs[id] = m; tx->id = id; - ref = gnttab_claim_grant_reference(&sc->gref_tx_head); + ref = gnttab_claim_grant_reference(&txq->gref_head); KASSERT((short)ref >= 0, ("Negative ref")); mfn = virt_to_mfn(mtod(m, vm_offset_t)); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); - tx->gref = sc->grant_tx_ref[id] = ref; + tx->gref = txq->grant_ref[id] = ref; tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); tx->flags = 0; if (m == m_head) { @@ -1414,8 +1741,8 @@ xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct netif_extra_info *gso = (struct netif_extra_info *) - RING_GET_REQUEST(&sc->tx, - ++sc->tx.req_prod_pvt); + RING_GET_REQUEST(&txq->ring, + ++txq->ring.req_prod_pvt); tx->flags |= NETTXF_extra_info; @@ -1434,87 +1761,44 @@ xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) if (m->m_next) tx->flags |= NETTXF_more_data; - sc->tx.req_prod_pvt++; + txq->ring.req_prod_pvt++; } BPF_MTAP(ifp, m_head); - sc->stats.tx_bytes += m_head->m_pkthdr.len; - sc->stats.tx_packets++; - - return (0); -} - -static void -xn_start_locked(struct ifnet *ifp) -{ - struct netfront_info *sc; - struct mbuf *m_head; - int notify; - - sc = ifp->if_softc; + xn_txeof(txq); - if (!netfront_carrier_ok(sc)) - return; - - /* - * While we have enough transmit slots available for at least one - * maximum-sized packet, pull mbufs off the queue and put them on - * the transmit ring. 
- */ - while (xn_tx_slot_available(sc)) { - IF_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - - if (xn_assemble_tx_request(sc, m_head) != 0) - break; - } + txq->stats.tx_bytes += m_head->m_pkthdr.len; + txq->stats.tx_packets++; - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); - if (notify) - xen_intr_signal(sc->xen_intr_handle); - - if (RING_FULL(&sc->tx)) { - sc->tx_full = 1; -#if 0 - netif_stop_queue(dev); -#endif - } -} - -static void -xn_start(struct ifnet *ifp) -{ - struct netfront_info *sc; - sc = ifp->if_softc; - XN_TX_LOCK(sc); - xn_start_locked(ifp); - XN_TX_UNLOCK(sc); + return (0); } /* equivalent of network_open() in Linux */ static void -xn_ifinit_locked(struct netfront_info *sc) +xn_ifinit_locked(struct netfront_info *np) { struct ifnet *ifp; + int i; + struct netfront_rxq *rxq; - XN_LOCK_ASSERT(sc); + XN_LOCK_ASSERT(np); - ifp = sc->xn_ifp; + ifp = np->xn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; - xn_stop(sc); + xn_stop(np); - network_alloc_rx_buffers(sc); - sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; + for (i = 0; i < np->num_queues; i++) { + rxq = &np->rxq[i]; + xn_alloc_rx_buffers(rxq); + rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; + } ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); - - callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); } static void @@ -1556,17 +1840,9 @@ xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif break; case SIOCSIFMTU: - /* XXX can we alter the MTU on a VN ?*/ -#ifdef notyet - if (ifr->ifr_mtu > XN_JUMBO_MTU) - error = EINVAL; - else -#endif - { - ifp->if_mtu = ifr->ifr_mtu; - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - xn_ifinit(sc); - } + ifp->if_mtu = ifr->ifr_mtu; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + xn_ifinit(sc); break; case SIOCSIFFLAGS: XN_LOCK(sc); @@ -1579,21 +1855,7 @@ xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * waiting for it to start up, which may take a * second or two. 
*/ -#ifdef notyet - /* No promiscuous mode with Xen */ - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - ifp->if_flags & IFF_PROMISC && - !(sc->xn_if_flags & IFF_PROMISC)) { - XN_SETBIT(sc, XN_RX_MODE, - XN_RXMODE_RX_PROMISC); - } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !(ifp->if_flags & IFF_PROMISC) && - sc->xn_if_flags & IFF_PROMISC) { - XN_CLRBIT(sc, XN_RX_MODE, - XN_RXMODE_RX_PROMISC); - } else -#endif - xn_ifinit_locked(sc); + xn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xn_stop(sc); @@ -1640,14 +1902,6 @@ xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCADDMULTI: case SIOCDELMULTI: -#ifdef notyet - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - XN_LOCK(sc); - xn_setmulti(sc); - XN_UNLOCK(sc); - error = 0; - } -#endif break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: @@ -1669,27 +1923,55 @@ xn_stop(struct netfront_info *sc) ifp = sc->xn_ifp; - callout_stop(&sc->xn_stat_ch); - - xn_free_rx_ring(sc); - xn_free_tx_ring(sc); - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } -/* START of Xenolinux helper functions adapted to FreeBSD */ -int -network_connect(struct netfront_info *np) +static void +xn_rebuild_rx_bufs(struct netfront_rxq *rxq) { - int i, requeue_idx, error; + int requeue_idx, i; grant_ref_t ref; netif_rx_request_t *req; + + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { + struct mbuf *m; + u_long pfn; + + if (rxq->mbufs[i] == NULL) + continue; + + m = rxq->mbufs[requeue_idx] = xn_get_rx_mbuf(rxq, i); + ref = rxq->grant_ref[requeue_idx] = xn_get_rx_ref(rxq, i); + + req = RING_GET_REQUEST(&rxq->ring, requeue_idx); + pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; + + gnttab_grant_foreign_access_ref(ref, + xenbus_get_otherend_id(rxq->info->xbdev), + pfn, 0); + + req->gref = ref; + req->id = requeue_idx; + + requeue_idx++; + } + + rxq->ring.req_prod_pvt = requeue_idx; +} + +/* START of Xenolinux helper functions adapted to FreeBSD */ +int +xn_connect(struct netfront_info *np) +{ + int i, error; u_int feature_rx_copy; + struct netfront_rxq *rxq; + struct netfront_txq *txq; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); - if (error) + if (error != 0) feature_rx_copy = 0; /* We only support rx copy. */ @@ -1698,51 +1980,39 @@ network_connect(struct netfront_info *np) /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); - if (error) + if (error != 0) return (error); /* Step 1: Reinitialise variables. */ xn_query_features(np); xn_configure_features(np); - netif_release_tx_bufs(np); - - /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ - for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { - struct mbuf *m; - u_long pfn; - - if (np->rx_mbufs[i] == NULL) - continue; - - m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); - ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); - - req = RING_GET_REQUEST(&np->rx, requeue_idx); - pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; - gnttab_grant_foreign_access_ref(ref, - xenbus_get_otherend_id(np->xbdev), - pfn, 0); - - req->gref = ref; - req->id = requeue_idx; - - requeue_idx++; + /* Step 2: Release TX buffer */ + for (i = 0; i < np->num_queues; i++) { + txq = &np->txq[i]; + xn_release_tx_bufs(txq); } - np->rx.req_prod_pvt = requeue_idx; + /* Step 3: Rebuild the RX buffer freelist and the RX ring itself. 
*/ + for (i = 0; i < np->num_queues; i++) { + rxq = &np->rxq[i]; + xn_rebuild_rx_bufs(rxq); + } - /* Step 3: All public and private state should now be sane. Get + /* Step 4: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); - xen_intr_signal(np->xen_intr_handle); - XN_TX_LOCK(np); - xn_txeof(np); - XN_TX_UNLOCK(np); - network_alloc_rx_buffers(np); + for (i = 0; i < np->num_queues; i++) { + txq = &np->txq[i]; + xen_intr_signal(txq->xen_intr_handle); + XN_TX_LOCK(txq); + xn_txeof(txq); + XN_TX_UNLOCK(txq); + xn_alloc_rx_buffers(rxq); + } return (0); } @@ -1781,6 +2051,9 @@ static int xn_configure_features(struct netfront_info *np) { int err, cap_enabled; +#if (defined(INET) || defined(INET6)) + int i; +#endif err = 0; @@ -1798,21 +2071,25 @@ xn_configure_features(struct netfront_info *np) cap_enabled = UINT_MAX; #if (defined(INET) || defined(INET6)) - if ((np->xn_ifp->if_capenable & IFCAP_LRO) == (cap_enabled & IFCAP_LRO)) - tcp_lro_free(&np->xn_lro); + for (i = 0; i < np->num_queues; i++) + if ((np->xn_ifp->if_capenable & IFCAP_LRO) == + (cap_enabled & IFCAP_LRO)) + tcp_lro_free(&np->rxq[i].lro); #endif np->xn_ifp->if_capenable = np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4) & cap_enabled; np->xn_ifp->if_hwassist &= ~CSUM_TSO; #if (defined(INET) || defined(INET6)) - if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) == - (cap_enabled & IFCAP_LRO)) { - err = tcp_lro_init(&np->xn_lro); - if (err) { - device_printf(np->xbdev, "LRO initialization failed\n"); - } else { - np->xn_lro.ifp = np->xn_ifp; - np->xn_ifp->if_capenable |= IFCAP_LRO; + for (i = 0; i < np->num_queues; i++) { + if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) == + (cap_enabled & IFCAP_LRO)) { + err = tcp_lro_init(&np->rxq[i].lro); + if (err != 0) { + device_printf(np->xbdev, "LRO initialization failed\n"); + } else { + np->rxq[i].lro.ifp = np->xn_ifp; + np->xn_ifp->if_capenable |= IFCAP_LRO; + } } } if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) == @@ -1824,6 +2101,111 @@ xn_configure_features(struct netfront_info *np) return (err); } +static int +xn_txq_mq_start_locked(struct netfront_txq *txq, struct mbuf *m) +{ + struct netfront_info *np; + struct ifnet *ifp; + struct buf_ring *br; + int error, notify; + + np = txq->info; + br = txq->br; + ifp = np->xn_ifp; + error = 0; + + XN_TX_LOCK_ASSERT(txq); + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + !netfront_carrier_ok(np)) { + if (m != NULL) + error = drbr_enqueue(ifp, br, m); + return (error); + } + + if (m != NULL) { + error = drbr_enqueue(ifp, br, m); + if (error != 0) + return (error); + } + + while ((m = drbr_peek(ifp, br)) != NULL) { + if (!xn_tx_slot_available(txq)) { + drbr_putback(ifp, br, m); + break; + } + + error = xn_assemble_tx_request(txq, m); + /* xn_assemble_tx_request always consumes the mbuf*/ + if (error != 0) { + drbr_advance(ifp, br); + break; + } + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&txq->ring, notify); + if (notify) + xen_intr_signal(txq->xen_intr_handle); + + drbr_advance(ifp, br); + } + + if (RING_FULL(&txq->ring)) + txq->full = true; + + return (0); +} + +static int +xn_txq_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + struct netfront_info *np; + struct netfront_txq *txq; + int i, npairs, error; + + np = ifp->if_softc; + npairs = np->num_queues; + + /* check if flowid is set */ + if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) + i = 
m->m_pkthdr.flowid % npairs; + else + i = curcpu % npairs; + + txq = &np->txq[i]; + + if (XN_TX_TRYLOCK(txq) != 0) { + error = xn_txq_mq_start_locked(txq, m); + XN_TX_UNLOCK(txq); + } else { + error = drbr_enqueue(ifp, txq->br, m); + taskqueue_enqueue(txq->tq, &txq->defrtask); + } + + return (error); +} + +static void +xn_qflush(struct ifnet *ifp) +{ + struct netfront_info *np; + struct netfront_txq *txq; + struct mbuf *m; + int i; + + np = ifp->if_softc; + + for (i = 0; i < np->num_queues; i++) { + txq = &np->txq[i]; + + XN_TX_LOCK(txq); + while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) + m_freem(m); + XN_TX_UNLOCK(txq); + } + + if_qflush(ifp); +} + /** * Create a network device. * @param dev Newbus device representing this virtual NIC. @@ -1831,7 +2213,6 @@ xn_configure_features(struct netfront_info *np) int create_netdev(device_t dev) { - int i; struct netfront_info *np; int err; struct ifnet *ifp; @@ -1840,55 +2221,18 @@ create_netdev(device_t dev) np->xbdev = dev; - mtx_init(&np->tx_lock, "xntx", "netfront transmit lock", MTX_DEF); - mtx_init(&np->rx_lock, "xnrx", "netfront receive lock", MTX_DEF); mtx_init(&np->sc_lock, "xnsc", "netfront softc lock", MTX_DEF); ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); - np->rx_target = RX_MIN_TARGET; np->rx_min_target = RX_MIN_TARGET; np->rx_max_target = RX_MAX_TARGET; - /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for (i = 0; i <= NET_TX_RING_SIZE; i++) { - np->tx_mbufs[i] = (void *) ((u_long) i+1); - np->grant_tx_ref[i] = GRANT_REF_INVALID; - } - np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0; - - for (i = 0; i <= NET_RX_RING_SIZE; i++) { - - np->rx_mbufs[i] = NULL; - np->grant_rx_ref[i] = GRANT_REF_INVALID; - } - - mbufq_init(&np->xn_rx_batch, INT_MAX); - - /* A grant for every tx ring slot */ - if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, - &np->gref_tx_head) != 0) { - IPRINTK("#### netfront can't alloc tx grant refs\n"); - err = ENOMEM; - goto error; - } - /* A grant for every rx ring slot */ - if (gnttab_alloc_grant_references(RX_MAX_TARGET, - &np->gref_rx_head) != 0) { - WPRINTK("#### netfront can't alloc rx grant refs\n"); - gnttab_free_grant_references(np->gref_tx_head); - err = ENOMEM; - goto error; - } - err = xen_net_read_mac(dev, np->mac); - if (err) { - gnttab_free_grant_references(np->gref_rx_head); - gnttab_free_grant_references(np->gref_tx_head); + if (err != 0) goto error; - } /* Set up ifnet structure */ ifp = np->xn_ifp = if_alloc(IFT_ETHER); @@ -1896,12 +2240,11 @@ create_netdev(device_t dev) if_initname(ifp, "xn", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xn_ioctl; - ifp->if_start = xn_start; -#ifdef notyet - ifp->if_watchdog = xn_watchdog; -#endif + + ifp->if_transmit = xn_txq_mq_start; + ifp->if_qflush = xn_qflush; + ifp->if_init = xn_ifinit; - ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; @@ -1910,7 +2253,6 @@ create_netdev(device_t dev) ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ether_ifattach(ifp, np->mac); - callout_init(&np->xn_stat_ch, 1); netfront_carrier_off(np); return (0); @@ -1920,27 +2262,6 @@ error: return (err); } -/** - * Handle the change of state of the backend to Closing. We must delete our - * device-layer structures now, to ensure that writes are flushed through to - * the backend. 
Once is this done, we can switch to Closed in - * acknowledgement. - */ -#if 0 -static void -netfront_closing(device_t dev) -{ -#if 0 - struct netfront_info *info = dev->dev_driver_data; - - DPRINTK("netfront_closing: %s removed\n", dev->nodename); - - close_netdev(info); -#endif - xenbus_switch_state(dev, XenbusStateClosed); -} -#endif - static int netfront_detach(device_t dev) { @@ -1954,58 +2275,55 @@ netfront_detach(device_t dev) } static void -netif_free(struct netfront_info *info) +netif_free(struct netfront_info *np) { - XN_LOCK(info); - xn_stop(info); - XN_UNLOCK(info); - callout_drain(&info->xn_stat_ch); - netif_disconnect_backend(info); - if (info->xn_ifp != NULL) { - ether_ifdetach(info->xn_ifp); - if_free(info->xn_ifp); - info->xn_ifp = NULL; + + XN_LOCK(np); + xn_stop(np); + XN_UNLOCK(np); + netif_disconnect_backend(np); + free(np->rxq, M_DEVBUF); + free(np->txq, M_DEVBUF); + if (np->xn_ifp != NULL) { + ether_ifdetach(np->xn_ifp); + if_free(np->xn_ifp); + np->xn_ifp = NULL; } - ifmedia_removeall(&info->sc_media); + ifmedia_removeall(&np->sc_media); } static void -netif_disconnect_backend(struct netfront_info *info) +netif_disconnect_backend(struct netfront_info *np) { - XN_RX_LOCK(info); - XN_TX_LOCK(info); - netfront_carrier_off(info); - XN_TX_UNLOCK(info); - XN_RX_UNLOCK(info); - - free_ring(&info->tx_ring_ref, &info->tx.sring); - free_ring(&info->rx_ring_ref, &info->rx.sring); - - xen_intr_unbind(&info->xen_intr_handle); -} + u_int i; -static void -free_ring(int *ref, void *ring_ptr_ref) -{ - void **ring_ptr_ptr = ring_ptr_ref; + for (i = 0; i < np->num_queues; i++) { + XN_RX_LOCK(&np->rxq[i]); + XN_TX_LOCK(&np->txq[i]); + } + netfront_carrier_off(np); + for (i = 0; i < np->num_queues; i++) { + XN_RX_UNLOCK(&np->rxq[i]); + XN_TX_UNLOCK(&np->txq[i]); + } - if (*ref != GRANT_REF_INVALID) { - /* This API frees the associated storage. */ - gnttab_end_foreign_access(*ref, *ring_ptr_ptr); - *ref = GRANT_REF_INVALID; + for (i = 0; i < np->num_queues; i++) { + disconnect_rxq(&np->rxq[i]); + disconnect_txq(&np->txq[i]); } - *ring_ptr_ptr = NULL; } static int xn_ifmedia_upd(struct ifnet *ifp) { + return (0); } static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { + ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } |