Diffstat (limited to 'sys/dev/xen/netback/netback.c')
-rw-r--r--  sys/dev/xen/netback/netback.c  3438
1 files changed, 2189 insertions, 1249 deletions
diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
index b2be6e4..ef7b074 100644
--- a/sys/dev/xen/netback/netback.c
+++ b/sys/dev/xen/netback/netback.c
@@ -1,1595 +1,2535 @@
-/*
- * Copyright (c) 2006, Cisco Systems, Inc.
+/*-
+ * Copyright (c) 2009-2011 Spectra Logic Corporation
* All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
* are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
*
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * Authors: Justin T. Gibbs (Spectra Logic Corporation)
+ * Alan Somers (Spectra Logic Corporation)
+ * John Suykerbuyk (Spectra Logic Corporation)
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+
+/**
+ * \file netback.c
+ *
+ * \brief Device driver supporting the vending of network access
+ * from this FreeBSD domain to other domains.
+ */
+#include "opt_inet.h"
+#include "opt_global.h"
+
#include "opt_sctp.h"
#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
#include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
-#include <sys/module.h>
#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_arp.h>
-#include <net/if_types.h>
#include <net/ethernet.h>
-#include <net/if_bridgevar.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
-#include <netinet/in_systm.h>
#include <netinet/in.h>
-#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
-#include <netinet/udp.h>
-#ifdef SCTP
-#include <netinet/sctp.h>
-#include <netinet/sctp_crc32.h>
#endif
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <machine/in_cksum.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
-#include <machine/in_cksum.h>
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen_intr.h>
-#include <machine/evtchn.h>
-#include <machine/xenbus.h>
-#include <machine/gnttab.h>
-#include <machine/xen-public/memory.h>
-#include <dev/xen/xenbus/xenbus_comms.h>
-
-
-#ifdef XEN_NETBACK_DEBUG
-#define DPRINTF(fmt, args...) \
- printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
-#else
-#define DPRINTF(fmt, args...) ((void)0)
-#endif
+#include <machine/_inttypes.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
-#ifdef XEN_NETBACK_DEBUG_LOTS
-#define DDPRINTF(fmt, args...) \
- printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
-#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
-#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
-#else
-#define DDPRINTF(fmt, args...) ((void)0)
-#define DPRINTF_MBUF(_m) ((void)0)
-#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
-#endif
+#include <xen/evtchn.h>
+#include <xen/xen_intr.h>
+#include <xen/interface/io/netif.h>
+#include <xen/xenbus/xenbusvar.h>
-#define WPRINTF(fmt, args...) \
- printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+/*--------------------------- Compile-time Tunables --------------------------*/
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-#define BUG_ON PANIC_IF
+/*---------------------------------- Macros ----------------------------------*/
+/**
+ * Custom malloc type for all driver allocations.
+ */
+static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data");
-#define IFNAME(_np) (_np)->ifp->if_xname
+#define XNB_SG 1 /* netback driver supports feature-sg */
+#define XNB_GSO_TCPV4 1 /* netback driver supports feature-gso-tcpv4 */
+#define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */
+#define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+#undef XNB_DEBUG
+#define XNB_DEBUG /* hardcode on during development */
-struct ring_ref {
- vm_offset_t va;
- grant_handle_t handle;
- uint64_t bus_addr;
-};
+#ifdef XNB_DEBUG
+#define DPRINTF(fmt, args...) \
+ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) do {} while (0)
+#endif
-typedef struct netback_info {
+/* Default length for stack-allocated grant tables */
+#define GNTTAB_LEN (64)
- /* Schedule lists */
- STAILQ_ENTRY(netback_info) next_tx;
- STAILQ_ENTRY(netback_info) next_rx;
- int on_tx_sched_list;
- int on_rx_sched_list;
+/* Features supported by all backends. TSO and LRO can be negotiated */
+#define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP)
- struct xenbus_device *xdev;
- XenbusState frontend_state;
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
- domid_t domid;
- int handle;
- char *bridge;
+/**
+ * Two-argument version of the standard macro. The second argument is a
+ * tentative value of req_cons.
+ */
+#define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \
+ unsigned int req = (_r)->sring->req_prod - cons; \
+ unsigned int rsp = RING_SIZE(_r) - \
+ (cons - (_r)->rsp_prod_pvt); \
+ req < rsp ? req : rsp; \
+})
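+
+/*
+ * Editor's sketch (not part of this change): one way the two-argument macro
+ * above might be used, peeking at requests with a tentative consumer index
+ * and committing req_cons only afterwards. The function and variable names
+ * are illustrative assumptions, not driver API.
+ */
+#if 0
+static void
+xnb_example_peek(netif_tx_back_ring_t *txb)
+{
+ RING_IDX cons = txb->req_cons; /* tentative copy of the index */
+
+ while (RING_HAS_UNCONSUMED_REQUESTS_2(txb, cons)) {
+ const netif_tx_request_t *req = RING_GET_REQUEST(txb, cons);
+ (void)req; /* ... inspect req without consuming it ... */
+ cons++;
+ }
+ txb->req_cons = cons; /* commit once the whole packet parses */
+}
+#endif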
- int rings_connected;
- struct ring_ref tx_ring_ref;
- struct ring_ref rx_ring_ref;
- netif_tx_back_ring_t tx;
- netif_rx_back_ring_t rx;
- evtchn_port_t evtchn;
- int irq;
- void *irq_cookie;
+#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+#define virt_to_offset(x) ((x) & (PAGE_SIZE - 1))
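+
+/*
+ * Editor's sketch (assumption, not part of this change): the macros above
+ * split a kernel virtual address into the machine frame and intra-page
+ * offset needed for the local half of a grant-table copy descriptor, much
+ * as the xnb_*pkt2gnttab() routines below do for real.
+ */
+#if 0
+static void
+xnb_example_fill_source(struct gnttab_copy *copy, vm_offset_t kva)
+{
+ copy->source.u.gmfn = virt_to_mfn(kva);
+ copy->source.offset = virt_to_offset(kva);
+ copy->source.domid = DOMID_SELF;
+}
+#endif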
- struct ifnet *ifp;
- int ref_cnt;
+/**
+ * Predefined array type of grant table copy descriptors. Used to pass around
+ * statically allocated memory structures.
+ */
+typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN];
+
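+/*
+ * Editor's sketch (not part of this change): a table of the type above can
+ * be handed directly to the copy hypercall; the example function name is
+ * invented.
+ */
+#if 0
+static int
+xnb_example_submit_copies(gnttab_copy_table gnttab, int n_entries)
+{
+ /* Submit n_entries filled-in descriptors in a single hypercall. */
+ return (HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, n_entries));
+}
+#endif
+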
+/*--------------------------- Forward Declarations ---------------------------*/
+struct xnb_softc;
+struct xnb_pkt;
+
+static void xnb_attach_failed(struct xnb_softc *xnb,
+ int err, const char *fmt, ...)
+ __printflike(3,4);
+static int xnb_shutdown(struct xnb_softc *xnb);
+static int create_netdev(device_t dev);
+static int xnb_detach(device_t dev);
+static int xen_net_read_mac(device_t dev, uint8_t mac[]);
+static int xnb_ifmedia_upd(struct ifnet *ifp);
+static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
+static void xnb_intr(void *arg);
+static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend,
+ const struct mbuf *mbufc, gnttab_copy_table gnttab);
+static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend,
+ struct mbuf **mbufc, struct ifnet *ifnet,
+ gnttab_copy_table gnttab);
+static int xnb_ring2pkt(struct xnb_pkt *pkt,
+ const netif_tx_back_ring_t *tx_ring,
+ RING_IDX start);
+static void xnb_txpkt2rsp(const struct xnb_pkt *pkt,
+ netif_tx_back_ring_t *ring, int error);
+static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp);
+static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt,
+ const struct mbuf *mbufc,
+ gnttab_copy_table gnttab,
+ const netif_tx_back_ring_t *txb,
+ domid_t otherend_id);
+static void xnb_update_mbufc(struct mbuf *mbufc,
+ const gnttab_copy_table gnttab, int n_entries);
+static int xnb_mbufc2pkt(const struct mbuf *mbufc,
+ struct xnb_pkt *pkt,
+ RING_IDX start, int space);
+static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt,
+ const struct mbuf *mbufc,
+ gnttab_copy_table gnttab,
+ const netif_rx_back_ring_t *rxb,
+ domid_t otherend_id);
+static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
+ const gnttab_copy_table gnttab, int n_entries,
+ netif_rx_back_ring_t *ring);
+static void xnb_add_mbuf_cksum(struct mbuf *mbufc);
+static void xnb_stop(struct xnb_softc*);
+static int xnb_ioctl(struct ifnet*, u_long, caddr_t);
+static void xnb_start_locked(struct ifnet*);
+static void xnb_start(struct ifnet*);
+static void xnb_ifinit_locked(struct xnb_softc*);
+static void xnb_ifinit(void*);
+#ifdef XNB_DEBUG
+static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
+static int xnb_dump_rings(SYSCTL_HANDLER_ARGS);
+#endif
+/*------------------------------ Data Structures -----------------------------*/
- device_t ndev;
- int attached;
-} netif_t;
+/**
+ * Representation of a xennet packet. Simplified version of a packet as
+ * stored in the Xen tx ring. Applicable to both RX and TX packets.
+ */
+struct xnb_pkt {
+ /**
+ * Array index of the first data-bearing (i.e., not extra info) entry
+ * for this packet
+ */
+ RING_IDX car;
-#define MAX_PENDING_REQS 256
-#define PKT_PROT_LEN 64
+ /**
+ * Array index of the second data-bearing entry for this packet.
+ * Invalid if the packet has only one data-bearing entry. If the
+ * packet has more than two data-bearing entries, then the second
+ * through the last will be sequential modulo the ring size
+ */
+ RING_IDX cdr;
-static struct {
- netif_tx_request_t req;
- netif_t *netif;
-} pending_tx_info[MAX_PENDING_REQS];
-static uint16_t pending_ring[MAX_PENDING_REQS];
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+ /**
+ * Optional extra info. Only valid if flags contains
+ * NETTXF_extra_info. Note that extra.type will always be
+ * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback
+ * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_*
+ */
+ netif_extra_info_t extra;
-static unsigned long mmap_vstart;
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
+ /** Size of entire packet in bytes. */
+ uint16_t size;
-/* Freed TX mbufs get batched on this ring before return to pending_ring. */
-static uint16_t dealloc_ring[MAX_PENDING_REQS];
-static PEND_RING_IDX dealloc_prod, dealloc_cons;
+ /** The size of the first entry's data in bytes */
+ uint16_t car_size;
-static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
+ /**
+ * Either NETTXF_ or NETRXF_ flags. Note that the flag values are
+ * not the same for TX and RX packets
+ */
+ uint16_t flags;
-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+ /**
+ * The number of valid data-bearing entries (either netif_tx_request's
+ * or netif_rx_response's) in the packet. If this is 0, it means the
+ * entire packet is invalid.
+ */
+ uint16_t list_len;
-static struct task net_tx_task, net_rx_task;
-static struct callout rx_task_callout;
+ /** There was an error processing the packet */
+ uint8_t error;
+};
-static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
- STAILQ_HEAD_INITIALIZER(tx_sched_list);
-static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
- STAILQ_HEAD_INITIALIZER(rx_sched_list);
-static struct mtx tx_sched_list_lock;
-static struct mtx rx_sched_list_lock;
+/** xnb_pkt method: initialize it */
+static inline void
+xnb_pkt_initialize(struct xnb_pkt *pxnb)
+{
+ bzero(pxnb, sizeof(*pxnb));
+}
-static int vif_unit_maker = 0;
+/** xnb_pkt method: mark the packet as valid */
+static inline void
+xnb_pkt_validate(struct xnb_pkt *pxnb)
+{
+ pxnb->error = 0;
+}
-/* Protos */
-static void netback_start(struct ifnet *ifp);
-static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int vif_add_dev(struct xenbus_device *xdev);
-static void disconnect_rings(netif_t *netif);
+/** xnb_pkt method: mark the packet as invalid */
+static inline void
+xnb_pkt_invalidate(struct xnb_pkt *pxnb)
+{
+ pxnb->error = 1;
+}
+
+/** xnb_pkt method: Check whether the packet is valid */
+static inline int
+xnb_pkt_is_valid(const struct xnb_pkt *pxnb)
+{
+ return (!pxnb->error);
+}
+
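+/*
+ * Editor's sketch (assumption, not part of this change): how the xnb_pkt
+ * fields might describe a hypothetical 2000-byte packet spanning two
+ * consecutive ring slots. All concrete numbers are invented.
+ */
+#if 0
+static void
+xnb_example_two_slot_pkt(struct xnb_pkt *pkt)
+{
+ xnb_pkt_initialize(pkt); /* zeroed, hence valid (error == 0) */
+ pkt->car = 10; /* first data-bearing ring index */
+ pkt->cdr = 11; /* second fragment follows sequentially */
+ pkt->size = 2000; /* total packet length in bytes */
+ pkt->car_size = 1500; /* bytes carried by the first entry */
+ pkt->list_len = 2; /* two data-bearing entries, no extras */
+}
+#endif
+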
+#ifdef XNB_DEBUG
+/** xnb_pkt method: print the packet's contents in human-readable format */
+static void __unused
+xnb_dump_pkt(const struct xnb_pkt *pkt)
+{
+ if (pkt == NULL) {
+ DPRINTF("Was passed a null pointer.\n");
+ return;
+ }
+ DPRINTF("pkt address= %p\n", pkt);
+ DPRINTF("pkt->size=%d\n", pkt->size);
+ DPRINTF("pkt->car_size=%d\n", pkt->car_size);
+ DPRINTF("pkt->flags=0x%04x\n", pkt->flags);
+ DPRINTF("pkt->list_len=%d\n", pkt->list_len);
+ /* DPRINTF("pkt->extra"); TODO */
+ DPRINTF("pkt->car=%d\n", pkt->car);
+ DPRINTF("pkt->cdr=%d\n", pkt->cdr);
+ DPRINTF("pkt->error=%d\n", pkt->error);
+}
+#endif /* XNB_DEBUG */
-#ifdef XEN_NETBACK_DEBUG_LOTS
-/* Debug code to display the contents of an mbuf */
static void
-print_mbuf(struct mbuf *m, int max)
+xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq)
{
- int i, j=0;
- printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
- for (; m; m = m->m_next) {
- unsigned char *d = m->m_data;
- for (i=0; i < m->m_len; i++) {
- if (max && j == max)
- break;
- if ((j++ % 16) == 0)
- printf("\n%04x:", j);
- printf(" %02x", d[i]);
- }
+ if (txreq != NULL) {
+ DPRINTF("netif_tx_request index =%u\n", idx);
+ DPRINTF("netif_tx_request.gref =%u\n", txreq->gref);
+ DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset);
+ DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags);
+ DPRINTF("netif_tx_request.id =%hu\n", txreq->id);
+ DPRINTF("netif_tx_request.size =%hu\n", txreq->size);
}
- printf("\n");
}
-#endif
-#define MAX_MFN_ALLOC 64
-static unsigned long mfn_list[MAX_MFN_ALLOC];
-static unsigned int alloc_index = 0;
+/**
+ * \brief Configuration data for a shared memory request ring
+ * used to communicate with the front-end client of this driver.
+ */
+struct xnb_ring_config {
+ /**
+ * Runtime structures for ring access. Unfortunately, TX and RX rings
+ * use different data structures, and that cannot be changed since it
+ * is part of the interdomain protocol.
+ */
+ union {
+ netif_rx_back_ring_t rx_ring;
+ netif_tx_back_ring_t tx_ring;
+ } back_ring;
+
+ /**
+ * The device bus address returned by the hypervisor when
+ * mapping the ring and required to unmap it when a connection
+ * is torn down.
+ */
+ uint64_t bus_addr;
-static unsigned long
-alloc_mfn(void)
-{
- unsigned long mfn = 0;
- struct xen_memory_reservation reservation = {
- .extent_start = mfn_list,
- .nr_extents = MAX_MFN_ALLOC,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- if ( unlikely(alloc_index == 0) )
- alloc_index = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
- if ( alloc_index != 0 )
- mfn = mfn_list[--alloc_index];
- return mfn;
-}
+ /** The pseudo-physical address where ring memory is mapped.*/
+ uint64_t gnt_addr;
+
+ /** KVA address where ring memory is mapped. */
+ vm_offset_t va;
+
+ /**
+ * Grant table handles, one per-ring page, returned by the
+ * hyperpervisor upon mapping of the ring and required to
+ * unmap it when a connection is torn down.
+ */
+ grant_handle_t handle;
+
+ /** The number of ring pages mapped for the current connection. */
+ unsigned ring_pages;
-static unsigned long
-alloc_empty_page_range(unsigned long nr_pages)
+ /**
+ * The grant references, one per-ring page, supplied by the
+ * front-end, allowing us to reference the ring pages in the
+ * front-end's domain and to map these pages into our own domain.
+ */
+ grant_ref_t ring_ref;
+};
+
+/**
+ * Per-instance connection state flags.
+ */
+typedef enum
{
- void *pages;
- int i = 0, j = 0;
- multicall_entry_t mcl[17];
- unsigned long mfn_list[16];
- struct xen_memory_reservation reservation = {
- .extent_start = mfn_list,
- .nr_extents = 0,
- .address_bits = 0,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
+ /** Communication with the front-end has been established. */
+ XNBF_RING_CONNECTED = 0x01,
- pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
- if (pages == NULL)
- return 0;
+ /**
+ * Front-end requests exist in the ring and are waiting for
+ * xnb_xen_req objects to free up.
+ */
+ XNBF_RESOURCE_SHORTAGE = 0x02,
- memset(mcl, 0, sizeof(mcl));
+ /** Connection teardown has started. */
+ XNBF_SHUTDOWN = 0x04,
- while (i < nr_pages) {
- unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+ /** A thread is already performing shutdown processing. */
+ XNBF_IN_SHUTDOWN = 0x08
+} xnb_flag_t;
- mcl[j].op = __HYPERVISOR_update_va_mapping;
- mcl[j].args[0] = va;
+/**
+ * Types of rings. Used for array indices and to identify a ring's control
+ * data structure type.
+ */
+typedef enum {
+ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */
+ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */
+ XNB_NUM_RING_TYPES
+} xnb_ring_type_t;
- mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+/**
+ * Per-instance configuration data.
+ */
+struct xnb_softc {
+ /** NewBus device corresponding to this instance. */
+ device_t dev;
- xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+ /* Media related fields */
- if (j == 16 || i == nr_pages) {
- mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+ /** Generic network media state */
+ struct ifmedia sc_media;
- reservation.nr_extents = j;
+ /** Media carrier info */
+ struct ifnet *xnb_ifp;
- mcl[j].op = __HYPERVISOR_memory_op;
- mcl[j].args[0] = XENMEM_decrease_reservation;
- mcl[j].args[1] = (unsigned long)&reservation;
-
- (void)HYPERVISOR_multicall(mcl, j+1);
+ /** Our own private carrier state */
+ unsigned carrier;
- mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
- j = 0;
- }
- }
+ /** Device MAC Address */
+ uint8_t mac[ETHER_ADDR_LEN];
- return (unsigned long)pages;
-}
+ /* Xen related fields */
-#ifdef XEN_NETBACK_FIXUP_CSUM
-static void
-fixup_checksum(struct mbuf *m)
-{
- struct ether_header *eh = mtod(m, struct ether_header *);
- struct ip *ip = (struct ip *)(eh + 1);
- int iphlen = ip->ip_hl << 2;
- int iplen = ntohs(ip->ip_len);
-
- if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
- struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(IPPROTO_TCP + (iplen - iphlen)));
- th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
- m->m_pkthdr.csum_flags &= ~CSUM_TCP;
-#ifdef SCTP
- } else if (sw_csum & CSUM_SCTP) {
- sctp_delayed_cksum(m, iphlen);
- sw_csum &= ~CSUM_SCTP;
-#endif
- } else {
- u_short csum;
- struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
- uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(IPPROTO_UDP + (iplen - iphlen)));
- if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
- csum = 0xffff;
- uh->uh_sum = csum;
- m->m_pkthdr.csum_flags &= ~CSUM_UDP;
- }
-}
+ /**
+ * \brief The netif protocol abi in effect.
+ *
+ * There are situations where the back and front ends can
+ * have a different, native abi (e.g. intel x86_64 and
+ * 32bit x86 domains on the same machine). The back-end
+ * always accommodates the front-end's native abi. That
+ * value is pulled from the XenStore and recorded here.
+ */
+ int abi;
+
+ /**
+ * Name of the bridge to which this VIF is connected, if any.
+ * This field is dynamically allocated by xenbus and must be free()ed
+ * when no longer needed.
+ */
+ char *bridge;
+
+ /** The interrupt-driven event channel used to signal ring events. */
+ evtchn_port_t evtchn;
+
+ /** Xen device handle.*/
+ long handle;
+
+ /** IRQ mapping for the communication ring event channel. */
+ int irq;
+
+ /**
+ * \brief Cached value of the front-end's domain id.
+ *
+ * This value is used once for each mapped page in
+ * a transaction. We cache it to avoid incurring the
+ * cost of an ivar access every time this is needed.
+ */
+ domid_t otherend_id;
+
+ /**
+ * Undocumented frontend feature. Has something to do with
+ * scatter/gather IO
+ */
+ uint8_t can_sg;
+ /** Undocumented frontend feature */
+ uint8_t gso;
+ /** Undocumented frontend feature */
+ uint8_t gso_prefix;
+ /** Can checksum TCP/UDP over IPv4 */
+ uint8_t ip_csum;
+
+ /* Implementation related fields */
+ /**
+ * Preallocated grant table copy descriptor for RX operations.
+ * Access must be protected by rx_lock
+ */
+ gnttab_copy_table rx_gnttab;
+
+ /**
+ * Preallocated grant table copy descriptor for TX operations.
+ * Access must be protected by tx_lock
+ */
+ gnttab_copy_table tx_gnttab;
+
+#ifdef XENHVM
+ /**
+ * Resource representing allocated physical address space
+ * associated with our per-instance kva region.
+ */
+ struct resource *pseudo_phys_res;
+
+ /** Resource id for allocated physical address space. */
+ int pseudo_phys_res_id;
#endif
-/* Add the interface to the specified bridge */
-static int
-add_to_bridge(struct ifnet *ifp, char *bridge)
-{
- struct ifdrv ifd;
- struct ifbreq ifb;
- struct ifnet *ifp_bridge = ifunit(bridge);
+ /** Ring mapping and interrupt configuration data. */
+ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES];
- if (!ifp_bridge)
- return ENOENT;
+ /**
+ * Global pool of kva used for mapping remote domain ring
+ * and I/O transaction data.
+ */
+ vm_offset_t kva;
- bzero(&ifd, sizeof(ifd));
- bzero(&ifb, sizeof(ifb));
+ /** Pseudo-physical address corresponding to kva. */
+ uint64_t gnt_base_addr;
- strcpy(ifb.ifbr_ifsname, ifp->if_xname);
- strcpy(ifd.ifd_name, ifp->if_xname);
- ifd.ifd_cmd = BRDGADD;
- ifd.ifd_len = sizeof(ifb);
- ifd.ifd_data = &ifb;
+ /** Various configuration and state bit flags. */
+ xnb_flag_t flags;
- return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
-
-}
+ /** Mutex protecting per-instance data in the receive path. */
+ struct mtx rx_lock;
-static int
-netif_create(int handle, struct xenbus_device *xdev, char *bridge)
-{
- netif_t *netif;
- struct ifnet *ifp;
+ /** Mutex protecting per-instance data in the softc structure. */
+ struct mtx sc_lock;
- netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
- if (!netif)
- return ENOMEM;
+ /** Mutex protecting per-instance data in the transmit path. */
+ struct mtx tx_lock;
- netif->ref_cnt = 1;
- netif->handle = handle;
- netif->domid = xdev->otherend_id;
- netif->xdev = xdev;
- netif->bridge = bridge;
- xdev->data = netif;
-
- /* Set up ifnet structure */
- ifp = netif->ifp = if_alloc(IFT_ETHER);
- if (!ifp) {
- if (bridge)
- free(bridge, M_DEVBUF);
- free(netif, M_DEVBUF);
- return ENOMEM;
+ /** The size of the global kva pool. */
+ int kva_size;
+};
+
+/*---------------------------- Debugging functions ---------------------------*/
+#ifdef XNB_DEBUG
+static void __unused
+xnb_dump_gnttab_copy(const struct gnttab_copy *entry)
+{
+ if (entry == NULL) {
+ printf("NULL grant table pointer\n");
+ return;
}
- ifp->if_softc = netif;
- if_initname(ifp, "vif",
- atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
- ifp->if_output = ether_output;
- ifp->if_start = netback_start;
- ifp->if_ioctl = netback_ioctl;
- ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
-
- DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+ if (entry->flags & GNTCOPY_dest_gref)
+ printf("gnttab dest ref=\t%u\n", entry->dest.u.ref);
+ else
+ printf("gnttab dest gmfn=\t%lu\n", entry->dest.u.gmfn);
+ printf("gnttab dest offset=\t%hu\n", entry->dest.offset);
+ printf("gnttab dest domid=\t%hu\n", entry->dest.domid);
+ if (entry->flags & GNTCOPY_source_gref)
+ printf("gnttab source ref=\t%u\n", entry->source.u.ref);
+ else
+ printf("gnttab source gmfn=\t%lu\n", entry->source.u.gmfn);
+ printf("gnttab source offset=\t%hu\n", entry->source.offset);
+ printf("gnttab source domid=\t%hu\n", entry->source.domid);
+ printf("gnttab len=\t%hu\n", entry->len);
+ printf("gnttab flags=\t%hu\n", entry->flags);
+ printf("gnttab status=\t%hd\n", entry->status);
+}
- return 0;
+static int
+xnb_dump_rings(SYSCTL_HANDLER_ARGS)
+{
+ static char results[720];
+ struct xnb_softc const* xnb = (struct xnb_softc*)arg1;
+ netif_rx_back_ring_t const* rxb =
+ &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring;
+ netif_tx_back_ring_t const* txb =
+ &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring;
+
+ /* empty the result strings */
+ results[0] = 0;
+
+ if (!txb || !txb->sring || !rxb || !rxb->sring)
+ return (SYSCTL_OUT(req, results, strnlen(results, 720)));
+
+ snprintf(results, 720,
+ "\n\t%35s %18s\n" /* TX, RX */
+ "\t%16s %18d %18d\n" /* req_cons */
+ "\t%16s %18d %18d\n" /* nr_ents */
+ "\t%16s %18d %18d\n" /* rsp_prod_pvt */
+ "\t%16s %18p %18p\n" /* sring */
+ "\t%16s %18d %18d\n" /* req_prod */
+ "\t%16s %18d %18d\n" /* req_event */
+ "\t%16s %18d %18d\n" /* rsp_prod */
+ "\t%16s %18d %18d\n", /* rsp_event */
+ "TX", "RX",
+ "req_cons", txb->req_cons, rxb->req_cons,
+ "nr_ents", txb->nr_ents, rxb->nr_ents,
+ "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt,
+ "sring", txb->sring, rxb->sring,
+ "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod,
+ "sring->req_event", txb->sring->req_event, rxb->sring->req_event,
+ "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod,
+ "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event);
+
+ return (SYSCTL_OUT(req, results, strnlen(results, 720)));
}
-static void
-netif_get(netif_t *netif)
+static void __unused
+xnb_dump_mbuf(const struct mbuf *m)
{
- atomic_add_int(&netif->ref_cnt, 1);
+ int len;
+ uint8_t *d;
+ if (m == NULL)
+ return;
+
+ printf("xnb_dump_mbuf:\n");
+ if (m->m_flags & M_PKTHDR) {
+ printf(" flowid=%10d, csum_flags=%#8x, csum_data=%#8x, "
+ "tso_segsz=%5hd\n",
+ m->m_pkthdr.flowid, m->m_pkthdr.csum_flags,
+ m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz);
+ printf(" rcvif=%16p, header=%18p, len=%19d\n",
+ m->m_pkthdr.rcvif, m->m_pkthdr.header, m->m_pkthdr.len);
+ }
+ printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n",
+ m->m_next, m->m_nextpkt, m->m_data);
+ printf(" m_len=%17d, m_flags=%#15x, m_type=%18hd\n",
+ m->m_len, m->m_flags, m->m_type);
+
+ len = m->m_len;
+ d = mtod(m, uint8_t*);
+ while (len > 0) {
+ int i;
+ printf(" ");
+ for (i = 0; (i < 16) && (len > 0); i++, len--) {
+ printf("%02hhx ", *(d++));
+ }
+ printf("\n");
+ }
}
+#endif /* XNB_DEBUG */
+/*------------------------ Inter-Domain Communication ------------------------*/
+/**
+ * Free dynamically allocated KVA or pseudo-physical address allocations.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ */
static void
-netif_put(netif_t *netif)
+xnb_free_communication_mem(struct xnb_softc *xnb)
{
- if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
- DPRINTF("%s\n", IFNAME(netif));
- disconnect_rings(netif);
- if (netif->ifp) {
- if_free(netif->ifp);
- netif->ifp = NULL;
+ if (xnb->kva != 0) {
+#ifndef XENHVM
+ kmem_free(kernel_map, xnb->kva, xnb->kva_size);
+#else
+ if (xnb->pseudo_phys_res != NULL) {
+ bus_release_resource(xnb->dev, SYS_RES_MEMORY,
+ xnb->pseudo_phys_res_id,
+ xnb->pseudo_phys_res);
+ xnb->pseudo_phys_res = NULL;
}
- if (netif->bridge)
- free(netif->bridge, M_DEVBUF);
- free(netif, M_DEVBUF);
+#endif /* XENHVM */
}
+ xnb->kva = 0;
+ xnb->gnt_base_addr = 0;
}
+/**
+ * Cleanup all inter-domain communication mechanisms.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ */
static int
-netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+xnb_disconnect(struct xnb_softc *xnb)
{
- switch (cmd) {
- case SIOCSIFFLAGS:
- DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
- IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
- return 0;
+ struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES];
+ int error;
+ int i;
+
+ if (xnb->irq != 0) {
+ unbind_from_irqhandler(xnb->irq);
+ xnb->irq = 0;
}
- DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
+ /*
+ * We may still have another thread currently processing requests. We
+ * must acquire the rx and tx locks to make sure those threads are done,
+ * but we can release those locks as soon as we acquire them, because no
+ * more interrupts will be arriving.
+ */
+ mtx_lock(&xnb->tx_lock);
+ mtx_unlock(&xnb->tx_lock);
+ mtx_lock(&xnb->rx_lock);
+ mtx_unlock(&xnb->rx_lock);
+
+ /* Free malloc'd softc member variables */
+ if (xnb->bridge != NULL)
+ free(xnb->bridge, M_XENSTORE);
+
+ /* All request processing has stopped, so unmap the rings */
+ for (i=0; i < XNB_NUM_RING_TYPES; i++) {
+ gnts[i].host_addr = xnb->ring_configs[i].gnt_addr;
+ gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr;
+ gnts[i].handle = xnb->ring_configs[i].handle;
+ }
+ error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts,
+ XNB_NUM_RING_TYPES);
+ KASSERT(error == 0, ("Grant table unmap op failed (%d)", error));
- return ether_ioctl(ifp, cmd, data);
-}
+ xnb_free_communication_mem(xnb);
+ /*
+ * Zero the ring config structs because the pointers, handles, and
+ * grant refs contained therein are no longer valid.
+ */
+ bzero(&xnb->ring_configs[XNB_RING_TYPE_TX],
+ sizeof(struct xnb_ring_config));
+ bzero(&xnb->ring_configs[XNB_RING_TYPE_RX],
+ sizeof(struct xnb_ring_config));
-static inline void
-maybe_schedule_tx_action(void)
-{
- smp_mb();
- if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
- taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+ xnb->flags &= ~XNBF_RING_CONNECTED;
+ return (0);
}
-/* Removes netif from front of list and does not call netif_put() (caller must) */
-static netif_t *
-remove_from_tx_schedule_list(void)
+/**
+ * Map a single shared memory ring into domain local address space and
+ * initialize its control structure.
+ *
+ * \param xnb Per-instance xnb configuration structure
+ * \param ring_type Array index of this ring in the xnb's array of rings
+ * \return An errno
+ */
+static int
+xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type)
{
- netif_t *netif;
+ struct gnttab_map_grant_ref gnt;
+ struct xnb_ring_config *ring = &xnb->ring_configs[ring_type];
+ int error;
- mtx_lock(&tx_sched_list_lock);
-
- if ((netif = STAILQ_FIRST(&tx_sched_list))) {
- STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
- STAILQ_NEXT(netif, next_tx) = NULL;
- netif->on_tx_sched_list = 0;
- }
+ /* TX ring type = 0, RX =1 */
+ ring->va = xnb->kva + ring_type * PAGE_SIZE;
+ ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE;
- mtx_unlock(&tx_sched_list_lock);
+ gnt.host_addr = ring->gnt_addr;
+ gnt.flags = GNTMAP_host_map;
+ gnt.ref = ring->ring_ref;
+ gnt.dom = xnb->otherend_id;
- return netif;
-}
-
-/* Adds netif to end of list and calls netif_get() */
-static void
-add_to_tx_schedule_list_tail(netif_t *netif)
-{
- if (netif->on_tx_sched_list)
- return;
+ error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1);
+ if (error != 0)
+ panic("netback: Ring page grant table op failed (%d)", error);
- mtx_lock(&tx_sched_list_lock);
- if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- netif_get(netif);
- STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
- netif->on_tx_sched_list = 1;
+ if (gnt.status != 0) {
+ ring->va = 0;
+ error = EACCES;
+ xenbus_dev_fatal(xnb->dev, error,
+ "Ring shared page mapping failed. "
+ "Status %d.", gnt.status);
+ } else {
+ ring->handle = gnt.handle;
+ ring->bus_addr = gnt.dev_bus_addr;
+
+ if (ring_type == XNB_RING_TYPE_TX) {
+ BACK_RING_INIT(&ring->back_ring.tx_ring,
+ (netif_tx_sring_t*)ring->va,
+ ring->ring_pages * PAGE_SIZE);
+ } else if (ring_type == XNB_RING_TYPE_RX) {
+ BACK_RING_INIT(&ring->back_ring.rx_ring,
+ (netif_rx_sring_t*)ring->va,
+ ring->ring_pages * PAGE_SIZE);
+ } else {
+ xenbus_dev_fatal(xnb->dev, error,
+ "Unknown ring type %d", ring_type);
+ }
}
- mtx_unlock(&tx_sched_list_lock);
+
+ return error;
}
-/*
- * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
- * If this driver is pipelining transmit requests then we can be very
- * aggressive in avoiding new-packet notifications -- frontend only needs to
- * send a notification if there are no outstanding unreceived responses.
- * If we may be buffer transmit buffers for any reason then we must be rather
- * more conservative and treat this as the final check for pending work.
+/**
+ * Set up the shared memory rings and bind an interrupt to the event channel
+ * used to notify us of ring changes.
+ *
+ * \param xnb Per-instance xnb configuration structure.
*/
-static void
-netif_schedule_tx_work(netif_t *netif)
+static int
+xnb_connect_comms(struct xnb_softc *xnb)
{
- int more_to_do;
+ int error;
+ xnb_ring_type_t i;
-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
- more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
-#else
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-#endif
+ if ((xnb->flags & XNBF_RING_CONNECTED) != 0)
+ return (0);
- if (more_to_do) {
- DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
- add_to_tx_schedule_list_tail(netif);
- maybe_schedule_tx_action();
+ /*
+ * The KVA for our rings is at the tail of the region of KVA allocated
+ * by xnb_alloc_communication_mem().
+ */
+ for (i=0; i < XNB_NUM_RING_TYPES; i++) {
+ error = xnb_connect_ring(xnb, i);
+ if (error != 0)
+ return error;
}
-}
-static struct mtx dealloc_lock;
-MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
+ xnb->flags |= XNBF_RING_CONNECTED;
+
+ error =
+ bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id,
+ xnb->evtchn,
+ device_get_nameunit(xnb->dev),
+ xnb_intr, /*arg*/xnb,
+ INTR_TYPE_BIO | INTR_MPSAFE,
+ &xnb->irq);
+ if (error != 0) {
+ (void)xnb_disconnect(xnb);
+ xenbus_dev_fatal(xnb->dev, error, "binding event channel");
+ return (error);
+ }
-static void
-netif_idx_release(uint16_t pending_idx)
-{
- mtx_lock_spin(&dealloc_lock);
- dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
- mtx_unlock_spin(&dealloc_lock);
+ DPRINTF("rings connected!\n");
- taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+ return (0);
}
-static void
-make_tx_response(netif_t *netif,
- uint16_t id,
- int8_t st)
+/**
+ * Size KVA and pseudo-physical address allocations based on negotiated
+ * values for the size and number of I/O requests, and the size of our
+ * communication ring.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ *
+ * These address spaces are used to dynamically map pages in the
+ * front-end's domain into our own.
+ */
+static int
+xnb_alloc_communication_mem(struct xnb_softc *xnb)
{
- RING_IDX i = netif->tx.rsp_prod_pvt;
- netif_tx_response_t *resp;
- int notify;
-
- resp = RING_GET_RESPONSE(&netif->tx, i);
- resp->id = id;
- resp->status = st;
-
- netif->tx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
- if (notify)
- notify_remote_via_irq(netif->irq);
-
-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
- if (i == netif->tx.req_cons) {
- int more_to_do;
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
- if (more_to_do)
- add_to_tx_schedule_list_tail(netif);
+ xnb_ring_type_t i;
+
+ xnb->kva_size = 0;
+ for (i=0; i < XNB_NUM_RING_TYPES; i++) {
+ xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE;
}
-#endif
+#ifndef XENHVM
+ xnb->kva = kmem_alloc_nofault(kernel_map, xnb->kva_size);
+ if (xnb->kva == 0)
+ return (ENOMEM);
+ xnb->gnt_base_addr = xnb->kva;
+#else /* defined XENHVM */
+ /*
+ * Reserve a range of pseudo physical memory that we can map
+ * into kva. These pages will only be backed by machine
+ * pages ("real memory") during the lifetime of front-end requests
+ * via grant table operations. We will map the netif tx and rx rings
+ * into this space.
+ */
+ xnb->pseudo_phys_res_id = 0;
+ xnb->pseudo_phys_res = bus_alloc_resource(xnb->dev, SYS_RES_MEMORY,
+ &xnb->pseudo_phys_res_id,
+ 0, ~0, xnb->kva_size,
+ RF_ACTIVE);
+ if (xnb->pseudo_phys_res == NULL) {
+ xnb->kva = 0;
+ return (ENOMEM);
+ }
+ xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res);
+ xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res);
+#endif /* !defined XENHVM */
+ return (0);
}
-static inline void
-net_tx_action_dealloc(void)
+/**
+ * Collect information from the XenStore related to our device and its
+ * frontend.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ */
+static int
+xnb_collect_xenstore_info(struct xnb_softc *xnb)
{
- gnttab_unmap_grant_ref_t *gop;
- uint16_t pending_idx;
- PEND_RING_IDX dc, dp;
- netif_t *netif;
- int ret;
+ /**
+ * \todo Linux collects the following info. We should collect most
+ * of this, too:
+ * "feature-rx-notify"
+ */
+ const char *otherend_path;
+ const char *our_path;
+ int err;
+ unsigned int rx_copy, bridge_len;
+ uint8_t no_csum_offload;
+
+ otherend_path = xenbus_get_otherend_path(xnb->dev);
+ our_path = xenbus_get_node(xnb->dev);
+
+ /* Collect the critical communication parameters */
+ err = xs_gather(XST_NIL, otherend_path,
+ "tx-ring-ref", "%l" PRIu32,
+ &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref,
+ "rx-ring-ref", "%l" PRIu32,
+ &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref,
+ "event-channel", "%" PRIu32, &xnb->evtchn,
+ NULL);
+ if (err != 0) {
+ xenbus_dev_fatal(xnb->dev, err,
+ "Unable to retrieve ring information from "
+ "frontend %s. Unable to connect.",
+ otherend_path);
+ return (err);
+ }
- dc = dealloc_cons;
- dp = dealloc_prod;
+ /* Collect the handle from xenstore */
+ err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle);
+ if (err != 0) {
+ xenbus_dev_fatal(xnb->dev, err,
+ "Error reading handle from frontend %s. "
+ "Unable to connect.", otherend_path);
+ }
/*
- * Free up any grants we have finished using
+ * Collect the bridge name, if any. We do not need bridge_len; we just
+ * throw it away.
*/
- gop = tx_unmap_ops;
- while (dc != dp) {
- pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- gop->host_addr = MMAP_VADDR(pending_idx);
- gop->dev_bus_addr = 0;
- gop->handle = grant_tx_handle[pending_idx];
- gop++;
- }
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
- BUG_ON(ret);
+ err = xs_read(XST_NIL, our_path, "bridge", &bridge_len,
+ (void**)&xnb->bridge);
+ if (err != 0)
+ xnb->bridge = NULL;
- while (dealloc_cons != dp) {
- pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+ /*
+ * Does the frontend request that we use rx copy? If not, return an
+ * error because this driver only supports rx copy.
+ */
+ err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL,
+ "%" PRIu32, &rx_copy);
+ if (err == ENOENT) {
+ err = 0;
+ rx_copy = 0;
+ }
+ if (err < 0) {
+ xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy",
+ otherend_path);
+ return err;
+ }
+ /**
+ * \todo: figure out the exact meaning of this feature, and when
+ * the frontend will set it to true. It should be set to true
+ * at some point.
+ */
+/* if (!rx_copy)*/
+/* return EOPNOTSUPP;*/
- netif = pending_tx_info[pending_idx].netif;
+ /** \todo Collect the rx notify feature */
- make_tx_response(netif, pending_tx_info[pending_idx].req.id,
- NETIF_RSP_OKAY);
-
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ /* Collect the feature-sg. */
+ if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL,
+ "%hhu", &xnb->can_sg) < 0)
+ xnb->can_sg = 0;
- netif_put(netif);
- }
-}
+ /* Collect remaining frontend features */
+ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL,
+ "%hhu", &xnb->gso) < 0)
+ xnb->gso = 0;
-static void
-netif_page_release(void *buf, void *args)
-{
- uint16_t pending_idx = (unsigned int)args;
-
- DDPRINTF("pending_idx=%u\n", pending_idx);
+ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL,
+ "%hhu", &xnb->gso_prefix) < 0)
+ xnb->gso_prefix = 0;
- KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
+ if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL,
+ "%hhu", &no_csum_offload) < 0)
+ no_csum_offload = 0;
+ xnb->ip_csum = (no_csum_offload == 0);
- netif_idx_release(pending_idx);
+ return (0);
}
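+
+/*
+ * Editor's sketch (assumption, not part of this change): the
+ * "feature-rx-notify" collection noted as a \todo above could follow the
+ * same xs_scanf() pattern used for the other features, inside
+ * xnb_collect_xenstore_info(); the rx_notify variable is invented.
+ */
+#if 0
+ uint8_t rx_notify;
+
+ if (xs_scanf(XST_NIL, otherend_path, "feature-rx-notify", NULL,
+     "%hhu", &rx_notify) < 0)
+  rx_notify = 0;
+#endif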
-static void
-net_tx_action(void *context, int pending)
+/**
+ * Supply information about the physical device to the frontend
+ * via XenBus.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ */
+static int
+xnb_publish_backend_info(struct xnb_softc *xnb)
{
- struct mbuf *m;
- netif_t *netif;
- netif_tx_request_t txreq;
- uint16_t pending_idx;
- RING_IDX i;
- gnttab_map_grant_ref_t *mop;
- int ret, work_to_do;
- struct mbuf *txq = NULL, *txq_last = NULL;
-
- if (dealloc_cons != dealloc_prod)
- net_tx_action_dealloc();
-
- mop = tx_map_ops;
- while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
-
- /* Get a netif from the list with work to do. */
- netif = remove_from_tx_schedule_list();
-
- DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
- IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
-
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
- if (!work_to_do) {
- netif_put(netif);
- continue;
+ struct xs_transaction xst;
+ const char *our_path;
+ int error;
+
+ our_path = xenbus_get_node(xnb->dev);
+
+ do {
+ error = xs_transaction_start(&xst);
+ if (error != 0) {
+ xenbus_dev_fatal(xnb->dev, error,
+ "Error publishing backend info "
+ "(start transaction)");
+ break;
}
- i = netif->tx.req_cons;
- rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+ error = xs_printf(xst, our_path, "feature-sg",
+ "%d", XNB_SG);
+ if (error != 0)
+ break;
- /* If we want credit-based scheduling, coud add it here - WORK */
+ error = xs_printf(xst, our_path, "feature-gso-tcpv4",
+ "%d", XNB_GSO_TCPV4);
+ if (error != 0)
+ break;
- netif->tx.req_cons++;
+ error = xs_printf(xst, our_path, "feature-rx-copy",
+ "%d", XNB_RX_COPY);
+ if (error != 0)
+ break;
- netif_schedule_tx_work(netif);
+ error = xs_printf(xst, our_path, "feature-rx-flip",
+ "%d", XNB_RX_FLIP);
+ if (error != 0)
+ break;
- if (unlikely(txreq.size < ETHER_HDR_LEN) ||
- unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
- WPRINTF("Bad packet size: %d\n", txreq.size);
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- continue;
+ error = xs_transaction_end(xst, 0);
+ if (error != 0 && error != EAGAIN) {
+ xenbus_dev_fatal(xnb->dev, error, "ending transaction");
+ break;
}
- /* No crossing a page as the payload mustn't fragment. */
- if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
- WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
- txreq.offset, txreq.size,
- (txreq.offset & PAGE_MASK) + txreq.size);
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- continue;
- }
+ } while (error == EAGAIN);
- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ return (error);
+}
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (!m) {
- WPRINTF("Failed to allocate mbuf\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- break;
- }
- m->m_pkthdr.rcvif = netif->ifp;
-
- if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
- struct mbuf *n;
- MGET(n, M_DONTWAIT, MT_DATA);
- if (!(m->m_next = n)) {
- m_freem(m);
- WPRINTF("Failed to allocate second mbuf\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- break;
- }
- n->m_len = txreq.size - PKT_PROT_LEN;
- m->m_len = PKT_PROT_LEN;
- } else
- m->m_len = txreq.size;
-
- mop->host_addr = MMAP_VADDR(pending_idx);
- mop->dom = netif->domid;
- mop->ref = txreq.gref;
- mop->flags = GNTMAP_host_map | GNTMAP_readonly;
- mop++;
-
- memcpy(&pending_tx_info[pending_idx].req,
- &txreq, sizeof(txreq));
- pending_tx_info[pending_idx].netif = netif;
- *((uint16_t *)m->m_data) = pending_idx;
-
- if (txq_last)
- txq_last->m_nextpkt = m;
- else
- txq = m;
- txq_last = m;
-
- pending_cons++;
-
- if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
- break;
- }
+/**
+ * Connect to our netfront peer now that it has completed publishing
+ * its configuration into the XenStore.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ */
+static void
+xnb_connect(struct xnb_softc *xnb)
+{
+ int error;
- if (!txq)
+ if (xenbus_get_state(xnb->dev) == XenbusStateConnected)
return;
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
- BUG_ON(ret);
-
- mop = tx_map_ops;
- while ((m = txq) != NULL) {
- caddr_t data;
-
- txq = m->m_nextpkt;
- m->m_nextpkt = NULL;
-
- pending_idx = *((uint16_t *)m->m_data);
- netif = pending_tx_info[pending_idx].netif;
- memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
-
- /* Check the remap error code. */
- if (unlikely(mop->status)) {
- WPRINTF("#### netback grant fails\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- m_freem(m);
- mop++;
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- continue;
- }
+ if (xnb_collect_xenstore_info(xnb) != 0)
+ return;
-#if 0
- /* Can't do this in FreeBSD since vtophys() returns the pfn */
- /* of the remote domain who loaned us the machine page - DPT */
- xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
- mop->dev_bus_addr >> PAGE_SHIFT;
-#endif
- grant_tx_handle[pending_idx] = mop->handle;
-
- /* Setup data in mbuf (lengths are already set) */
- data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
- bcopy(data, m->m_data, m->m_len);
- if (m->m_next) {
- struct mbuf *n = m->m_next;
- MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
- (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
- n->m_data = &data[PKT_PROT_LEN];
- } else {
- /* Schedule a response immediately. */
- netif_idx_release(pending_idx);
- }
+ xnb->flags &= ~XNBF_SHUTDOWN;
- if ((txreq.flags & NETTXF_data_validated)) {
- /* Tell the stack the checksums are okay */
- m->m_pkthdr.csum_flags |=
- (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
- m->m_pkthdr.csum_data = 0xffff;
- }
+ /* Read front end configuration. */
- /* If necessary, inform stack to compute the checksums if it forwards the packet */
- if ((txreq.flags & NETTXF_csum_blank)) {
- struct ether_header *eh = mtod(m, struct ether_header *);
- if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
- struct ip *ip = (struct ip *)&m->m_data[14];
- if (ip->ip_p == IPPROTO_TCP)
- m->m_pkthdr.csum_flags |= CSUM_TCP;
- else if (ip->ip_p == IPPROTO_UDP)
- m->m_pkthdr.csum_flags |= CSUM_UDP;
- }
- }
+ /* Allocate resources whose size depends on front-end configuration. */
+ error = xnb_alloc_communication_mem(xnb);
+ if (error != 0) {
+ xenbus_dev_fatal(xnb->dev, error,
+ "Unable to allocate communication memory");
+ return;
+ }
- netif->ifp->if_ibytes += m->m_pkthdr.len;
- netif->ifp->if_ipackets++;
+ /*
+ * Connect communication channel.
+ */
+ error = xnb_connect_comms(xnb);
+ if (error != 0) {
+ /* Specific errors are reported by xnb_connect_comms(). */
+ return;
+ }
+ xnb->carrier = 1;
+
+ /* Ready for I/O. */
+ xenbus_set_state(xnb->dev, XenbusStateConnected);
+}
+
+/*-------------------------- Device Teardown Support -------------------------*/
+/**
+ * Perform device shutdown functions.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ *
+ * Mark this instance as shutting down, wait for any active requests
+ * to drain, disconnect from the front-end, and notify any waiters (e.g.
+ * a thread invoking our detach method) that detach can now proceed.
+ */
+static int
+xnb_shutdown(struct xnb_softc *xnb)
+{
+ /*
+ * Due to the need to drop our mutex during some
+ * xenbus operations, it is possible for two threads
+ * to attempt to close out shutdown processing at
+ * the same time. Tell the caller that hits this
+ * race to try back later.
+ */
+ if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0)
+ return (EAGAIN);
- DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
- m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
- DPRINTF_MBUF_LEN(m, 128);
+ xnb->flags |= XNBF_SHUTDOWN;
- (*netif->ifp->if_input)(netif->ifp, m);
+ xnb->flags |= XNBF_IN_SHUTDOWN;
- mop++;
+ mtx_unlock(&xnb->sc_lock);
+ /* Free the network interface */
+ xnb->carrier = 0;
+ if (xnb->xnb_ifp != NULL) {
+ ether_ifdetach(xnb->xnb_ifp);
+ if_free(xnb->xnb_ifp);
+ xnb->xnb_ifp = NULL;
}
+ mtx_lock(&xnb->sc_lock);
+
+ xnb_disconnect(xnb);
+
+ mtx_unlock(&xnb->sc_lock);
+ if (xenbus_get_state(xnb->dev) < XenbusStateClosing)
+ xenbus_set_state(xnb->dev, XenbusStateClosing);
+ mtx_lock(&xnb->sc_lock);
+
+ xnb->flags &= ~XNBF_IN_SHUTDOWN;
+
+ /* Indicate to xnb_detach() that it is safe to proceed. */
+ wakeup(xnb);
+
+ return (0);
}
-/* Handle interrupt from a frontend */
+/**
+ * Report an attach-time error to the console and Xen, and clean up
+ * this instance by forcing immediate detach processing.
+ *
+ * \param xnb Per-instance xnb configuration structure.
+ * \param err Errno describing the error.
+ * \param fmt Printf style format and arguments
+ */
static void
-netback_intr(void *arg)
+xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...)
{
- netif_t *netif = arg;
- DDPRINTF("%s\n", IFNAME(netif));
- add_to_tx_schedule_list_tail(netif);
- maybe_schedule_tx_action();
+ va_list ap;
+ va_list ap_hotplug;
+
+ va_start(ap, fmt);
+ va_copy(ap_hotplug, ap);
+ xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev),
+ "hotplug-error", fmt, ap_hotplug);
+ va_end(ap_hotplug);
+ xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
+ "hotplug-status", "error");
+
+ xenbus_dev_vfatal(xnb->dev, err, fmt, ap);
+ va_end(ap);
+
+ xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
+ "online", "0");
+ xnb_detach(xnb->dev);
}
-/* Removes netif from front of list and does not call netif_put() (caller must) */
-static netif_t *
-remove_from_rx_schedule_list(void)
+/*---------------------------- NewBus Entrypoints ----------------------------*/
+/**
+ * Inspect a XenBus device and claim it if it is of the appropriate type.
+ *
+ * \param dev NewBus device object representing a candidate XenBus device.
+ *
+ * \return 0 for success, errno codes for failure.
+ */
+static int
+xnb_probe(device_t dev)
{
- netif_t *netif;
-
- mtx_lock(&rx_sched_list_lock);
-
- if ((netif = STAILQ_FIRST(&rx_sched_list))) {
- STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
- STAILQ_NEXT(netif, next_rx) = NULL;
- netif->on_rx_sched_list = 0;
+ if (!strcmp(xenbus_get_type(dev), "vif")) {
+ DPRINTF("Claiming device %d, %s\n", device_get_unit(dev),
+ devclass_get_name(device_get_devclass(dev)));
+ device_set_desc(dev, "Backend Virtual Network Device");
+ device_quiet(dev);
+ return (0);
}
-
- mtx_unlock(&rx_sched_list_lock);
-
- return netif;
+ return (ENXIO);
}
-/* Adds netif to end of list and calls netif_get() */
+/**
+ * Set up sysctl variables to control various Network Back parameters.
+ *
+ * \param xnb Xen Net Back softc.
+ *
+ */
static void
-add_to_rx_schedule_list_tail(netif_t *netif)
+xnb_setup_sysctl(struct xnb_softc *xnb)
{
- if (netif->on_rx_sched_list)
+ struct sysctl_ctx_list *sysctl_ctx = NULL;
+ struct sysctl_oid *sysctl_tree = NULL;
+
+ sysctl_ctx = device_get_sysctl_ctx(xnb->dev);
+ if (sysctl_ctx == NULL)
return;
- mtx_lock(&rx_sched_list_lock);
- if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- netif_get(netif);
- STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
- netif->on_rx_sched_list = 1;
- }
- mtx_unlock(&rx_sched_list_lock);
+ sysctl_tree = device_get_sysctl_tree(xnb->dev);
+ if (sysctl_tree == NULL)
+ return;
+
+#ifdef XNB_DEBUG
+ SYSCTL_ADD_PROC(sysctl_ctx,
+ SYSCTL_CHILDREN(sysctl_tree),
+ OID_AUTO,
+ "unit_test_results",
+ CTLTYPE_STRING | CTLFLAG_RD,
+ xnb,
+ 0,
+ xnb_unit_test_main,
+ "A",
+ "Results of builtin unit tests");
+
+ SYSCTL_ADD_PROC(sysctl_ctx,
+ SYSCTL_CHILDREN(sysctl_tree),
+ OID_AUTO,
+ "dump_rings",
+ CTLTYPE_STRING | CTLFLAG_RD,
+ xnb,
+ 0,
+ xnb_dump_rings,
+ "A",
+ "Xennet Back Rings");
+#endif /* XNB_DEBUG */
}
-static int
-make_rx_response(netif_t *netif, uint16_t id, int8_t st,
- uint16_t offset, uint16_t size, uint16_t flags)
+/**
+ * Create a network device.
+ * \param dev NewBus device object representing this instance.
+ */
+static int
+create_netdev(device_t dev)
{
- RING_IDX i = netif->rx.rsp_prod_pvt;
- netif_rx_response_t *resp;
- int notify;
+ struct ifnet *ifp;
+ struct xnb_softc *xnb;
+ int err = 0;
- resp = RING_GET_RESPONSE(&netif->rx, i);
- resp->offset = offset;
- resp->flags = flags;
- resp->id = id;
- resp->status = (int16_t)size;
- if (st < 0)
- resp->status = (int16_t)st;
+ xnb = device_get_softc(dev);
+ mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF);
+ mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF);
+ mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF);
+
+ xnb->dev = dev;
+
+ ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts);
+ ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
+ ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL);
+
+ err = xen_net_read_mac(dev, xnb->mac);
+ if (err == 0) {
+ /* Set up ifnet structure */
+ ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER);
+ ifp->if_softc = xnb;
+ if_initname(ifp, "xnb", device_get_unit(dev));
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_ioctl = xnb_ioctl;
+ ifp->if_output = ether_output;
+ ifp->if_start = xnb_start;
+#ifdef notyet
+ ifp->if_watchdog = xnb_watchdog;
+#endif
+ ifp->if_init = xnb_ifinit;
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1;
- DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
- i, resp->offset, resp->flags, resp->id, resp->status);
+ ifp->if_hwassist = XNB_CSUM_FEATURES;
+ ifp->if_capabilities = IFCAP_HWCSUM;
+ ifp->if_capenable = IFCAP_HWCSUM;
- netif->rx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
+ ether_ifattach(ifp, xnb->mac);
+ xnb->carrier = 0;
+ }
- return notify;
+ return err;
}
+/**
+ * Attach to a XenBus device that has been claimed by our probe routine.
+ *
+ * \param dev NewBus device object representing this Xen Net Back instance.
+ *
+ * \return 0 for success, errno codes for failure.
+ */
static int
-netif_rx(netif_t *netif)
+xnb_attach(device_t dev)
{
- struct ifnet *ifp = netif->ifp;
- struct mbuf *m;
- multicall_entry_t *mcl;
- mmu_update_t *mmu;
- gnttab_transfer_t *gop;
- unsigned long vdata, old_mfn, new_mfn;
- struct mbuf *rxq = NULL, *rxq_last = NULL;
- int ret, notify = 0, pkts_dequeued = 0;
+ struct xnb_softc *xnb;
+ int error;
+ xnb_ring_type_t i;
+
+ error = create_netdev(dev);
+ if (error != 0) {
+ xenbus_dev_fatal(dev, error, "creating netdev");
+ return (error);
+ }
- DDPRINTF("%s\n", IFNAME(netif));
+ DPRINTF("Attaching to %s\n", xenbus_get_node(dev));
- mcl = rx_mcl;
- mmu = rx_mmu;
- gop = grant_rx_op;
+ /*
+ * Basic initialization.
+ * After this block it is safe to call xnb_detach()
+ * to clean up any allocated data for this instance.
+ */
+ xnb = device_get_softc(dev);
+ xnb->otherend_id = xenbus_get_otherend_id(dev);
+ for (i=0; i < XNB_NUM_RING_TYPES; i++) {
+ xnb->ring_configs[i].ring_pages = 1;
+ }
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
-
- /* Quit if the target domain has no receive buffers */
- if (netif->rx.req_cons == netif->rx.sring->req_prod)
- break;
+ /*
+ * Setup sysctl variables.
+ */
+ xnb_setup_sysctl(xnb);
+
+ /* Update hot-plug status to satisfy xend. */
+ error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev),
+ "hotplug-status", "connected");
+ if (error != 0) {
+ xnb_attach_failed(xnb, error, "writing %s/hotplug-status",
+ xenbus_get_node(xnb->dev));
+ return (error);
+ }
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
- if (m == NULL)
- break;
+ if ((error = xnb_publish_backend_info(xnb)) != 0) {
+ /*
+ * If we can't publish our data, we cannot participate
+ * in this connection, and waiting for a front-end state
+ * change will not help the situation.
+ */
+ xnb_attach_failed(xnb, error,
+ "Publishing backend status for %s",
+ xenbus_get_node(xnb->dev));
+ return error;
+ }
- pkts_dequeued++;
-
- /* Check if we need to copy the data */
- if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
- (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
- struct mbuf *n;
-
- DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
- m->m_flags,
- (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
- (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
- (unsigned int)m->m_next);
-
- /* Make copy */
- MGETHDR(n, M_DONTWAIT, MT_DATA);
- if (!n)
- goto drop;
-
- MCLGET(n, M_DONTWAIT);
- if (!(n->m_flags & M_EXT)) {
- m_freem(n);
- goto drop;
- }
+ /* Tell the front end that we are ready to connect. */
+ xenbus_set_state(dev, XenbusStateInitWait);
+
+ return (0);
+}
- /* Leave space at front and keep current alignment */
- n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
+/**
+ * Detach from a net back device instance.
+ *
+ * \param dev NewBus device object representing this Xen Net Back instance.
+ *
+ * \return 0 for success, errno codes for failure.
+ *
+ * \note A net back device may be detached at any time in its life-cycle,
+ * including part way through the attach process. For this reason,
+ * initialization order and the initialization state checks in this
+ * routine must be carefully coupled so that attach time failures
+ * are gracefully handled.
+ */
+static int
+xnb_detach(device_t dev)
+{
+ struct xnb_softc *xnb;
- if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
- WPRINTF("pkt to big %d\n", m->m_pkthdr.len);
- m_freem(n);
- goto drop;
- }
- m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
- n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
- n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
- m_freem(m);
- m = n;
- }
+ DPRINTF("\n");
- vdata = (unsigned long)m->m_data;
- old_mfn = vtomach(vdata) >> PAGE_SHIFT;
+ xnb = device_get_softc(dev);
+ mtx_lock(&xnb->sc_lock);
+ while (xnb_shutdown(xnb) == EAGAIN) {
+ msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0,
+ "xnb_shutdown", 0);
+ }
+ mtx_unlock(&xnb->sc_lock);
+ DPRINTF("\n");
- if ((new_mfn = alloc_mfn()) == 0)
- goto drop;
+ mtx_destroy(&xnb->tx_lock);
+ mtx_destroy(&xnb->rx_lock);
+ mtx_destroy(&xnb->sc_lock);
+ return (0);
+}
-#ifdef XEN_NETBACK_FIXUP_CSUM
- /* Check if we need to compute a checksum. This happens */
- /* when bridging from one domain to another. */
- if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ||
- (m->m_pkthdr.csum_flags & CSUM_SCTP))
- fixup_checksum(m);
-#endif
+/**
+ * Prepare this net back device for suspension of this VM.
+ *
+ * \param dev NewBus device object representing this Xen net Back instance.
+ *
+ * \return 0 for success, errno codes for failure.
+ */
+static int
+xnb_suspend(device_t dev)
+{
+ return (0);
+}
- xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
-
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = vdata;
- mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
- mcl->args[2] = 0;
- mcl->args[3] = 0;
- mcl++;
-
- gop->mfn = old_mfn;
- gop->domid = netif->domid;
- gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
- netif->rx.req_cons++;
- gop++;
-
- mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- mmu->val = vtophys(vdata) >> PAGE_SHIFT;
- mmu++;
-
- if (rxq_last)
- rxq_last->m_nextpkt = m;
- else
- rxq = m;
- rxq_last = m;
-
- DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
- DPRINTF_MBUF_LEN(m, 128);
-
- /* Filled the batch queue? */
- if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
- break;
-
- continue;
- drop:
- DDPRINTF("dropping pkt\n");
- ifp->if_oerrors++;
- m_freem(m);
- }
+/**
+ * Perform any processing required to recover from a suspended state.
+ *
+ * \param dev NewBus device object representing this Xen Net Back instance.
+ *
+ * \return 0 for success, errno codes for failure.
+ */
+static int
+xnb_resume(device_t dev)
+{
+ return (0);
+}
- if (mcl == rx_mcl)
- return pkts_dequeued;
+/**
+ * Handle state changes expressed via the XenStore by our front-end peer.
+ *
+ * \param dev NewBus device object representing this Xen
+ * Net Back instance.
+ * \param frontend_state The new state of the front-end.
+ */
+static void
+xnb_frontend_changed(device_t dev, XenbusState frontend_state)
+{
+ struct xnb_softc *xnb;
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
+ xnb = device_get_softc(dev);
- mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
- ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
- BUG_ON(ret != 0);
+ DPRINTF("frontend_state=%s, xnb_state=%s\n",
+ xenbus_strstate(frontend_state),
+ xenbus_strstate(xenbus_get_state(xnb->dev)));
- ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
- BUG_ON(ret != 0);
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ break;
+ case XenbusStateInitialised:
+ case XenbusStateConnected:
+ xnb_connect(xnb);
+ break;
+ case XenbusStateClosing:
+ case XenbusStateClosed:
+ mtx_lock(&xnb->sc_lock);
+ xnb_shutdown(xnb);
+ mtx_unlock(&xnb->sc_lock);
+ if (frontend_state == XenbusStateClosed)
+ xenbus_set_state(xnb->dev, XenbusStateClosed);
+ break;
+ default:
+ xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+
+/*---------------------------- Request Processing ----------------------------*/
+/**
+ * Interrupt handler bound to the shared ring's event channel.
+ * Entry point for the xennet transmit path in netback.
+ * Transfers packets from the Xen ring to the host's generic networking stack.
+ *
+ * \param arg Callback argument registered during event channel
+ * binding - the xnb_softc for this instance.
+ */
+static void
+xnb_intr(void *arg)
+{
+ struct xnb_softc *xnb;
+ struct ifnet *ifp;
+ netif_tx_back_ring_t *txb;
+ RING_IDX req_prod_local;
+
+ xnb = (struct xnb_softc *)arg;
+ ifp = xnb->xnb_ifp;
+ txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring;
+
+ mtx_lock(&xnb->tx_lock);
+ do {
+ int notify;
+ req_prod_local = txb->sring->req_prod;
+ xen_rmb();
+
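+ /*
+ * Drain the ring: xnb_recv() consumes at most one packet per
+ * call and sets mbufc to NULL once no complete packet is
+ * available, which ends this inner loop.
+ */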
+ for (;;) {
+ struct mbuf *mbufc;
+ int err;
+
+ err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp,
+ xnb->tx_gnttab);
+ if (err || (mbufc == NULL))
+ break;
- mcl = rx_mcl;
- gop = grant_rx_op;
+ /* Send the packet to the generic network stack */
+ (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc);
+ }
- while ((m = rxq) != NULL) {
- int8_t status;
- uint16_t id, flags = 0;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify);
+ if (notify != 0)
+ notify_remote_via_irq(xnb->irq);
- rxq = m->m_nextpkt;
- m->m_nextpkt = NULL;
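+ /*
+ * Re-arm notifications for the next request, then let the
+ * do/while condition re-read req_prod to close the race where
+ * the frontend queued more work after our first read.
+ */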
+ txb->sring->req_event = txb->req_cons + 1;
+ xen_mb();
+ } while (txb->sring->req_prod != req_prod_local);
+ mtx_unlock(&xnb->tx_lock);
- /* Rederive the machine addresses. */
- new_mfn = mcl->args[1] >> PAGE_SHIFT;
- old_mfn = gop->mfn;
+ xnb_start(ifp);
+}
- ifp->if_obytes += m->m_pkthdr.len;
- ifp->if_opackets++;
- /* The update_va_mapping() must not fail. */
- BUG_ON(mcl->result != 0);
+/**
+ * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring.
+ * Will read exactly 0 or 1 packets from the ring; never a partial packet.
+ * \param[out] pkt The returned packet. If there is an error building
+ * the packet, pkt.list_len will be set to 0.
+ * \param[in] tx_ring Pointer to the Ring that is the input to this function
+ * \param[in] start The ring index of the first potential request
+ * \return The number of requests consumed to build this packet
+ */
+static int
+xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring,
+ RING_IDX start)
+{
+ /*
+ * Outline:
+ * 1) Initialize pkt
+ * 2) Read the first request of the packet
+ * 3) Read the extras
+ * 4) Set cdr
+ * 5) Loop on the remainder of the packet
+ * 6) Finalize pkt (stuff like car_size and list_len)
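+ *
+ * A note on naming: "car" and "cdr" follow the Lisp convention.
+ * pkt->car is the ring index of the packet's first request and
+ * pkt->cdr the index of the second; any further requests follow
+ * contiguously after cdr, with the optional extra info slot
+ * sitting between car and cdr:
+ *
+ * ring: ...[car][extra info?][cdr][cdr+1]...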
+ */
+ int idx = start;
+ int discard = 0; /* whether to discard the packet */
+ int more_data = 0; /* there are more requests past the last one */
+ uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */
+
+ xnb_pkt_initialize(pkt);
+
+ /* Read the first request */
+ if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) {
+ netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx);
+ pkt->size = tx->size;
+ pkt->flags = tx->flags & ~NETTXF_more_data;
+ more_data = tx->flags & NETTXF_more_data;
+ pkt->list_len++;
+ pkt->car = idx;
+ idx++;
+ }
- /* Setup flags */
- if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
- flags |= NETRXF_csum_blank | NETRXF_data_validated;
- else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
- flags |= NETRXF_data_validated;
+ /* Read the extra info */
+ if ((pkt->flags & NETTXF_extra_info) &&
+ RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) {
+ netif_extra_info_t *ext =
+ (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx);
+ pkt->extra.type = ext->type;
+ switch (pkt->extra.type) {
+ case XEN_NETIF_EXTRA_TYPE_GSO:
+ pkt->extra.u.gso = ext->u.gso;
+ break;
+ default:
+ /*
+ * The reference Linux netfront driver will
+ * never set any other extra.type. So we don't
+ * know what to do with it. Let's print an
+ * error, then consume and discard the packet
+ */
+ printf("xnb(%s:%d): Unknown extra info type %d."
+ " Discarding packet\n",
+ __func__, __LINE__, pkt->extra.type);
+ xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring,
+ start));
+ xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring,
+ idx));
+ discard = 1;
+ break;
+ }
- /* Check the reassignment error code. */
- status = NETIF_RSP_OKAY;
- if (gop->status != 0) {
- DPRINTF("Bad status %d from grant transfer to DOM%u\n",
- gop->status, netif->domid);
+ pkt->extra.flags = ext->flags;
+ if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) {
/*
- * Page no longer belongs to us unless GNTST_bad_page,
- * but that should be a fatal error anyway.
+ * The reference Linux netfront driver never sets this
+ * flag (nor does any other known netfront). So we
+ * will discard the packet.
*/
- BUG_ON(gop->status == GNTST_bad_page);
- status = NETIF_RSP_ERROR;
+ printf("xnb(%s:%d): Request sets "
+ "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle "
+ "that\n", __func__, __LINE__);
+ xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start));
+ xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx));
+ discard = 1;
}
- id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
- notify |= make_rx_response(netif, id, status,
- (unsigned long)m->m_data & PAGE_MASK,
- m->m_pkthdr.len, flags);
-
- m_freem(m);
- mcl++;
- gop++;
+
+ idx++;
}
- if (notify)
- notify_remote_via_irq(netif->irq);
+ /* Set cdr. If there is no more data, cdr is invalid */
+ pkt->cdr = idx;
+
+ /* Loop on remainder of packet */
+ while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) {
+ netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx);
+ pkt->list_len++;
+ cdr_size += tx->size;
+ if (tx->flags & ~NETTXF_more_data) {
+ /* There should be no other flags set at this point */
+ printf("xnb(%s:%d): Request sets unknown flags %d "
+ "after the 1st request in the packet.\n",
+ __func__, __LINE__, tx->flags);
+ xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start));
+ xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx));
+ }
- return pkts_dequeued;
-}
+ more_data = tx->flags & NETTXF_more_data;
+ idx++;
+ }
-static void
-rx_task_timer(void *arg)
-{
- DDPRINTF("\n");
- taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+ /* Finalize packet */
+ if (more_data != 0) {
+ /* The ring ran out of requests before finishing the packet */
+ xnb_pkt_invalidate(pkt);
+ idx = start; /* tell caller that we consumed no requests */
+ } else {
+ /* Calculate car_size */
+ pkt->car_size = pkt->size - cdr_size;
+ }
+ if (discard != 0) {
+ xnb_pkt_invalidate(pkt);
+ }
+
+ return idx - start;
}
+
+/**
+ * Respond to all the requests that constituted pkt. Builds the responses and
+ * writes them to the ring, but doesn't push them to the shared ring.
+ * \param[in] pkt the packet that needs a response
+ * \param[out] ring Responses go here
+ * \param[in] error true if there was an error handling the packet, such
+ * as in the hypervisor copy op or mbuf allocation
+ */
static void
-net_rx_action(void *context, int pending)
+xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring,
+ int error)
{
- netif_t *netif, *last_zero_work = NULL;
-
- DDPRINTF("\n");
-
- while ((netif = remove_from_rx_schedule_list())) {
- struct ifnet *ifp = netif->ifp;
-
- if (netif == last_zero_work) {
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- add_to_rx_schedule_list_tail(netif);
- netif_put(netif);
- if (!STAILQ_EMPTY(&rx_sched_list))
- callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
- break;
- }
-
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- if (netif_rx(netif))
- last_zero_work = NULL;
- else if (!last_zero_work)
- last_zero_work = netif;
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- add_to_rx_schedule_list_tail(netif);
+ /*
+ * Outline:
+ * 1) Respond to the first request
+ * 2) Respond to the extra info request
+ * 3) Loop through every remaining request in the packet, generating
+ * responses that copy those requests' ids and set the status
+ * appropriately.
+ */
+ netif_tx_request_t *tx;
+ netif_tx_response_t *rsp;
+ int i;
+ uint16_t status;
+
+ status = (xnb_pkt_is_valid(pkt) == 0) || error ?
+ NETIF_RSP_ERROR : NETIF_RSP_OKAY;
+ KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car),
+ ("Cannot respond to ring requests out of order"));
+
+ if (pkt->list_len >= 1) {
+ uint16_t id;
+ tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt);
+ id = tx->id;
+ rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
+ rsp->id = id;
+ rsp->status = status;
+ ring->rsp_prod_pvt++;
+
+ if (pkt->flags & NETTXF_extra_info) {
+ rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
+ rsp->status = NETIF_RSP_NULL;
+ ring->rsp_prod_pvt++;
}
+ }
- netif_put(netif);
+ for (i=0; i < pkt->list_len - 1; i++) {
+ uint16_t id;
+ tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt);
+ id = tx->id;
+ rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
+ rsp->id = id;
+ rsp->status = status;
+ ring->rsp_prod_pvt++;
}
}
-static void
-netback_start(struct ifnet *ifp)
+/**
+ * Create an mbuf chain to represent a packet. Initializes all of the headers
+ * in the mbuf chain, but does not copy the data. The returned chain must be
+ * free()'d when no longer needed
+ * \param[in] pkt A packet to model the mbuf chain after
+ * \return A newly allocated mbuf chain, possibly with clusters attached.
+ * NULL on failure
+ */
+static struct mbuf*
+xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp)
{
- netif_t *netif = (netif_t *)ifp->if_softc;
+ /**
+ * \todo consider using a memory pool for mbufs instead of
+ * reallocating them for every packet
+ */
+ /** \todo handle extra data */
+ struct mbuf *m;
- DDPRINTF("%s\n", IFNAME(netif));
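+ /*
+ * m_getm(9) allocates an mbuf chain, attaching clusters as
+ * needed, with at least pkt->size bytes of storage; it returns
+ * NULL if the allocation fails.
+ */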
+ m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA);
- add_to_rx_schedule_list_tail(netif);
- taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+ if (m != NULL) {
+ m->m_pkthdr.rcvif = ifp;
+ if (pkt->flags & NETTXF_data_validated) {
+ /*
+ * We lie to the host OS and always tell it that the
+ * checksums are ok, because the packet is unlikely to
+ * get corrupted going across domains.
+ */
+ m->m_pkthdr.csum_flags = (
+ CSUM_IP_CHECKED |
+ CSUM_IP_VALID |
+ CSUM_DATA_VALID |
+ CSUM_PSEUDO_HDR
+ );
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+ }
+ return m;
}
-/* Map a grant ref to a ring */
+/**
+ * Build a gnttab_copy table that can be used to copy data from a pkt
+ * to an mbufc. Does not actually perform the copy. Always uses grefs on
+ * the packet side.
+ * \param[in] pkt pkt's associated requests form the src for
+ * the copy operation
+ * \param[in] mbufc mbufc's storage forms the dest for the copy operation
+ * \param[out] gnttab Storage for the returned grant table
+ * \param[in] txb Pointer to the backend ring structure
+ * \param[in] otherend_id The domain ID of the other end of the copy
+ * \return The number of gnttab entries filled
+ */
static int
-map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc,
+ gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb,
+ domid_t otherend_id)
{
- struct gnttab_map_grant_ref op;
-
- ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
- if (ring->va == 0)
- return ENOMEM;
- op.host_addr = ring->va;
- op.flags = GNTMAP_host_map;
- op.ref = ref;
- op.dom = dom;
- HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
- if (op.status) {
- WPRINTF("grant table op err=%d\n", op.status);
- kmem_free(kernel_map, ring->va, PAGE_SIZE);
- ring->va = 0;
- return EACCES;
+ const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */
+ int gnt_idx = 0; /* index into grant table */
+ RING_IDX r_idx = pkt->car; /* index into tx ring buffer */
+ int r_ofs = 0; /* offset of next data within tx request's data area */
+ int m_ofs = 0; /* offset of next data within mbuf's data area */
+ /* size in bytes that still needs to be represented in the table */
+ uint16_t size_remaining = pkt->size;
+
+ while (size_remaining > 0) {
+ const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx);
+ const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs;
+ const size_t req_size =
+ r_idx == pkt->car ? pkt->car_size : txq->size;
+ const size_t pkt_space = req_size - r_ofs;
+ /*
+ * space is the largest amount of data that can be copied in the
+ * grant table's next entry
+ */
+ const size_t space = MIN(pkt_space, mbuf_space);
+
+ /* TODO: handle this error condition without panicking */
+ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short"));
+
+ gnttab[gnt_idx].source.u.ref = txq->gref;
+ gnttab[gnt_idx].source.domid = otherend_id;
+ gnttab[gnt_idx].source.offset = txq->offset + r_ofs;
+ gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn(
+ mtod(mbuf, vm_offset_t) + m_ofs);
+ gnttab[gnt_idx].dest.offset = virt_to_offset(
+ mtod(mbuf, vm_offset_t) + m_ofs);
+ gnttab[gnt_idx].dest.domid = DOMID_SELF;
+ gnttab[gnt_idx].len = space;
+ gnttab[gnt_idx].flags = GNTCOPY_source_gref;
+
+ gnt_idx++;
+ r_ofs += space;
+ m_ofs += space;
+ size_remaining -= space;
+ if (req_size - r_ofs <= 0) {
+ /* Must move to the next tx request */
+ r_ofs = 0;
+ r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1;
+ }
+ if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) {
+ /* Must move to the next mbuf */
+ m_ofs = 0;
+ mbuf = mbuf->m_next;
+ }
}
- ring->handle = op.handle;
- ring->bus_addr = op.dev_bus_addr;
-
- return 0;
+ return gnt_idx;
}
-/* Unmap grant ref for a ring */
+/**
+ * Check the status of the grant copy operations, and update the mbufs' various
+ * non-data fields to reflect the data present.
+ * \param[in,out] mbufc mbuf chain to update. The chain must be valid and of
+ * the correct length, and data should already be present
+ * \param[in] gnttab A grant table for a just completed copy op
+ * \param[in] n_entries The number of valid entries in the grant table
+ */
static void
-unmap_ring(struct ring_ref *ring)
+xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab,
+ int n_entries)
{
- struct gnttab_unmap_grant_ref op;
-
- op.host_addr = ring->va;
- op.dev_bus_addr = ring->bus_addr;
- op.handle = ring->handle;
- HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
- if (op.status)
- WPRINTF("grant table op err=%d\n", op.status);
+ struct mbuf *mbuf = mbufc;
+ int i;
+ size_t total_size = 0;
+
+ for (i = 0; i < n_entries; i++) {
+ KASSERT(gnttab[i].status == GNTST_okay,
+ ("Some gnttab_copy entry had error status %hd\n",
+ gnttab[i].status));
+
+ mbuf->m_len += gnttab[i].len;
+ total_size += gnttab[i].len;
+ if (M_TRAILINGSPACE(mbuf) <= 0) {
+ mbuf = mbuf->m_next;
+ }
+ }
+ mbufc->m_pkthdr.len = total_size;
- kmem_free(kernel_map, ring->va, PAGE_SIZE);
- ring->va = 0;
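+ /*
+ * The frontend may have left the checksums blank (it set
+ * NETTXF_data_validated); compute any IP/TCP/UDP sums in
+ * software before the chain reaches the host stack.
+ */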
+ xnb_add_mbuf_cksum(mbufc);
}
+/**
+ * Dequeue at most one packet from the shared ring
+ * \param[in,out] txb Netif tx ring. A packet will be removed from it, and
+ * its private indices will be updated. But the indices
+ * will not be pushed to the shared ring.
+ * \param[in] otherend Domain ID of the other end of the ring
+ * \param[out] mbufc The assembled mbuf chain, ready to send to the generic
+ * networking stack
+ * \param[in] ifnet Interface to which the packet will be sent
+ * \param[in,out] gnttab Pointer to enough memory for a grant table. We make
+ * this a function parameter so that we will take less
+ * stack space.
+ * \return An error code
+ */
static int
-connect_rings(netif_t *netif)
+xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc,
+ struct ifnet *ifnet, gnttab_copy_table gnttab)
{
- struct xenbus_device *xdev = netif->xdev;
- netif_tx_sring_t *txs;
- netif_rx_sring_t *rxs;
- unsigned long tx_ring_ref, rx_ring_ref;
- evtchn_port_t evtchn;
- evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
- int err;
+ struct xnb_pkt pkt;
+ /* number of tx requests consumed to build the last packet */
+ int num_consumed;
+ int nr_ents;
- // Grab FE data and map his memory
- err = xenbus_gather(NULL, xdev->otherend,
- "tx-ring-ref", "%lu", &tx_ring_ref,
- "rx-ring-ref", "%lu", &rx_ring_ref,
- "event-channel", "%u", &evtchn, NULL);
- if (err) {
- xenbus_dev_fatal(xdev, err,
- "reading %s/ring-ref and event-channel",
- xdev->otherend);
- return err;
- }
+ *mbufc = NULL;
+ num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons);
+ if (num_consumed == 0)
+ return 0; /* Nothing to receive */
- err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
- if (err) {
- xenbus_dev_fatal(xdev, err, "mapping tx ring");
- return err;
- }
- txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
- BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+ /* update statistics independent of errors */
+ ifnet->if_ipackets++;
- err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
- if (err) {
- unmap_ring(&netif->tx_ring_ref);
- xenbus_dev_fatal(xdev, err, "mapping rx ring");
- return err;
- }
- rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
- BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
-
- op.u.bind_interdomain.remote_dom = netif->domid;
- op.u.bind_interdomain.remote_port = evtchn;
- err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_ring(&netif->tx_ring_ref);
- unmap_ring(&netif->rx_ring_ref);
- xenbus_dev_fatal(xdev, err, "binding event channel");
- return err;
+ /*
+ * If we got here, then 1 or more requests were consumed, but the
+ * packet is not necessarily valid.
+ */
+ if (xnb_pkt_is_valid(&pkt) == 0) {
+ /* got a garbage packet, respond and drop it */
+ xnb_txpkt2rsp(&pkt, txb, 1);
+ txb->req_cons += num_consumed;
+ DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n",
+ num_consumed);
+ ifnet->if_ierrors++;
+ return EINVAL;
}
- netif->evtchn = op.u.bind_interdomain.local_port;
- /* bind evtchn to irq handler */
- netif->irq =
- bind_evtchn_to_irqhandler(netif->evtchn, "netback",
- netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
+ *mbufc = xnb_pkt2mbufc(&pkt, ifnet);
+
+ if (*mbufc == NULL) {
+ /*
+ * Couldn't allocate mbufs. Respond and drop the packet. Do
+ * not consume the requests.
+ */
+ xnb_txpkt2rsp(&pkt, txb, 1);
+ DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n",
+ num_consumed);
+ ifnet->if_iqdrops++;
+ return ENOMEM;
+ }
- netif->rings_connected = 1;
+ nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend);
- DPRINTF("%s connected! evtchn=%d irq=%d\n",
- IFNAME(netif), netif->evtchn, netif->irq);
+ if (nr_ents > 0) {
+ int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+ gnttab, nr_ents);
+ KASSERT(hv_ret == 0,
+ ("HYPERVISOR_grant_table_op returned %d\n", hv_ret));
+ xnb_update_mbufc(*mbufc, gnttab, nr_ents);
+ }
+ xnb_txpkt2rsp(&pkt, txb, 0);
+ txb->req_cons += num_consumed;
return 0;
}
-static void
-disconnect_rings(netif_t *netif)
+/**
+ * Create an xnb_pkt based on the contents of an mbuf chain.
+ * \param[in] mbufc mbuf chain to transform into a packet
+ * \param[out] pkt Storage for the newly generated xnb_pkt
+ * \param[in] start The ring index of the first available slot in the rx
+ * ring
+ * \param[in] space The number of free slots in the rx ring
+ * \retval 0 Success
+ * \retval EINVAL mbufc was corrupt or not convertible into a pkt
+ * \retval EAGAIN There was not enough space in the ring to queue the
+ * packet
+ */
+static int
+xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt,
+ RING_IDX start, int space)
{
- DPRINTF("\n");
- if (netif->rings_connected) {
- unbind_from_irqhandler(netif->irq, netif->irq_cookie);
- netif->irq = 0;
- unmap_ring(&netif->tx_ring_ref);
- unmap_ring(&netif->rx_ring_ref);
- netif->rings_connected = 0;
- }
-}
+ int retval = 0;
-static void
-connect(netif_t *netif)
-{
- if (!netif->xdev ||
- !netif->attached ||
- netif->frontend_state != XenbusStateConnected) {
- return;
- }
+ if ((mbufc == NULL) ||
+ ( (mbufc->m_flags & M_PKTHDR) == 0) ||
+ (mbufc->m_pkthdr.len == 0)) {
+ xnb_pkt_invalidate(pkt);
+ retval = EINVAL;
+ } else {
+ int slots_required;
+
+ xnb_pkt_validate(pkt);
+ pkt->flags = 0;
+ pkt->size = mbufc->m_pkthdr.len;
+ pkt->car = start;
+ pkt->car_size = mbufc->m_len;
+
+ if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) {
+ pkt->flags |= NETRXF_extra_info;
+ pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz;
+ pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ pkt->extra.u.gso.pad = 0;
+ pkt->extra.u.gso.features = 0;
+ pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO;
+ pkt->extra.flags = 0;
+ pkt->cdr = start + 2;
+ } else {
+ pkt->cdr = start + 1;
+ }
+ if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) {
+ pkt->flags |=
+ (NETRXF_csum_blank | NETRXF_data_validated);
+ }
- if (!connect_rings(netif)) {
- xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
+ /*
+ * Each ring response can have up to PAGE_SIZE of data.
+ * Assume that we can defragment the mbuf chain efficiently
+ * into responses so that each response but the last uses all
+ * PAGE_SIZE bytes.
+ */
+ pkt->list_len = (pkt->size + PAGE_SIZE - 1) / PAGE_SIZE;
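+ /* i.e. list_len = ceil(pkt->size / PAGE_SIZE), in integer math */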
- /* Turn on interface */
- netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
- netif->ifp->if_flags |= IFF_UP;
+ if (pkt->list_len > 1) {
+ pkt->flags |= NETRXF_more_data;
+ }
+
+ slots_required = pkt->list_len +
+ (pkt->flags & NETRXF_extra_info ? 1 : 0);
+ if (slots_required > space) {
+ xnb_pkt_invalidate(pkt);
+ retval = EAGAIN;
+ }
}
+
+ return retval;
}
+/**
+ * Build a gnttab_copy table that can be used to copy data from an mbuf chain
+ * to the frontend's shared buffers. Does not actually perform the copy.
+ * Always uses grefs on the other end's side.
+ * \param[in] pkt pkt's associated responses form the dest for the copy
+ * operation
+ * \param[in] mbufc The source for the copy operation
+ * \param[out] gnttab Storage for the returned grant table
+ * \param[in] rxb Pointer to the backend ring structure
+ * \param[in] otherend_id The domain ID of the other end of the copy
+ * \return The number of gnttab entries filled
+ */
static int
-netback_remove(struct xenbus_device *xdev)
+xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc,
+ gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb,
+ domid_t otherend_id)
{
- netif_t *netif = xdev->data;
- device_t ndev;
-
- DPRINTF("remove %s\n", xdev->nodename);
- if ((ndev = netif->ndev)) {
- netif->ndev = NULL;
- mtx_lock(&Giant);
- device_detach(ndev);
- mtx_unlock(&Giant);
+ const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */
+ int gnt_idx = 0; /* index into grant table */
+ RING_IDX r_idx = pkt->car; /* index into rx ring buffer */
+ int r_ofs = 0; /* offset of next data within rx request's data area */
+ int m_ofs = 0; /* offset of next data within mbuf's data area */
+ /* size in bytes that still needs to be represented in the table */
+ uint16_t size_remaining;
+
+ size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? pkt->size : 0;
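+ /* An invalid pkt contributes no copy entries; the loop below is skipped */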
+
+ while (size_remaining > 0) {
+ const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx);
+ const size_t mbuf_space = mbuf->m_len - m_ofs;
+ /* Xen shared pages have an implied size of PAGE_SIZE */
+ const size_t req_size = PAGE_SIZE;
+ const size_t pkt_space = req_size - r_ofs;
+ /*
+ * space is the largest amount of data that can be copied in the
+ * grant table's next entry
+ */
+ const size_t space = MIN(pkt_space, mbuf_space);
+
+ /* TODO: handle this error condition without panicking */
+ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short"));
+
+ gnttab[gnt_idx].dest.u.ref = rxq->gref;
+ gnttab[gnt_idx].dest.domid = otherend_id;
+ gnttab[gnt_idx].dest.offset = r_ofs;
+ gnttab[gnt_idx].source.u.gmfn = virt_to_mfn(
+ mtod(mbuf, vm_offset_t) + m_ofs);
+ gnttab[gnt_idx].source.offset = virt_to_offset(
+ mtod(mbuf, vm_offset_t) + m_ofs);
+ gnttab[gnt_idx].source.domid = DOMID_SELF;
+ gnttab[gnt_idx].len = space;
+ gnttab[gnt_idx].flags = GNTCOPY_dest_gref;
+
+ gnt_idx++;
+
+ r_ofs += space;
+ m_ofs += space;
+ size_remaining -= space;
+ if (req_size - r_ofs <= 0) {
+ /* Must move to the next rx request */
+ r_ofs = 0;
+ r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1;
+ }
+ if (mbuf->m_len - m_ofs <= 0) {
+ /* Must move to the next mbuf */
+ m_ofs = 0;
+ mbuf = mbuf->m_next;
+ }
}
- xdev->data = NULL;
- netif->xdev = NULL;
- netif_put(netif);
-
- return 0;
+ return gnt_idx;
}
/**
- * Entry point to this code when a new device is created. Allocate the basic
- * structures and the ring buffers for communication with the frontend.
- * Switch to Connected state.
+ * Generates responses for all the requests that constituted pkt. Builds
+ * responses and writes them to the ring, but doesn't push the shared ring
+ * indices.
+ * \param[in] pkt the packet that needs a response
+ * \param[in] gnttab The grant copy table corresponding to this packet.
+ * Used to determine how many netif_rx_response_t's to
+ * generate.
+ * \param[in] n_entries Number of relevant entries in the grant table
+ * \param[out] ring Responses go here
+ * \return The number of RX requests that were consumed to generate
+ * the responses
*/
static int
-netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab,
+ int n_entries, netif_rx_back_ring_t *ring)
{
- int err;
- long handle;
- char *bridge;
-
- DPRINTF("node=%s\n", xdev->nodename);
-
- /* Grab the handle */
- err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
- if (err != 1) {
- xenbus_dev_fatal(xdev, err, "reading handle");
- return err;
- }
+ /*
+ * This code makes the following assumptions:
+ * * All entries in gnttab set GNTCOPY_dest_gref
+ * * The entries in gnttab are grouped by their grefs: any two
+ * entries with the same gref must be adjacent
+ */
+ int error = 0;
+ int gnt_idx, i;
+ int n_responses = 0;
+ grant_ref_t last_gref = GRANT_REF_INVALID;
+ RING_IDX r_idx;
- /* Check for bridge */
- bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
- if (IS_ERR(bridge))
- bridge = NULL;
+ KASSERT(gnttab != NULL, ("Received a null granttable copy"));
- err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
- if (err) {
- xenbus_dev_fatal(xdev, err, "writing switch state");
- return err;
+ /*
+ * In the event of an error, we only need to send one response to the
+ * netfront. In that case, we mustn't write any data to the responses
+ * after the one we send. So we must loop all the way through gnttab
+ * looking for errors before we generate any responses.
+ *
+ * Since we're looping through the grant table anyway, we'll count the
+ * number of different gref's in it, which will tell us how many
+ * responses to generate
+ */
+ for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) {
+ int16_t status = gnttab[gnt_idx].status;
+ if (status != GNTST_okay) {
+ DPRINTF(
+ "Got error %d for hypervisor gnttab_copy status\n",
+ status);
+ error = 1;
+ break;
+ }
+ if (gnttab[gnt_idx].dest.u.ref != last_gref) {
+ n_responses++;
+ last_gref = gnttab[gnt_idx].dest.u.ref;
+ }
}
- err = netif_create(handle, xdev, bridge);
- if (err) {
- xenbus_dev_fatal(xdev, err, "creating netif");
- return err;
- }
+ if (error != 0) {
+ uint16_t id;
+ netif_rx_response_t *rsp;
+
+ id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id;
+ rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
+ rsp->id = id;
+ rsp->status = NETIF_RSP_ERROR;
+ n_responses = 1;
+ } else {
+ gnt_idx = 0;
+ const int has_extra = pkt->flags & NETRXF_extra_info;
+ if (has_extra != 0)
+ n_responses++;
- err = vif_add_dev(xdev);
- if (err) {
- netif_put((netif_t *)xdev->data);
- xenbus_dev_fatal(xdev, err, "adding vif device");
- return err;
+ for (i = 0; i < n_responses; i++) {
+ netif_rx_request_t rxq;
+ netif_rx_response_t *rsp;
+
+ r_idx = ring->rsp_prod_pvt + i;
+ /*
+ * We copy the structure of rxq instead of making a
+ * pointer because it shares the same memory as rsp.
+ */
+ rxq = *(RING_GET_REQUEST(ring, r_idx));
+ rsp = RING_GET_RESPONSE(ring, r_idx);
+ if (has_extra && (i == 1)) {
+ netif_extra_info_t *ext =
+ (netif_extra_info_t*)rsp;
+ ext->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ ext->flags = 0;
+ ext->u.gso.size = pkt->extra.u.gso.size;
+ ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ ext->u.gso.pad = 0;
+ ext->u.gso.features = 0;
+ } else {
+ rsp->id = rxq.id;
+ rsp->status = GNTST_okay;
+ rsp->offset = 0;
+ rsp->flags = 0;
+ if (i < pkt->list_len - 1)
+ rsp->flags |= NETRXF_more_data;
+ if ((i == 0) && has_extra)
+ rsp->flags |= NETRXF_extra_info;
+ if ((i == 0) &&
+ (pkt->flags & NETRXF_data_validated)) {
+ rsp->flags |= NETRXF_data_validated;
+ rsp->flags |= NETRXF_csum_blank;
+ }
+ rsp->status = 0;
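+ /*
+ * On the rx ring, a non-negative status is the byte count
+ * delivered into this request's buffer, so accumulate the
+ * length of every copy entry aimed at this gref.
+ */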
+ for (; gnttab[gnt_idx].dest.u.ref == rxq.gref;
+ gnt_idx++) {
+ rsp->status += gnttab[gnt_idx].len;
+ }
+ }
+ }
}
- return 0;
+ ring->req_cons += n_responses;
+ ring->rsp_prod_pvt += n_responses;
+ return n_responses;
}
/**
- * We are reconnecting to the backend, due to a suspend/resume, or a backend
- * driver restart. We tear down our netif structure and recreate it, but
- * leave the device-layer structures intact so that this is transparent to the
- * rest of the kernel.
- */
-static int netback_resume(struct xenbus_device *xdev)
-{
- DPRINTF("node=%s\n", xdev->nodename);
- return 0;
-}
-
-
-/**
- * Callback received when the frontend's state changes.
+ * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf
+ * in the chain must start with a struct ether_header.
+ *
+ * XXX This function will perform incorrectly on UDP packets that are split up
+ * into multiple ethernet frames.
*/
-static void frontend_changed(struct xenbus_device *xdev,
- XenbusState frontend_state)
+static void
+xnb_add_mbuf_cksum(struct mbuf *mbufc)
{
- netif_t *netif = xdev->data;
+ struct ether_header *eh;
+ struct ip *iph;
+ uint16_t ether_type;
+
+ eh = mtod(mbufc, struct ether_header*);
+ ether_type = ntohs(eh->ether_type);
+ if (ether_type != ETHERTYPE_IP) {
+ /* Nothing to calculate */
+ return;
+ }
- DPRINTF("state=%d\n", frontend_state);
-
- netif->frontend_state = frontend_state;
+ iph = (struct ip*)(eh + 1);
+ if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) {
+ iph->ip_sum = 0;
+ iph->ip_sum = in_cksum_hdr(iph);
+ }
- switch (frontend_state) {
- case XenbusStateInitialising:
- case XenbusStateInitialised:
- break;
- case XenbusStateConnected:
- connect(netif);
- break;
- case XenbusStateClosing:
- xenbus_switch_state(xdev, NULL, XenbusStateClosing);
+ switch (iph->ip_p) {
+ case IPPROTO_TCP:
+ if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) {
+ size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip);
+ struct tcphdr *th = (struct tcphdr*)(iph + 1);
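+ /*
+ * Seed th_sum with the pseudo-header sum so that
+ * in_cksum_skip(), which sums the TCP header and payload
+ * while skipping the Ethernet and IP headers, folds it in.
+ */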
+ th->th_sum = in_pseudo(iph->ip_src.s_addr,
+ iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen));
+ th->th_sum = in_cksum_skip(mbufc,
+ sizeof(struct ether_header) + ntohs(iph->ip_len),
+ sizeof(struct ether_header) + (iph->ip_hl << 2));
+ }
break;
- case XenbusStateClosed:
- xenbus_remove_device(xdev);
+ case IPPROTO_UDP:
+ if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) {
+ size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip);
+ struct udphdr *uh = (struct udphdr*)(iph + 1);
+ uh->uh_sum = in_pseudo(iph->ip_src.s_addr,
+ iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen));
+ uh->uh_sum = in_cksum_skip(mbufc,
+ sizeof(struct ether_header) + ntohs(iph->ip_len),
+ sizeof(struct ether_header) + (iph->ip_hl << 2));
+ }
break;
- case XenbusStateUnknown:
- case XenbusStateInitWait:
- xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
- frontend_state);
+ default:
break;
}
}
-/* ** Driver registration ** */
-
-static struct xenbus_device_id netback_ids[] = {
- { "vif" },
- { "" }
-};
-
-static struct xenbus_driver netback = {
- .name = "netback",
- .ids = netback_ids,
- .probe = netback_probe,
- .remove = netback_remove,
- .resume= netback_resume,
- .otherend_changed = frontend_changed,
-};
-
static void
-netback_init(void *unused)
+xnb_stop(struct xnb_softc *xnb)
{
- callout_init(&rx_task_callout, CALLOUT_MPSAFE);
-
- mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
- BUG_ON(!mmap_vstart);
-
- pending_cons = 0;
- for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
- pending_ring[pending_prod] = pending_prod;
-
- TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
- TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
- mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
- mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
-
- DPRINTF("registering %s\n", netback.name);
+ struct ifnet *ifp;
- xenbus_register_backend(&netback);
+ mtx_assert(&xnb->sc_lock, MA_OWNED);
+ ifp = xnb->xnb_ifp;
+ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ if_link_state_change(ifp, LINK_STATE_DOWN);
}
-SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)
-
static int
-vif_add_dev(struct xenbus_device *xdev)
+xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- netif_t *netif = xdev->data;
- device_t nexus, ndev;
- devclass_t dc;
- int err = 0;
-
- mtx_lock(&Giant);
-
- /* We will add a vif device as a child of nexus0 (for now) */
- if (!(dc = devclass_find("nexus")) ||
- !(nexus = devclass_get_device(dc, 0))) {
- WPRINTF("could not find nexus0!\n");
- err = ENOENT;
- goto done;
- }
-
+ struct xnb_softc *xnb = ifp->if_softc;
+#ifdef INET
+ struct ifreq *ifr = (struct ifreq*) data;
+ struct ifaddr *ifa = (struct ifaddr*)data;
+#endif
+ int error = 0;
- /* Create a newbus device representing the vif */
- ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
- if (!ndev) {
- WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
- err = EFAULT;
- goto done;
+ switch (cmd) {
+ case SIOCSIFFLAGS:
+ mtx_lock(&xnb->sc_lock);
+ if (ifp->if_flags & IFF_UP) {
+ xnb_ifinit_locked(xnb);
+ } else {
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ xnb_stop(xnb);
+ }
+ }
+ /*
+ * Note: netfront sets a variable named xn_if_flags
+ * here, but that variable is never read
+ */
+ mtx_unlock(&xnb->sc_lock);
+ break;
+ case SIOCSIFADDR:
+ case SIOCGIFADDR:
+#ifdef INET
+ mtx_lock(&xnb->sc_lock);
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ ifp->if_flags |= IFF_UP;
+ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING |
+ IFF_DRV_OACTIVE);
+ if_link_state_change(ifp,
+ LINK_STATE_DOWN);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_link_state_change(ifp,
+ LINK_STATE_UP);
+ }
+ arp_ifinit(ifp, ifa);
+ mtx_unlock(&xnb->sc_lock);
+ } else {
+ mtx_unlock(&xnb->sc_lock);
+#endif
+ error = ether_ioctl(ifp, cmd, data);
+#ifdef INET
+ }
+#endif
+ break;
+ case SIOCSIFCAP:
+ mtx_lock(&xnb->sc_lock);
+ if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
+ ifp->if_capenable |= IFCAP_TXCSUM;
+ ifp->if_hwassist |= XNB_CSUM_FEATURES;
+ } else {
+ ifp->if_capenable &= ~(IFCAP_TXCSUM);
+ ifp->if_hwassist &= ~(XNB_CSUM_FEATURES);
+ }
+ if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) {
+ ifp->if_capenable |= IFCAP_RXCSUM;
+ } else {
+ ifp->if_capenable &= ~(IFCAP_RXCSUM);
+ }
+ /*
+ * TODO enable TSO4 and LRO once we no longer need
+ * to calculate checksums in software
+ */
+#if 0
+ if (ifr->ifr_reqcap & IFCAP_TSO4) {
+ if (IFCAP_TXCSUM & ifp->if_capenable) {
+ printf("xnb: Xen netif requires that "
+ "TXCSUM be enabled in order "
+ "to use TSO4\n");
+ error = EINVAL;
+ } else {
+ ifp->if_capenable |= IFCAP_TSO4;
+ ifp->if_hwassist |= CSUM_TSO;
+ }
+ } else {
+ ifp->if_capenable &= ~(IFCAP_TSO4);
+ ifp->if_hwassist &= ~(CSUM_TSO);
+ }
+ if (ifr->ifr_reqcap & IFCAP_LRO) {
+ ifp->if_capenable |= IFCAP_LRO;
+ } else {
+ ifp->if_capenable &= ~(IFCAP_LRO);
+ }
+#endif
+ mtx_unlock(&xnb->sc_lock);
+ break;
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ xnb_ifinit(xnb);
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd);
+ break;
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ break;
}
-
- netif_get(netif);
- device_set_ivars(ndev, netif);
- netif->ndev = ndev;
-
- device_probe_and_attach(ndev);
+ return (error);
+}
- done:
+static void
+xnb_start_locked(struct ifnet *ifp)
+{
+ netif_rx_back_ring_t *rxb;
+ struct xnb_softc *xnb;
+ struct mbuf *mbufc;
+ RING_IDX req_prod_local;
- mtx_unlock(&Giant);
+ xnb = ifp->if_softc;
+ rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring;
- return err;
-}
+ if (!xnb->carrier)
+ return;
-enum {
- VIF_SYSCTL_DOMID,
- VIF_SYSCTL_HANDLE,
- VIF_SYSCTL_TXRING,
- VIF_SYSCTL_RXRING,
-};
+ do {
+ int out_of_space = 0;
+ int notify;
+ req_prod_local = rxb->sring->req_prod;
+ xen_rmb();
+ for (;;) {
+ int error;
-static char *
-vif_sysctl_ring_info(netif_t *netif, int cmd)
-{
- char *buf = malloc(256, M_DEVBUF, M_WAITOK);
- if (buf) {
- if (!netif->rings_connected)
- sprintf(buf, "rings not connected\n");
- else if (cmd == VIF_SYSCTL_TXRING) {
- netif_tx_back_ring_t *tx = &netif->tx;
- sprintf(buf, "nr_ents=%x req_cons=%x"
- " req_prod=%x req_event=%x"
- " rsp_prod=%x rsp_event=%x",
- tx->nr_ents, tx->req_cons,
- tx->sring->req_prod, tx->sring->req_event,
- tx->sring->rsp_prod, tx->sring->rsp_event);
- } else {
- netif_rx_back_ring_t *rx = &netif->rx;
- sprintf(buf, "nr_ents=%x req_cons=%x"
- " req_prod=%x req_event=%x"
- " rsp_prod=%x rsp_event=%x",
- rx->nr_ents, rx->req_cons,
- rx->sring->req_prod, rx->sring->req_event,
- rx->sring->rsp_prod, rx->sring->rsp_event);
+ IF_DEQUEUE(&ifp->if_snd, mbufc);
+ if (mbufc == NULL)
+ break;
+ error = xnb_send(rxb, xnb->otherend_id, mbufc,
+ xnb->rx_gnttab);
+ switch (error) {
+ case EAGAIN:
+ /*
+ * Insufficient space in the ring.
+ * Requeue pkt and send when space is
+ * available.
+ */
+ IF_PREPEND(&ifp->if_snd, mbufc);
+ /*
+ * Perhaps the frontend missed an IRQ
+ * and went to sleep. Notify it to wake
+ * it up.
+ */
+ out_of_space = 1;
+ break;
+
+ case EINVAL:
+ /* OS gave a corrupt packet. Drop it.*/
+ ifp->if_oerrors++;
+ /* FALLTHROUGH */
+ default:
+ /* Send succeeded, or packet had error.
+ * Free the packet */
+ ifp->if_opackets++;
+ if (mbufc)
+ m_freem(mbufc);
+ break;
+ }
+ if (out_of_space != 0)
+ break;
}
- }
- return buf;
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify);
+ if ((notify != 0) || (out_of_space != 0))
+ notify_remote_via_irq(xnb->irq);
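+ /*
+ * As in xnb_intr(): re-arm the event index, then recheck
+ * req_prod via the loop condition for requests that arrived
+ * while we were busy.
+ */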
+ rxb->sring->req_event = req_prod_local + 1;
+ xen_mb();
+ } while (rxb->sring->req_prod != req_prod_local);
}
+/**
+ * Sends one packet to the ring. Blocks until the packet is on the ring.
+ * \param[in,out] ring The packet will be pushed onto this ring, but the
+ * otherend will not be notified.
+ * \param[in] otherend The domain ID of the other end of the connection
+ * \param[in] mbufc Contains one packet to send. Caller must free
+ * \param[in,out] gnttab Pointer to enough memory for a grant table. We make
+ * this a function parameter so that we will take less
+ * stack space.
+ * \retval 0 Success
+ * \retval EAGAIN The ring did not have enough space for the packet.
+ * The ring has not been modified
+ * \retval EINVAL mbufc was corrupt or not convertible into a pkt
+ */
static int
-vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
+xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc,
+ gnttab_copy_table gnttab)
{
- device_t dev = (device_t)arg1;
- netif_t *netif = (netif_t *)device_get_ivars(dev);
- const char *value;
- char *buf = NULL;
- int err;
-
- switch (arg2) {
- case VIF_SYSCTL_DOMID:
- return sysctl_handle_int(oidp, NULL, netif->domid, req);
- case VIF_SYSCTL_HANDLE:
- return sysctl_handle_int(oidp, NULL, netif->handle, req);
- case VIF_SYSCTL_TXRING:
- case VIF_SYSCTL_RXRING:
- value = buf = vif_sysctl_ring_info(netif, arg2);
- break;
- default:
- return (EINVAL);
+ struct xnb_pkt pkt;
+ int error, n_entries, n_reqs;
+ RING_IDX space;
+
+ space = ring->sring->req_prod - ring->req_cons;
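+ /*
+ * RING_IDX is unsigned, so the subtraction yields the count of
+ * unconsumed requests even after the indices wrap around.
+ */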
+ error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space);
+ if (error != 0)
+ return error;
+ n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend);
+ if (n_entries != 0) {
+ int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+ gnttab, n_entries);
+ KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n",
+ hv_ret));
}
- err = SYSCTL_OUT(req, value, strlen(value));
- if (buf != NULL)
- free(buf, M_DEVBUF);
+ n_reqs = xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring);
- return err;
+ return 0;
}
-/* Newbus vif device driver probe */
-static int
-vif_probe(device_t dev)
+static void
+xnb_start(struct ifnet *ifp)
{
- DDPRINTF("vif%d\n", device_get_unit(dev));
- return 0;
+ struct xnb_softc *xnb;
+
+ xnb = ifp->if_softc;
+ mtx_lock(&xnb->rx_lock);
+ xnb_start_locked(ifp);
+ mtx_unlock(&xnb->rx_lock);
}
-/* Newbus vif device driver attach */
-static int
-vif_attach(device_t dev)
+/* equivalent of network_open() in Linux */
+static void
+xnb_ifinit_locked(struct xnb_softc *xnb)
{
- netif_t *netif = (netif_t *)device_get_ivars(dev);
- uint8_t mac[ETHER_ADDR_LEN];
-
- DDPRINTF("%s\n", IFNAME(netif));
-
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
- dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
- "domid of frontend");
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
- dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
- "handle of frontend");
-#ifdef XEN_NETBACK_DEBUG
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD,
- dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
- "tx ring info");
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD,
- dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
- "rx ring info");
-#endif
+ struct ifnet *ifp;
- memset(mac, 0xff, sizeof(mac));
- mac[0] &= ~0x01;
-
- ether_ifattach(netif->ifp, mac);
- netif->attached = 1;
+ ifp = xnb->xnb_ifp;
- connect(netif);
+ mtx_assert(&xnb->sc_lock, MA_OWNED);
- if (netif->bridge) {
- DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
- int err = add_to_bridge(netif->ifp, netif->bridge);
- if (err) {
- WPRINTF("Error adding %s to %s; err=%d\n",
- IFNAME(netif), netif->bridge, err);
- }
- }
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ xnb_stop(xnb);
- return bus_generic_attach(dev);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_link_state_change(ifp, LINK_STATE_UP);
}
-/* Newbus vif device driver detach */
-static int
-vif_detach(device_t dev)
-{
- netif_t *netif = (netif_t *)device_get_ivars(dev);
- struct ifnet *ifp = netif->ifp;
- DDPRINTF("%s\n", IFNAME(netif));
+static void
+xnb_ifinit(void *xsc)
+{
+ struct xnb_softc *xnb = xsc;
- /* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ mtx_lock(&xnb->sc_lock);
+ xnb_ifinit_locked(xnb);
+ mtx_unlock(&xnb->sc_lock);
+}
- ether_ifdetach(ifp);
- bus_generic_detach(dev);
+/**
+ * Read the 'mac' node at the given device's node in the store, and parse that
+ * as colon-separated octets, placing the result in the given mac array. mac
+ * must be a preallocated array of length ETHER_ADDR_LEN (as declared in
+ * net/ethernet.h).
+ * Return 0 on success, or errno on error.
+ */
+static int
+xen_net_read_mac(device_t dev, uint8_t mac[])
+{
+ char *s, *e, *macstr;
+ const char *path;
+ int error = 0;
+ int i;
+
+ path = xenbus_get_node(dev);
+ error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr);
+ if (error != 0) {
+ xenbus_dev_fatal(dev, error, "parsing %s/mac", path);
+ } else {
+ s = macstr;
+ for (i = 0; i < ETHER_ADDR_LEN; i++) {
+ mac[i] = strtoul(s, &e, 16);
+ if (s == e || (e[0] != ':' && e[0] != 0)) {
+ error = ENOENT;
+ break;
+ }
+ s = &e[1];
+ }
+ free(macstr, M_XENBUS);
+ }
+ return error;
+}
- netif->attached = 0;
- netif_put(netif);
+/**
+ * Callback used by the generic networking code to tell us when our carrier
+ * state has changed. Since we don't have a physical carrier, we don't care.
+ */
+static int
+xnb_ifmedia_upd(struct ifnet *ifp)
+{
+ return (0);
+}
- return 0;
+/**
+ * Callback used by the generic networking code to ask us what our carrier
+ * state is. Since we don't have a physical carrier, this is very simple.
+ */
+static void
+xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+ ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE;
+ ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
}
-static device_method_t vif_methods[] = {
+
+/*---------------------------- NewBus Registration ---------------------------*/
+static device_method_t xnb_methods[] = {
/* Device interface */
- DEVMETHOD(device_probe, vif_probe),
- DEVMETHOD(device_attach, vif_attach),
- DEVMETHOD(device_detach, vif_detach),
+ DEVMETHOD(device_probe, xnb_probe),
+ DEVMETHOD(device_attach, xnb_attach),
+ DEVMETHOD(device_detach, xnb_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
- {0, 0}
-};
+ DEVMETHOD(device_suspend, xnb_suspend),
+ DEVMETHOD(device_resume, xnb_resume),
-static devclass_t vif_devclass;
+ /* Xenbus interface */
+ DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed),
-static driver_t vif_driver = {
- "vif",
- vif_methods,
- 0,
+ { 0, 0 }
};
-DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
+static driver_t xnb_driver = {
+ "xnb",
+ xnb_methods,
+ sizeof(struct xnb_softc),
+};
+devclass_t xnb_devclass;
+DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, xnb_devclass, 0, 0);
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: t
- * End:
- */
+
+/*-------------------------- Unit Tests -------------------------------------*/
+#ifdef XNB_DEBUG
+#include "netback_unit_tests.c"
+#endif