summaryrefslogtreecommitdiffstats
path: root/sys/dev/xen/netback
diff options
context:
space:
mode:
authorkmacy <kmacy@FreeBSD.org>2008-08-12 20:01:57 +0000
committerkmacy <kmacy@FreeBSD.org>2008-08-12 20:01:57 +0000
commit510c672e13b9748ab91e72576bf72a6ac7a325af (patch)
treeed1f5b96e1e2ccdb3eca8477d06d5456cfd3a425 /sys/dev/xen/netback
parent61c513450203262aab5d2c4571d1e82e9b1b9247 (diff)
downloadFreeBSD-src-510c672e13b9748ab91e72576bf72a6ac7a325af.zip
FreeBSD-src-510c672e13b9748ab91e72576bf72a6ac7a325af.tar.gz
Import Xen paravirtual drivers.
MFC after: 2 weeks
Diffstat (limited to 'sys/dev/xen/netback')
-rw-r--r--sys/dev/xen/netback/netback.c1585
1 files changed, 1585 insertions, 0 deletions
diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
new file mode 100644
index 0000000..950a68c
--- /dev/null
+++ b/sys/dev/xen/netback/netback.c
@@ -0,0 +1,1585 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_types.h>
+#include <net/ethernet.h>
+#include <net/if_bridgevar.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#include <machine/in_cksum.h>
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <dev/xen/xenbus/xenbus_comms.h>
+
+
+#ifdef XEN_NETBACK_DEBUG
+#define DPRINTF(fmt, args...) \
+ printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+#define DDPRINTF(fmt, args...) \
+ printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
+#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
+#else
+#define DDPRINTF(fmt, args...) ((void)0)
+#define DPRINTF_MBUF(_m) ((void)0)
+#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+#endif
+
+#define WPRINTF(fmt, args...) \
+ printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+#define BUG_ON PANIC_IF
+
+#define IFNAME(_np) (_np)->ifp->if_xname
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+/* Bookkeeping for one frontend shared-ring page grant-mapped into our KVA. */
+struct ring_ref {
+	vm_offset_t va;		/* kernel VA where the ring page is mapped */
+	grant_handle_t handle;	/* grant handle needed to unmap later */
+	uint64_t bus_addr;	/* dev_bus_addr returned by the map operation */
+};
+
+/*
+ * Per-vif backend state; one instance per connected frontend.
+ * Reference-counted via ref_cnt (see netif_get()/netif_put()).
+ */
+typedef struct netback_info {
+
+	/* Schedule lists */
+	STAILQ_ENTRY(netback_info) next_tx;	/* linkage on tx_sched_list */
+	STAILQ_ENTRY(netback_info) next_rx;	/* linkage on rx_sched_list */
+	int on_tx_sched_list;	/* non-zero while queued for tx work */
+	int on_rx_sched_list;	/* non-zero while queued for rx work */
+
+	struct xenbus_device *xdev;	/* backing xenbus device */
+	XenbusState frontend_state;	/* last state the frontend reported */
+
+	domid_t domid;		/* frontend domain id */
+	int handle;		/* vif handle read from xenstore */
+	char *bridge;		/* optional bridge name (malloc'd; freed with netif) */
+
+	int rings_connected;	/* rings mapped and event channel bound */
+	struct ring_ref tx_ring_ref;
+	struct ring_ref rx_ring_ref;
+	netif_tx_back_ring_t tx;
+	netif_rx_back_ring_t rx;
+	evtchn_port_t evtchn;	/* local event-channel port */
+	int irq;		/* irq bound to evtchn */
+	void *irq_cookie;	/* cookie for unbind_from_irqhandler() */
+
+	struct ifnet *ifp;	/* our network interface */
+	int ref_cnt;		/* freed when this drops to zero */
+
+	device_t ndev;		/* newbus device, detached on remove */
+	int attached;
+} netif_t;
+
+
+#define MAX_PENDING_REQS 256
+#define PKT_PROT_LEN 64
+
+/* Per-slot bookkeeping for tx requests whose frontend pages are mapped. */
+static struct {
+	netif_tx_request_t req;
+	netif_t *netif;
+} pending_tx_info[MAX_PENDING_REQS];
+/* Ring of free pending-slot indices; prod/cons are free-running counters. */
+static uint16_t pending_ring[MAX_PENDING_REQS];
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+/* Base VA of the range into which frontend tx pages are grant-mapped. */
+static unsigned long mmap_vstart;
+#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
+
+/* Freed TX mbufs get batched on this ring before return to pending_ring. */
+static uint16_t dealloc_ring[MAX_PENDING_REQS];
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
+
+/* Batch buffers for the rx path (one extra mcl slot for the mmu_update). */
+static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
+static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
+
+/* Grant handles and batch op arrays for the tx path, indexed by pending idx. */
+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+
+static struct task net_tx_task, net_rx_task;
+static struct callout rx_task_callout;
+
+/* Interfaces with pending tx/rx work; protected by the locks below. */
+static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
+	STAILQ_HEAD_INITIALIZER(tx_sched_list);
+static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
+	STAILQ_HEAD_INITIALIZER(rx_sched_list);
+static struct mtx tx_sched_list_lock;
+static struct mtx rx_sched_list_lock;
+
+/* Monotonic unit-number source for vif ifnet names. */
+static int vif_unit_maker = 0;
+
+/* Protos */
+static void netback_start(struct ifnet *ifp);
+static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static int vif_add_dev(struct xenbus_device *xdev);
+static void disconnect_rings(netif_t *netif);
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+/*
+ * Debug code to display the contents of an mbuf chain as a hex dump.
+ * 'max' bounds the number of bytes dumped (0 = no limit).
+ *
+ * Fixes over the original:
+ *  - use %p for the pointer; casting to unsigned int truncates on 64-bit
+ *  - guard against a NULL mbuf before touching m_pkthdr
+ *  - print the offset label before incrementing j (was off by one)
+ *  - stop dumping the whole chain once 'max' is reached (break exited
+ *    only the inner loop)
+ */
+static void
+print_mbuf(struct mbuf *m, int max)
+{
+	int i, j = 0;
+
+	printf("mbuf %p len = %d", m, m ? m->m_pkthdr.len : 0);
+	for (; m; m = m->m_next) {
+		unsigned char *d = m->m_data;
+		for (i = 0; i < m->m_len; i++) {
+			if (max && j == max)
+				goto done;
+			if ((j % 16) == 0)
+				printf("\n%04x:", j);
+			j++;
+			printf(" %02x", d[i]);
+		}
+	}
+done:
+	printf("\n");
+}
+#endif
+
+
+#define MAX_MFN_ALLOC 64
+/* Small cache of machine frames, refilled from the hypervisor in batches. */
+static unsigned long mfn_list[MAX_MFN_ALLOC];
+static unsigned int alloc_index = 0;
+
+/*
+ * Return one machine frame for the rx flip path, refilling the local cache
+ * from the hypervisor (up to MAX_MFN_ALLOC extents) when it runs dry.
+ * Returns 0 if no frame could be obtained.
+ * NOTE(review): no locking on mfn_list/alloc_index — presumably only ever
+ * called from the single rx task; confirm before adding other callers.
+ */
+static unsigned long
+alloc_mfn(void)
+{
+	unsigned long mfn = 0;
+	struct xen_memory_reservation reservation = {
+		.extent_start = mfn_list,
+		.nr_extents = MAX_MFN_ALLOC,
+		.extent_order = 0,
+		.domid = DOMID_SELF
+	};
+	/* memory_op returns the number of extents actually populated. */
+	if ( unlikely(alloc_index == 0) )
+		alloc_index = HYPERVISOR_memory_op(
+			XENMEM_increase_reservation, &reservation);
+	if ( alloc_index != 0 )
+		mfn = mfn_list[--alloc_index];
+	return mfn;
+}
+
+/*
+ * Allocate nr_pages of kernel VA whose backing machine frames are handed
+ * back to the hypervisor, leaving empty PTE slots suitable for grant maps.
+ * Works in batches of 16 pages: each batch unmaps the VAs via multicall
+ * and then decreases our reservation by the collected MFNs.
+ * Returns the base VA, or 0 on allocation failure.
+ * NOTE(review): the local mfn_list deliberately shadows the file-scope
+ * mfn_list used by alloc_mfn(); the reservation here refers to the local
+ * array.  Confirm this shadowing is intended before renaming either.
+ */
+static unsigned long
+alloc_empty_page_range(unsigned long nr_pages)
+{
+	void *pages;
+	int i = 0, j = 0;
+	multicall_entry_t mcl[17];	/* 16 va updates + 1 memory_op */
+	unsigned long mfn_list[16];
+	struct xen_memory_reservation reservation = {
+		.extent_start = mfn_list,
+		.nr_extents = 0,
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid = DOMID_SELF
+	};
+
+	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+	if (pages == NULL)
+		return 0;
+
+	memset(mcl, 0, sizeof(mcl));
+
+	while (i < nr_pages) {
+		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+
+		/* Queue an update that clears this VA's mapping. */
+		mcl[j].op = __HYPERVISOR_update_va_mapping;
+		mcl[j].args[0] = va;
+
+		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+
+		/* Mark the pfn as having no machine frame behind it. */
+		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+
+		if (j == 16 || i == nr_pages) {
+			/* Flush the TLB on the final va update of the batch. */
+			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+
+			reservation.nr_extents = j;
+
+			/* Give the collected frames back to the hypervisor. */
+			mcl[j].op = __HYPERVISOR_memory_op;
+			mcl[j].args[0] = XENMEM_decrease_reservation;
+			mcl[j].args[1] = (unsigned long)&reservation;
+
+			(void)HYPERVISOR_multicall(mcl, j+1);
+
+			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
+			j = 0;
+		}
+	}
+
+	return (unsigned long)pages;
+}
+
+#ifdef XEN_NETBACK_FIXUP_CSUM
+/*
+ * Compute the deferred TCP/UDP checksum of an mbuf in software.  Needed
+ * when a packet with a checksum-offload flag is bridged toward a domain
+ * rather than real hardware.
+ * NOTE(review): the else branch treats anything without CSUM_TCP as UDP;
+ * callers gate on CSUM_DELAY_DATA so only TCP/UDP should reach here —
+ * confirm if new callers are added.
+ */
+static void
+fixup_checksum(struct mbuf *m)
+{
+	struct ether_header *eh = mtod(m, struct ether_header *);
+	struct ip *ip = (struct ip *)(eh + 1);
+	int iphlen = ip->ip_hl << 2;
+	int iplen = ntohs(ip->ip_len);
+
+	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
+		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
+		/* Seed with the pseudo-header sum, then checksum the payload. */
+		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+			htons(IPPROTO_TCP + (iplen - iphlen)));
+		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
+		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
+	} else {
+		u_short csum;
+		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
+		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+			htons(IPPROTO_UDP + (iplen - iphlen)));
+		/* UDP encodes "no checksum" as 0, so a computed 0 becomes 0xffff. */
+		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
+			csum = 0xffff;
+		uh->uh_sum = csum;
+		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
+	}
+}
+#endif
+
+/*
+ * Add the interface to the specified bridge by issuing a BRDGADD
+ * SIOCSDRVSPEC ioctl against the named bridge interface from within
+ * the kernel.  Returns ENOENT if the bridge does not exist, otherwise
+ * the result of bridge_ioctl_kern().
+ */
+static int
+add_to_bridge(struct ifnet *ifp, char *bridge)
+{
+	struct ifdrv ifd;
+	struct ifbreq ifb;
+	struct ifnet *ifp_bridge = ifunit(bridge);
+
+	if (!ifp_bridge)
+		return ENOENT;
+
+	bzero(&ifd, sizeof(ifd));
+	bzero(&ifb, sizeof(ifb));
+
+	/* Both name fields are IFNAMSIZ, same as if_xname, so strcpy fits. */
+	strcpy(ifb.ifbr_ifsname, ifp->if_xname);
+	strcpy(ifd.ifd_name, ifp->if_xname);
+	ifd.ifd_cmd = BRDGADD;
+	ifd.ifd_len = sizeof(ifb);
+	ifd.ifd_data = &ifb;
+
+	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
+
+}
+
+/*
+ * Allocate and initialize a netif for a newly probed backend device.
+ * Takes ownership of 'bridge' (freed on failure here, or later by
+ * netif_put()).  Stores the netif in xdev->data and creates the
+ * "vifN" ifnet.  Returns 0 or ENOMEM.
+ */
+static int
+netif_create(int handle, struct xenbus_device *xdev, char *bridge)
+{
+	netif_t *netif;
+	struct ifnet *ifp;
+
+	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (!netif)
+		return ENOMEM;
+
+	/* Caller holds the initial reference; dropped via netif_put(). */
+	netif->ref_cnt = 1;
+	netif->handle = handle;
+	netif->domid = xdev->otherend_id;
+	netif->xdev = xdev;
+	netif->bridge = bridge;
+	xdev->data = netif;
+
+	/* Set up ifnet structure */
+	ifp = netif->ifp = if_alloc(IFT_ETHER);
+	if (!ifp) {
+		if (bridge)
+			free(bridge, M_DEVBUF);
+		free(netif, M_DEVBUF);
+		return ENOMEM;
+	}
+
+	ifp->if_softc = netif;
+	if_initname(ifp, "vif",
+		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
+	ifp->if_output = ether_output;
+	ifp->if_start = netback_start;
+	ifp->if_ioctl = netback_ioctl;
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
+
+	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+
+	return 0;
+}
+
+/* Take an additional reference on the interface; paired with netif_put(). */
+static void
+netif_get(netif_t *netif)
+{
+	(void)atomic_fetchadd_int(&netif->ref_cnt, 1);
+}
+
+/*
+ * Drop a reference on the interface; the final release tears down the
+ * rings, frees the ifnet, the bridge name, and the netif itself.
+ */
+static void
+netif_put(netif_t *netif)
+{
+	/* fetchadd returns the old value, so 1 means we dropped the last ref. */
+	if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
+		DPRINTF("%s\n", IFNAME(netif));
+		disconnect_rings(netif);
+		if (netif->ifp) {
+			if_free(netif->ifp);
+			netif->ifp = NULL;
+		}
+		if (netif->bridge)
+			free(netif->bridge, M_DEVBUF);
+		free(netif, M_DEVBUF);
+	}
+}
+
+/*
+ * Interface ioctl handler.  SIOCSIFFLAGS is accepted as a no-op; every
+ * other request is delegated to the generic ethernet handler.
+ */
+static int
+netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	if (cmd == SIOCSIFFLAGS) {
+		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
+			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
+		return 0;
+	}
+
+	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
+
+	return ether_ioctl(ifp, cmd, data);
+}
+
+/*
+ * Kick the tx task if there is both room in the pending table and an
+ * interface queued for work.  The barrier orders our reads of
+ * pending_prod/cons and the schedule list against concurrent updaters.
+ */
+static inline void
+maybe_schedule_tx_action(void)
+{
+	smp_mb();
+	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
+		taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+}
+
+/* Removes netif from front of list and does not call netif_put() (caller must) */
+static netif_t *
+remove_from_tx_schedule_list(void)
+{
+	netif_t *netif;
+
+	mtx_lock(&tx_sched_list_lock);
+
+	if ((netif = STAILQ_FIRST(&tx_sched_list))) {
+		STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
+		/* Clear the stale link so a later insert starts clean. */
+		STAILQ_NEXT(netif, next_tx) = NULL;
+		netif->on_tx_sched_list = 0;
+	}
+
+	mtx_unlock(&tx_sched_list_lock);
+
+	/* NULL when the list was empty; otherwise caller owns the reference. */
+	return netif;
+}
+
+/* Adds netif to end of list and calls netif_get() */
+static void
+add_to_tx_schedule_list_tail(netif_t *netif)
+{
+	/* Unlocked fast-path check; re-verified under the lock below. */
+	if (netif->on_tx_sched_list)
+		return;
+
+	mtx_lock(&tx_sched_list_lock);
+	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+		netif_get(netif);
+		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
+		netif->on_tx_sched_list = 1;
+	}
+	mtx_unlock(&tx_sched_list_lock);
+}
+
+/*
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+ * If this driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- frontend only needs to
+ * send a notification if there are no outstanding unreceived responses.
+ * If we may buffer transmit requests for any reason then we must be rather
+ * more conservative and treat this as the final check for pending work.
+ */
+static void
+netif_schedule_tx_work(netif_t *netif)
+{
+	int more_to_do;
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+#else
+	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+#endif
+
+	if (more_to_do) {
+		DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
+		add_to_tx_schedule_list_tail(netif);
+		maybe_schedule_tx_action();
+	}
+}
+
+/* Protects dealloc_ring/dealloc_prod; a spin lock since netif_idx_release()
+ * may be reached from mbuf-free context. */
+static struct mtx dealloc_lock;
+MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
+
+/*
+ * Queue a finished tx pending slot onto the dealloc ring and kick the
+ * tx task, which will unmap the grant and send the response.
+ */
+static void
+netif_idx_release(uint16_t pending_idx)
+{
+	mtx_lock_spin(&dealloc_lock);
+	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+	mtx_unlock_spin(&dealloc_lock);
+
+	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+}
+
+/*
+ * Push a tx response with the given id/status onto the frontend's tx
+ * ring and notify it via the event channel if the ring macros say so.
+ */
+static void
+make_tx_response(netif_t *netif,
+		 uint16_t    id,
+		 int8_t      st)
+{
+	RING_IDX i = netif->tx.rsp_prod_pvt;
+	netif_tx_response_t *resp;
+	int notify;
+
+	resp = RING_GET_RESPONSE(&netif->tx, i);
+	resp->id     = id;
+	resp->status = st;
+
+	netif->tx.rsp_prod_pvt = ++i;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+	if (notify)
+		notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+	/* Caught up with the producer: do the final check for more requests. */
+	if (i == netif->tx.req_cons) {
+		int more_to_do;
+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+		if (more_to_do)
+			add_to_tx_schedule_list_tail(netif);
+	}
+#endif
+}
+
+/*
+ * Drain the dealloc ring: batch-unmap the finished tx grants, send the
+ * OKAY responses, recycle the pending slots, and drop the per-request
+ * netif references taken in net_tx_action().
+ */
+inline static void
+net_tx_action_dealloc(void)
+{
+	gnttab_unmap_grant_ref_t *gop;
+	uint16_t pending_idx;
+	PEND_RING_IDX dc, dp;
+	netif_t *netif;
+	int ret;
+
+	dc = dealloc_cons;
+	dp = dealloc_prod;
+
+	/*
+	 * Free up any grants we have finished using
+	 */
+	gop = tx_unmap_ops;
+	while (dc != dp) {
+		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+		gop->host_addr    = MMAP_VADDR(pending_idx);
+		gop->dev_bus_addr = 0;
+		gop->handle       = grant_tx_handle[pending_idx];
+		gop++;
+	}
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
+	BUG_ON(ret);
+
+	/* Second pass: complete each request now that its grant is unmapped. */
+	while (dealloc_cons != dp) {
+		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+		netif = pending_tx_info[pending_idx].netif;
+
+		make_tx_response(netif, pending_tx_info[pending_idx].req.id,
+				 NETIF_RSP_OKAY);
+
+		/* Return the slot to the free ring. */
+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+		netif_put(netif);
+	}
+}
+
+/*
+ * External-mbuf free callback for grant-mapped tx pages.  The pending
+ * index was stashed in 'args' as a cast integer by MEXTADD() in
+ * net_tx_action(); recover it and queue the slot for deallocation.
+ */
+static void
+netif_page_release(void *buf, void *args)
+{
+	uint16_t pending_idx = (unsigned int)args;
+
+	DDPRINTF("pending_idx=%u\n", pending_idx);
+
+	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
+
+	netif_idx_release(pending_idx);
+}
+
+/*
+ * Transmit task (guest -> host).  Two-phase pipeline:
+ *   1) Pull tx requests from scheduled frontends, allocate mbufs for
+ *      them, and build a batch of grant-map operations for the payload
+ *      pages.
+ *   2) After one hypercall maps the whole batch, copy/attach the data
+ *      into the mbufs and hand each packet to the network stack.
+ * Also drains the dealloc ring first so completed slots are recycled.
+ */
+static void
+net_tx_action(void *context, int pending)
+{
+	struct mbuf *m;
+	netif_t *netif;
+	netif_tx_request_t txreq;
+	uint16_t pending_idx;
+	RING_IDX i;
+	gnttab_map_grant_ref_t *mop;
+	int ret, work_to_do;
+	struct mbuf *txq = NULL, *txq_last = NULL;
+
+	if (dealloc_cons != dealloc_prod)
+		net_tx_action_dealloc();
+
+	mop = tx_map_ops;
+	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
+
+		/* Get a netif from the list with work to do. */
+		netif = remove_from_tx_schedule_list();
+
+		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
+			IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
+
+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+		if (!work_to_do) {
+			netif_put(netif);
+			continue;
+		}
+
+		i = netif->tx.req_cons;
+		rmb(); /* Ensure that we see the request before we copy it. */
+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
+		/* If we want credit-based scheduling, could add it here - WORK */
+
+		netif->tx.req_cons++;
+
+		/* Re-queue the netif if it still has unconsumed requests. */
+		netif_schedule_tx_work(netif);
+
+		if (unlikely(txreq.size < ETHER_HDR_LEN) ||
+			unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
+			WPRINTF("Bad packet size: %d\n", txreq.size);
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			continue;
+		}
+
+		/* No crossing a page as the payload mustn't fragment. */
+		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+			WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
+				txreq.offset, txreq.size,
+				(txreq.offset & PAGE_MASK) + txreq.size);
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			continue;
+		}
+
+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		if (!m) {
+			WPRINTF("Failed to allocate mbuf\n");
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			break;
+		}
+		m->m_pkthdr.rcvif = netif->ifp;
+
+		/* Large packets: first PKT_PROT_LEN bytes are copied into the
+		 * header mbuf, the rest stays in the grant-mapped page. */
+		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
+			struct mbuf *n;
+			MGET(n, M_DONTWAIT, MT_DATA);
+			if (!(m->m_next = n)) {
+				m_freem(m);
+				WPRINTF("Failed to allocate second mbuf\n");
+				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+				netif_put(netif);
+				break;
+			}
+			n->m_len = txreq.size - PKT_PROT_LEN;
+			m->m_len = PKT_PROT_LEN;
+		} else
+			m->m_len = txreq.size;
+
+		/* Queue the grant-map op; executed as one batch below. */
+		mop->host_addr = MMAP_VADDR(pending_idx);
+		mop->dom       = netif->domid;
+		mop->ref       = txreq.gref;
+		mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
+		mop++;
+
+		memcpy(&pending_tx_info[pending_idx].req,
+		       &txreq, sizeof(txreq));
+		pending_tx_info[pending_idx].netif = netif;
+		/* Stash the slot index in the (not yet filled) mbuf data. */
+		*((uint16_t *)m->m_data) = pending_idx;
+
+		if (txq_last)
+			txq_last->m_nextpkt = m;
+		else
+			txq = m;
+		txq_last = m;
+
+		pending_cons++;
+
+		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+			break;
+	}
+
+	if (!txq)
+		return;
+
+	/* Map every queued payload page with a single hypercall. */
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+	BUG_ON(ret);
+
+	/* Phase 2: walk the packet queue in the same order as the map ops. */
+	mop = tx_map_ops;
+	while ((m = txq) != NULL) {
+		caddr_t data;
+
+		txq = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		pending_idx = *((uint16_t *)m->m_data);
+		netif       = pending_tx_info[pending_idx].netif;
+		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
+
+		/* Check the remap error code. */
+		if (unlikely(mop->status)) {
+			WPRINTF("#### netback grant fails\n");
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			m_freem(m);
+			mop++;
+			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+			continue;
+		}
+
+#if 0
+		/* Can't do this in FreeBSD since vtophys() returns the pfn */
+		/* of the remote domain who loaned us the machine page - DPT */
+		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
+			mop->dev_bus_addr >> PAGE_SHIFT;
+#endif
+		grant_tx_handle[pending_idx] = mop->handle;
+
+		/* Setup data in mbuf (lengths are already set) */
+		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
+		bcopy(data, m->m_data, m->m_len);
+		if (m->m_next) {
+			struct mbuf *n = m->m_next;
+			/* Attach the mapped page as read-only external storage;
+			 * netif_page_release() recycles the slot on free. */
+			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
+				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
+			n->m_data = &data[PKT_PROT_LEN];
+		} else {
+			/* Schedule a response immediately. */
+			netif_idx_release(pending_idx);
+		}
+
+		if ((txreq.flags & NETTXF_data_validated)) {
+			/* Tell the stack the checksums are okay */
+			m->m_pkthdr.csum_flags |=
+				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+			m->m_pkthdr.csum_data = 0xffff;
+		}
+
+		/* If necessary, inform stack to compute the checksums if it forwards the packet */
+		if ((txreq.flags & NETTXF_csum_blank)) {
+			struct ether_header *eh = mtod(m, struct ether_header *);
+			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
+				struct ip *ip = (struct ip *)&m->m_data[14];
+				if (ip->ip_p == IPPROTO_TCP)
+					m->m_pkthdr.csum_flags |= CSUM_TCP;
+				else if (ip->ip_p == IPPROTO_UDP)
+					m->m_pkthdr.csum_flags |= CSUM_UDP;
+			}
+		}
+
+		netif->ifp->if_ibytes += m->m_pkthdr.len;
+		netif->ifp->if_ipackets++;
+
+		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
+			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
+		DPRINTF_MBUF_LEN(m, 128);
+
+		(*netif->ifp->if_input)(netif->ifp, m);
+
+		mop++;
+	}
+}
+
+/* Handle interrupt from a frontend: queue it for tx work and kick the task. */
+static void
+netback_intr(void *arg)
+{
+	netif_t *netif = arg;
+	DDPRINTF("%s\n", IFNAME(netif));
+	add_to_tx_schedule_list_tail(netif);
+	maybe_schedule_tx_action();
+}
+
+/* Removes netif from front of list and does not call netif_put() (caller must) */
+static netif_t *
+remove_from_rx_schedule_list(void)
+{
+	netif_t *netif;
+
+	mtx_lock(&rx_sched_list_lock);
+
+	if ((netif = STAILQ_FIRST(&rx_sched_list))) {
+		STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
+		/* Clear the stale link so a later insert starts clean. */
+		STAILQ_NEXT(netif, next_rx) = NULL;
+		netif->on_rx_sched_list = 0;
+	}
+
+	mtx_unlock(&rx_sched_list_lock);
+
+	/* NULL when the list was empty; otherwise caller owns the reference. */
+	return netif;
+}
+
+/* Adds netif to end of list and calls netif_get() */
+static void
+add_to_rx_schedule_list_tail(netif_t *netif)
+{
+	/* Unlocked fast-path check; re-verified under the lock below. */
+	if (netif->on_rx_sched_list)
+		return;
+
+	mtx_lock(&rx_sched_list_lock);
+	if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+		netif_get(netif);
+		STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
+		netif->on_rx_sched_list = 1;
+	}
+	mtx_unlock(&rx_sched_list_lock);
+}
+
+/*
+ * Push an rx response onto the frontend's rx ring.  A negative status
+ * overrides the size in the response's status field.  Returns non-zero
+ * if the frontend should be notified (caller batches notifications).
+ */
+static int
+make_rx_response(netif_t *netif, uint16_t id, int8_t st,
+		 uint16_t offset, uint16_t size, uint16_t flags)
+{
+	RING_IDX i = netif->rx.rsp_prod_pvt;
+	netif_rx_response_t *resp;
+	int notify;
+
+	resp = RING_GET_RESPONSE(&netif->rx, i);
+	resp->offset = offset;
+	resp->flags  = flags;
+	resp->id     = id;
+	resp->status = (int16_t)size;
+	if (st < 0)
+		resp->status = (int16_t)st;
+
+	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
+		i, resp->offset, resp->flags, resp->id, resp->status);
+
+	netif->rx.rsp_prod_pvt = ++i;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
+
+	return notify;
+}
+
+/*
+ * Receive path (host -> guest) using page flipping.  Dequeues packets
+ * from the interface send queue, ensures each lives alone in a page we
+ * own, then batches three operations per packet: remap our VA to a
+ * fresh MFN, transfer the old page to the guest via grant transfer,
+ * and update the phys-to-machine table.  A second pass walks the batch
+ * results and pushes rx responses.  Returns the number of packets
+ * dequeued (0 tells the caller this netif made no progress).
+ */
+static int
+netif_rx(netif_t *netif)
+{
+	struct ifnet *ifp = netif->ifp;
+	struct mbuf *m;
+	multicall_entry_t *mcl;
+	mmu_update_t *mmu;
+	gnttab_transfer_t *gop;
+	unsigned long vdata, old_mfn, new_mfn;
+	struct mbuf *rxq = NULL, *rxq_last = NULL;
+	int ret, notify = 0, pkts_dequeued = 0;
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	mcl = rx_mcl;
+	mmu = rx_mmu;
+	gop = grant_rx_op;
+
+	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+
+		/* Quit if the target domain has no receive buffers */
+		if (netif->rx.req_cons == netif->rx.sring->req_prod)
+			break;
+
+		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+		if (m == NULL)
+			break;
+
+		pkts_dequeued++;
+
+		/* Check if we need to copy the data */
+		/* The page will be given away, so it must be a private,
+		 * writable, single-reference cluster holding the whole packet. */
+		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
+			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
+			struct mbuf *n;
+
+			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
+				m->m_flags,
+				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
+				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
+				(unsigned int)m->m_next);
+
+			/* Make copy */
+			MGETHDR(n, M_DONTWAIT, MT_DATA);
+			if (!n)
+				goto drop;
+
+			MCLGET(n, M_DONTWAIT);
+			if (!(n->m_flags & M_EXT)) {
+				m_freem(n);
+				goto drop;
+			}
+
+			/* Leave space at front and keep current alignment */
+			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
+
+			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
+				WPRINTF("pkt to big %d\n", m->m_pkthdr.len);
+				m_freem(n);
+				goto drop;
+			}
+			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
+			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
+			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
+			m_freem(m);
+			m = n;
+		}
+
+		vdata = (unsigned long)m->m_data;
+		old_mfn = vtomach(vdata) >> PAGE_SHIFT;
+
+		/* Need a replacement frame before giving this one away. */
+		if ((new_mfn = alloc_mfn()) == 0)
+			goto drop;
+
+#ifdef XEN_NETBACK_FIXUP_CSUM
+		/* Check if we need to compute a checksum. This happens */
+		/* when bridging from one domain to another. */
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+			fixup_checksum(m);
+#endif
+
+		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
+
+		/* Remap our VA onto the replacement frame. */
+		mcl->op = __HYPERVISOR_update_va_mapping;
+		mcl->args[0] = vdata;
+		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
+		mcl->args[2] = 0;
+		mcl->args[3] = 0;
+		mcl++;
+
+		/* Transfer the old frame to the guest's rx buffer grant. */
+		gop->mfn = old_mfn;
+		gop->domid = netif->domid;
+		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
+		netif->rx.req_cons++;
+		gop++;
+
+		/* Record the machine-to-phys update for the new frame. */
+		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+		mmu->val = vtophys(vdata) >> PAGE_SHIFT;
+		mmu++;
+
+		if (rxq_last)
+			rxq_last->m_nextpkt = m;
+		else
+			rxq = m;
+		rxq_last = m;
+
+		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
+		DPRINTF_MBUF_LEN(m, 128);
+
+		/* Filled the batch queue? */
+		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
+			break;
+
+		continue;
+	drop:
+		DDPRINTF("dropping pkt\n");
+		ifp->if_oerrors++;
+		m_freem(m);
+	}
+
+	if (mcl == rx_mcl)
+		return pkts_dequeued;
+
+	/* Append one mmu_update multicall covering all queued updates. */
+	mcl->op = __HYPERVISOR_mmu_update;
+	mcl->args[0] = (unsigned long)rx_mmu;
+	mcl->args[1] = mmu - rx_mmu;
+	mcl->args[2] = 0;
+	mcl->args[3] = DOMID_SELF;
+	mcl++;
+
+	/* Flush the TLB on the last va update before the mmu_update entry. */
+	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+	BUG_ON(ret != 0);
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
+	BUG_ON(ret != 0);
+
+	/* Second pass: walk results in the same order they were queued. */
+	mcl = rx_mcl;
+	gop = grant_rx_op;
+
+	while ((m = rxq) != NULL) {
+		int8_t status;
+		uint16_t id, flags = 0;
+
+		rxq = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		/* Rederive the machine addresses. */
+		new_mfn = mcl->args[1] >> PAGE_SHIFT;
+		old_mfn = gop->mfn;
+
+		ifp->if_obytes += m->m_pkthdr.len;
+		ifp->if_opackets++;
+
+		/* The update_va_mapping() must not fail. */
+		BUG_ON(mcl->result != 0);
+
+		/* Setup flags */
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+			flags |= NETRXF_data_validated;
+
+		/* Check the reassignment error code. */
+		status = NETIF_RSP_OKAY;
+		if (gop->status != 0) {
+			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
+				gop->status, netif->domid);
+			/*
+			 * Page no longer belongs to us unless GNTST_bad_page,
+			 * but that should be a fatal error anyway.
+			 */
+			BUG_ON(gop->status == GNTST_bad_page);
+			status = NETIF_RSP_ERROR;
+		}
+		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
+		notify |= make_rx_response(netif, id, status,
+			(unsigned long)m->m_data & PAGE_MASK,
+			m->m_pkthdr.len, flags);
+
+		m_freem(m);
+		mcl++;
+		gop++;
+	}
+
+	if (notify)
+		notify_remote_via_irq(netif->irq);
+
+	return pkts_dequeued;
+}
+
+/* Callout handler: retry the rx task after a brief backoff (see net_rx_action). */
+static void
+rx_task_timer(void *arg)
+{
+	DDPRINTF("\n");
+	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+}
+
+/*
+ * Receive task: service each scheduled netif via netif_rx().  To avoid
+ * spinning when every queued netif makes no progress (e.g. the guest
+ * has no rx buffers), remember the first zero-work netif; if we come
+ * around to it again, stop and re-poll via a one-tick callout instead.
+ */
+static void
+net_rx_action(void *context, int pending)
+{
+	netif_t *netif, *last_zero_work = NULL;
+
+	DDPRINTF("\n");
+
+	while ((netif = remove_from_rx_schedule_list())) {
+		struct ifnet *ifp = netif->ifp;
+
+		/* Completed a full lap with no progress: back off. */
+		if (netif == last_zero_work) {
+			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+				add_to_rx_schedule_list_tail(netif);
+			netif_put(netif);
+			if (!STAILQ_EMPTY(&rx_sched_list))
+				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
+			break;
+		}
+
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+			if (netif_rx(netif))
+				last_zero_work = NULL;
+			else if (!last_zero_work)
+				last_zero_work = netif;
+			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+				add_to_rx_schedule_list_tail(netif);
+		}
+
+		netif_put(netif);
+	}
+}
+
+/* ifnet if_start hook: schedule this netif for rx service and kick the task. */
+static void
+netback_start(struct ifnet *ifp)
+{
+	netif_t *netif = (netif_t *)ifp->if_softc;
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	add_to_rx_schedule_list_tail(netif);
+	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+}
+
+/*
+ * Map a grant ref to a ring: allocate one page of KVA and grant-map the
+ * frontend's shared ring page into it.  On success fills in 'ring' for a
+ * later unmap_ring(); returns ENOMEM or EACCES on failure.
+ */
+static int
+map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+{
+	struct gnttab_map_grant_ref op;
+
+	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	if (ring->va == 0)
+		return ENOMEM;
+
+	op.host_addr = ring->va;
+	op.flags = GNTMAP_host_map;
+	op.ref = ref;
+	op.dom = dom;
+	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+	if (op.status) {
+		WPRINTF("grant table op err=%d\n", op.status);
+		kmem_free(kernel_map, ring->va, PAGE_SIZE);
+		ring->va = 0;
+		return EACCES;
+	}
+
+	/* Saved so unmap_ring() can undo this mapping. */
+	ring->handle = op.handle;
+	ring->bus_addr = op.dev_bus_addr;
+
+	return 0;
+}
+
+/* Unmap grant ref for a ring mapped by map_ring() and free its KVA page. */
+static void
+unmap_ring(struct ring_ref *ring)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	op.host_addr = ring->va;
+	op.dev_bus_addr = ring->bus_addr;
+	op.handle = ring->handle;
+	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+	/* Best effort: log but continue so the VA page is still released. */
+	if (op.status)
+		WPRINTF("grant table op err=%d\n", op.status);
+
+	kmem_free(kernel_map, ring->va, PAGE_SIZE);
+	ring->va = 0;
+}
+
+/*
+ * Read the frontend's ring refs and event channel from xenstore, map
+ * both shared rings, bind the interdomain event channel, and hook it
+ * to netback_intr().  Unwinds partial work on each failure path and
+ * reports errors through xenbus_dev_fatal().
+ */
+static int
+connect_rings(netif_t *netif)
+{
+	struct xenbus_device *xdev = netif->xdev;
+	netif_tx_sring_t *txs;
+	netif_rx_sring_t *rxs;
+	unsigned long tx_ring_ref, rx_ring_ref;
+	evtchn_port_t evtchn;
+	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+	int err;
+
+	// Grab FE data and map his memory
+	err = xenbus_gather(NULL, xdev->otherend,
+		"tx-ring-ref", "%lu", &tx_ring_ref,
+		"rx-ring-ref", "%lu", &rx_ring_ref,
+		"event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(xdev, err,
+			"reading %s/ring-ref and event-channel",
+			xdev->otherend);
+		return err;
+	}
+
+	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "mapping tx ring");
+		return err;
+	}
+	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
+	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
+	if (err) {
+		unmap_ring(&netif->tx_ring_ref);
+		xenbus_dev_fatal(xdev, err, "mapping rx ring");
+		return err;
+	}
+	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
+	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+	op.u.bind_interdomain.remote_dom = netif->domid;
+	op.u.bind_interdomain.remote_port = evtchn;
+	err = HYPERVISOR_event_channel_op(&op);
+	if (err) {
+		unmap_ring(&netif->tx_ring_ref);
+		unmap_ring(&netif->rx_ring_ref);
+		xenbus_dev_fatal(xdev, err, "binding event channel");
+		return err;
+	}
+	netif->evtchn = op.u.bind_interdomain.local_port;
+
+	/* bind evtchn to irq handler */
+	netif->irq =
+		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
+			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
+
+	netif->rings_connected = 1;
+
+	DPRINTF("%s connected! evtchn=%d irq=%d\n",
+		IFNAME(netif), netif->evtchn, netif->irq);
+
+	return 0;
+}
+
+/*
+ * Undo connect_rings(): unbind the irq handler and unmap both shared
+ * rings.  Safe to call multiple times; a no-op when not connected.
+ */
+static void
+disconnect_rings(netif_t *netif)
+{
+	DPRINTF("\n");
+
+	if (netif->rings_connected) {
+		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
+		netif->irq = 0;
+		unmap_ring(&netif->tx_ring_ref);
+		unmap_ring(&netif->rx_ring_ref);
+		netif->rings_connected = 0;
+	}
+}
+
+/*
+ * Attempt the full connection once all preconditions hold (device
+ * attached and frontend in Connected state): map rings, then advertise
+ * Connected on xenbus and mark the interface running/up.
+ */
+static void
+connect(netif_t *netif)
+{
+	if (!netif->xdev ||
+		!netif->attached ||
+		netif->frontend_state != XenbusStateConnected) {
+		return;
+	}
+
+	if (!connect_rings(netif)) {
+		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
+
+		/* Turn on interface */
+		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		netif->ifp->if_flags |= IFF_UP;
+	}
+}
+
+static int
+netback_remove(struct xenbus_device *xdev)
+{
+ netif_t *netif = xdev->data;
+ device_t ndev;
+
+ DPRINTF("remove %s\n", xdev->nodename);
+
+ if ((ndev = netif->ndev)) {
+ netif->ndev = NULL;
+ mtx_lock(&Giant);
+ device_detach(ndev);
+ mtx_unlock(&Giant);
+ }
+
+ xdev->data = NULL;
+ netif->xdev = NULL;
+ netif_put(netif);
+
+ return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffers for communication with the frontend.
+ * Switch to Connected state.
+ */
+static int
+netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+{
+ int err;
+ long handle;
+ char *bridge;
+
+ DPRINTF("node=%s\n", xdev->nodename);
+
+ /* Grab the handle */
+ err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
+ if (err != 1) {
+ xenbus_dev_fatal(xdev, err, "reading handle");
+ return err;
+ }
+
+ /* Check for bridge */
+ bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
+ if (IS_ERR(bridge))
+ bridge = NULL;
+
+ err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "writing switch state");
+ return err;
+ }
+
+ err = netif_create(handle, xdev, bridge);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "creating netif");
+ return err;
+ }
+
+ err = vif_add_dev(xdev);
+ if (err) {
+ netif_put((netif_t *)xdev->data);
+ xenbus_dev_fatal(xdev, err, "adding vif device");
+ return err;
+ }
+
+ return 0;
+}
+
/**
 * Called on suspend/resume of the backend domain.  Currently a stub:
 * nothing is torn down or rebuilt here — ring reconnection is driven
 * entirely by the frontend's subsequent state changes (see
 * frontend_changed() and connect()).
 */
static int netback_resume(struct xenbus_device *xdev)
{
	/* Nothing to do beyond logging; reconnection is frontend-driven. */
	DPRINTF("node=%s\n", xdev->nodename);
	return 0;
}
+
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *xdev,
+ XenbusState frontend_state)
+{
+ netif_t *netif = xdev->data;
+
+ DPRINTF("state=%d\n", frontend_state);
+
+ netif->frontend_state = frontend_state;
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ break;
+ case XenbusStateConnected:
+ connect(netif);
+ break;
+ case XenbusStateClosing:
+ xenbus_switch_state(xdev, NULL, XenbusStateClosing);
+ break;
+ case XenbusStateClosed:
+ xenbus_remove_device(xdev);
+ break;
+ case XenbusStateUnknown:
+ case XenbusStateInitWait:
+ xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+/* ** Driver registration ** */
+
/* Xenbus node names this backend driver binds to; "" terminates the list. */
static struct xenbus_device_id netback_ids[] = {
	{ "vif" },
	{ "" }
};
+
+static struct xenbus_driver netback = {
+ .name = "netback",
+ .ids = netback_ids,
+ .probe = netback_probe,
+ .remove = netback_remove,
+ .resume= netback_resume,
+ .otherend_changed = frontend_changed,
+};
+
+static void
+netback_init(void *unused)
+{
+ callout_init(&rx_task_callout, CALLOUT_MPSAFE);
+
+ mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
+ BUG_ON(!mmap_vstart);
+
+ pending_cons = 0;
+ for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
+ pending_ring[pending_prod] = pending_prod;
+
+ TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
+ TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
+ mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
+ mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
+
+ DPRINTF("registering %s\n", netback.name);
+
+ xenbus_register_backend(&netback);
+}
+
+SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)
+
+static int
+vif_add_dev(struct xenbus_device *xdev)
+{
+ netif_t *netif = xdev->data;
+ device_t nexus, ndev;
+ devclass_t dc;
+ int err = 0;
+
+ mtx_lock(&Giant);
+
+ /* We will add a vif device as a child of nexus0 (for now) */
+ if (!(dc = devclass_find("nexus")) ||
+ !(nexus = devclass_get_device(dc, 0))) {
+ WPRINTF("could not find nexus0!\n");
+ err = ENOENT;
+ goto done;
+ }
+
+
+ /* Create a newbus device representing the vif */
+ ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
+ if (!ndev) {
+ WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
+ err = EFAULT;
+ goto done;
+ }
+
+ netif_get(netif);
+ device_set_ivars(ndev, netif);
+ netif->ndev = ndev;
+
+ device_probe_and_attach(ndev);
+
+ done:
+
+ mtx_unlock(&Giant);
+
+ return err;
+}
+
/* arg2 selectors distinguishing the per-vif sysctl oids in
 * vif_sysctl_handler(). */
enum {
	VIF_SYSCTL_DOMID,
	VIF_SYSCTL_HANDLE,
	VIF_SYSCTL_TXRING,
	VIF_SYSCTL_RXRING,
};
+
+static char *
+vif_sysctl_ring_info(netif_t *netif, int cmd)
+{
+ char *buf = malloc(256, M_DEVBUF, M_WAITOK);
+ if (buf) {
+ if (!netif->rings_connected)
+ sprintf(buf, "rings not connected\n");
+ else if (cmd == VIF_SYSCTL_TXRING) {
+ netif_tx_back_ring_t *tx = &netif->tx;
+ sprintf(buf, "nr_ents=%x req_cons=%x"
+ " req_prod=%x req_event=%x"
+ " rsp_prod=%x rsp_event=%x",
+ tx->nr_ents, tx->req_cons,
+ tx->sring->req_prod, tx->sring->req_event,
+ tx->sring->rsp_prod, tx->sring->rsp_event);
+ } else {
+ netif_rx_back_ring_t *rx = &netif->rx;
+ sprintf(buf, "nr_ents=%x req_cons=%x"
+ " req_prod=%x req_event=%x"
+ " rsp_prod=%x rsp_event=%x",
+ rx->nr_ents, rx->req_cons,
+ rx->sring->req_prod, rx->sring->req_event,
+ rx->sring->rsp_prod, rx->sring->rsp_event);
+ }
+ }
+ return buf;
+}
+
+static int
+vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ device_t dev = (device_t)arg1;
+ netif_t *netif = (netif_t *)device_get_ivars(dev);
+ const char *value;
+ char *buf = NULL;
+ int err;
+
+ switch (arg2) {
+ case VIF_SYSCTL_DOMID:
+ return sysctl_handle_int(oidp, NULL, netif->domid, req);
+ case VIF_SYSCTL_HANDLE:
+ return sysctl_handle_int(oidp, NULL, netif->handle, req);
+ case VIF_SYSCTL_TXRING:
+ case VIF_SYSCTL_RXRING:
+ value = buf = vif_sysctl_ring_info(netif, arg2);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ err = SYSCTL_OUT(req, value, strlen(value));
+ if (buf != NULL)
+ free(buf, M_DEVBUF);
+
+ return err;
+}
+
/* Newbus vif device driver probe */
static int
vif_probe(device_t dev)
{
	DDPRINTF("vif%d\n", device_get_unit(dev));
	/* Always match: vif children are created explicitly in vif_add_dev(). */
	return 0;
}
+
+/* Newbus vif device driver attach */
+static int
+vif_attach(device_t dev)
+{
+ netif_t *netif = (netif_t *)device_get_ivars(dev);
+ uint8_t mac[ETHER_ADDR_LEN];
+
+ DDPRINTF("%s\n", IFNAME(netif));
+
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
+ "domid of frontend");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
+ "handle of frontend");
+#ifdef XEN_NETBACK_DEBUG
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "txring", CTLFLAG_RD,
+ dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
+ "tx ring info");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "rxring", CTLFLAG_RD,
+ dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
+ "rx ring info");
+#endif
+
+ memset(mac, 0xff, sizeof(mac));
+ mac[0] &= ~0x01;
+
+ ether_ifattach(netif->ifp, mac);
+ netif->attached = 1;
+
+ connect(netif);
+
+ if (netif->bridge) {
+ DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
+ int err = add_to_bridge(netif->ifp, netif->bridge);
+ if (err) {
+ WPRINTF("Error adding %s to %s; err=%d\n",
+ IFNAME(netif), netif->bridge, err);
+ }
+ }
+
+ return bus_generic_attach(dev);
+}
+
/* Newbus vif device driver detach */
static int
vif_detach(device_t dev)
{
	netif_t *netif = (netif_t *)device_get_ivars(dev);
	struct ifnet *ifp = netif->ifp;

	DDPRINTF("%s\n", IFNAME(netif));

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* Detach from the ethernet layer before tearing down children. */
	ether_ifdetach(ifp);

	bus_generic_detach(dev);

	netif->attached = 0;

	/* Drop the reference taken in vif_add_dev(). */
	netif_put(netif);

	return 0;
}
+
/* Newbus method table for the vif driver. */
static device_method_t vif_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, vif_probe),
	DEVMETHOD(device_attach, vif_attach),
	DEVMETHOD(device_detach, vif_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),
	{0, 0}	/* table terminator */
};
+
static devclass_t vif_devclass;

/* softc size is 0: per-instance state lives in the netif_t ivars. */
static driver_t vif_driver = {
	"vif",
	vif_methods,
	0,
};

/* Register the vif driver as a child driver of nexus. */
DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
OpenPOWER on IntegriCloud