/*
 * Copyright (c) 2006, Cisco Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright 
 *    notice, this list of conditions and the following disclaimer. 
 * 2. Redistributions in binary form must reproduce the above copyright 
 *    notice, this list of conditions and the following disclaimer in the 
 *    documentation and/or other materials provided with the distribution. 
 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 
 *    may be used to endorse or promote products derived from this software 
 *    without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <sys/module.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_bridgevar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#ifdef SCTP
#include <netinet/sctp.h>
#include <netinet/sctp_crc32.h>
#endif

#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/in_cksum.h>
#include <machine/xen-os.h>
#include <machine/hypervisor.h>
#include <machine/hypervisor-ifs.h>
#include <machine/xen_intr.h>
#include <machine/evtchn.h>
#include <machine/xenbus.h>
#include <machine/gnttab.h>
#include <machine/xen-public/memory.h>
#include <dev/xen/xenbus/xenbus_comms.h>


/*
 * Console logging helpers.  Every variant prefixes the message with
 * "netback", the name of the calling function and the source line.
 *
 * DPRINTF: general debug output, compiled in only when XEN_NETBACK_DEBUG
 * is defined; otherwise it expands to a no-op expression.
 */
#ifdef XEN_NETBACK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) ((void)0)
#endif

/*
 * DDPRINTF / DPRINTF_MBUF / DPRINTF_MBUF_LEN: verbose per-packet output,
 * compiled in only when XEN_NETBACK_DEBUG_LOTS is defined.  The MBUF
 * variants dump packet bytes through print_mbuf(); a length of 0 means
 * "no limit".
 */
#ifdef XEN_NETBACK_DEBUG_LOTS
#define DDPRINTF(fmt, args...) \
    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
#else
#define DDPRINTF(fmt, args...) ((void)0)
#define DPRINTF_MBUF(_m) ((void)0)
#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
#endif

/* WPRINTF: warnings; always compiled in. */
#define WPRINTF(fmt, args...) \
    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)

/*
 * Number of elements in a true array (not a pointer).  Both the argument
 * and the whole expansion are parenthesised so the macro can be used
 * inside larger expressions such as "n % ARRAY_SIZE(a)".
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* BUG_ON is an alias for PANIC_IF, which is defined outside this file. */
#define BUG_ON PANIC_IF

/* Interface name (if_xname) of the ifnet attached to a netif_t. */
#define IFNAME(_np) ((_np)->ifp->if_xname)

/* Number of request/response slots in a one-page TX / RX shared ring. */
#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

/*
 * Book-keeping for one shared ring page mapped from the frontend.
 * Filled in by map_ring() and consumed by unmap_ring().
 */
struct ring_ref {
	vm_offset_t va;		/* kernel VA the ring page is mapped at; 0 when unmapped */
	grant_handle_t handle;	/* handle returned by GNTTABOP_map_grant_ref */
	uint64_t bus_addr;	/* dev_bus_addr returned by the map operation */
};

/*
 * Per-virtual-interface backend state.  One instance is allocated by
 * netif_create() for each xenbus device and released by the final
 * netif_put().
 */
typedef struct netback_info {

	/* Schedule lists */
	STAILQ_ENTRY(netback_info) next_tx;	/* linkage on tx_sched_list */
	STAILQ_ENTRY(netback_info) next_rx;	/* linkage on rx_sched_list */
	int on_tx_sched_list;			/* non-zero while queued on tx_sched_list */
	int on_rx_sched_list;			/* non-zero while queued on rx_sched_list */

	struct xenbus_device *xdev;		/* owning xenbus device; cleared in netback_remove() */
	XenbusState frontend_state;		/* last state reported to frontend_changed() */

	domid_t domid;				/* frontend domain (xdev->otherend_id) */
	int handle;				/* value of the xenstore "handle" node */
	char *bridge;				/* xenstore "bridge" value or NULL; freed with M_DEVBUF */

	int rings_connected;			/* set by connect_rings(), cleared by disconnect_rings() */
	struct ring_ref tx_ring_ref;		/* mapping of the frontend's TX ring page */
	struct ring_ref rx_ring_ref;		/* mapping of the frontend's RX ring page */
	netif_tx_back_ring_t tx;		/* backend view of the TX ring */
	netif_rx_back_ring_t rx;		/* backend view of the RX ring */
	evtchn_port_t evtchn;			/* local port of the interdomain event channel */
	int irq;				/* irq bound to evtchn */
	void *irq_cookie;			/* cookie from bind_evtchn_to_irqhandler() */

	struct ifnet *ifp;			/* the "vifN" network interface */
	int ref_cnt;				/* reference count; starts at 1 in netif_create() */

	device_t ndev;				/* device detached in netback_remove(), if set */
	int attached;				/* must be non-zero before connect() proceeds; set outside this view */
} netif_t;


/* Maximum number of guest TX requests that may be mapped at any one time. */
#define MAX_PENDING_REQS 256
/* Bytes of each packet copied into the header mbuf; the rest stays in the mapped page. */
#define PKT_PROT_LEN 64

/* Original request and owning interface for each pending index. */
static struct {
	netif_tx_request_t req;
	netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
/*
 * Ring of free pending indices: entries are taken at pending_cons and
 * handed back at pending_prod.
 */
static uint16_t pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
/* Wrap a free-running ring index; relies on MAX_PENDING_REQS being a power of two. */
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
/* Pending indices currently in use (total minus those sitting free in pending_ring). */
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

/* Base of the VA window used for mapped TX pages; initialised outside this view. */
static unsigned long mmap_vstart;
/* One page of that window per pending index. */
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

/* Freed TX mbufs get batched on this ring before return to pending_ring. */
static uint16_t dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

/* Scratch arrays used by netif_rx() to batch the page-flip hypercalls. */
static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];

/* Grant handle per pending index, plus scratch arrays for batched map/unmap. */
static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];

/* Tasks run on taskqueue_swi, and the callout that re-arms the RX task. */
static struct task net_tx_task, net_rx_task;
static struct callout rx_task_callout;

/* Interfaces with TX / RX work outstanding, each list guarded by its own mutex. */
static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
	STAILQ_HEAD_INITIALIZER(tx_sched_list);
static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
	STAILQ_HEAD_INITIALIZER(rx_sched_list);
static struct mtx tx_sched_list_lock;
static struct mtx rx_sched_list_lock;

/* Next unit number handed to if_initname() for a new "vif" interface. */
static int vif_unit_maker = 0;

/* Protos */
static void netback_start(struct ifnet *ifp);
static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static int vif_add_dev(struct xenbus_device *xdev);
static void disconnect_rings(netif_t *netif);

#ifdef XEN_NETBACK_DEBUG_LOTS
/*
 * Debug helper: hex-dump the contents of an mbuf chain to the console.
 *
 * m   - head of the chain; must be a packet-header mbuf (m_pkthdr.len is
 *       printed) and must not be NULL.
 * max - maximum number of bytes to dump, or 0 for no limit.
 *
 * Output is 16 bytes per row, each row labelled with the offset of its
 * first byte.
 */
static void
print_mbuf(struct mbuf *m, int max)
{
	int i, j = 0;

	/* %p rather than a cast to unsigned int, which truncates wide pointers. */
	printf("mbuf %p len = %d", (void *)m, m->m_pkthdr.len);
	for (; m; m = m->m_next) {
		unsigned char *d = (unsigned char *)m->m_data;

		for (i = 0; i < m->m_len; i++) {
			if (max && j == max)
				break;
			/* Print the row label before advancing the byte counter. */
			if ((j % 16) == 0)
				printf("\n%04x:", j);
			printf(" %02x", d[i]);
			j++;
		}
	}
	printf("\n");
}
#endif


/* Machine frames are requested from the hypervisor in batches of this size. */
#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];	/* cache of spare machine frames */
static unsigned int alloc_index = 0;		/* number of valid entries in mfn_list */

/*
 * Return one spare machine frame number, refilling the cache with
 * XENMEM_increase_reservation when it is empty.
 *
 * Returns the frame number, or 0 if no frame could be obtained.
 */
static unsigned long
alloc_mfn(void)
{
	unsigned long mfn = 0;
	struct xen_memory_reservation reservation = {
		.extent_start = mfn_list,
		.nr_extents   = MAX_MFN_ALLOC,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (unlikely(alloc_index == 0)) {
		int nr = HYPERVISOR_memory_op(
			XENMEM_increase_reservation, &reservation);

		/*
		 * Only a positive result is a usable extent count.  Storing
		 * a negative error code in the unsigned alloc_index would
		 * turn it into a huge index and read far outside mfn_list.
		 */
		if (nr > 0) {
			if (nr > MAX_MFN_ALLOC)
				nr = MAX_MFN_ALLOC;
			alloc_index = (unsigned int)nr;
		}
	}
	if (alloc_index != 0)
		mfn = mfn_list[--alloc_index];
	return mfn;
}

/*
 * Allocate nr_pages pages of kernel virtual address space from M_DEVBUF
 * and then, page by page, queue an update_va_mapping call for the page
 * (new PTE value left at the zero written by the memset) and pass its
 * machine frame to XENMEM_decrease_reservation.  Presumably this leaves
 * an unbacked VA range that foreign pages can later be mapped into;
 * confirm against the caller.
 *
 * The hypercalls are issued in batches of up to 16 pages through a
 * single multicall whose last entry is the decrease_reservation.
 *
 * Returns the start address of the range, or 0 if the allocation fails.
 *
 * NOTE(review): the multicall result is deliberately discarded, so a
 * failed batch goes unnoticed.  The local mfn_list shadows the file-scope
 * array of the same name.
 */
static unsigned long
alloc_empty_page_range(unsigned long nr_pages)
{
	void *pages;
	int i = 0, j = 0;	/* i: pages processed so far, j: entries in the current batch */
	multicall_entry_t mcl[17];	/* 16 va-mapping updates + 1 memory_op */
	unsigned long mfn_list[16];
	struct xen_memory_reservation reservation = {
		.extent_start = mfn_list,
		.nr_extents   = 0,
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
	if (pages == NULL)
		return 0;

	memset(mcl, 0, sizeof(mcl));

	while (i < nr_pages) {
		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);

		mcl[j].op = __HYPERVISOR_update_va_mapping;
		mcl[j].args[0] = va;

		/* Remember the machine frame currently behind this page. */
		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;

		/* The pseudo-physical page no longer has a machine frame. */
		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;

		/* Flush the batch when it is full or this was the last page. */
		if (j == 16 || i == nr_pages) {
			/* Only the last va-mapping update requests a TLB flush. */
			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;

			reservation.nr_extents = j;

			mcl[j].op = __HYPERVISOR_memory_op;
			mcl[j].args[0] = XENMEM_decrease_reservation;
			mcl[j].args[1] =  (unsigned long)&reservation;
			
			(void)HYPERVISOR_multicall(mcl, j+1);

			/* Clear the flush flag so the slot is clean for the next batch. */
			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
			j = 0;
		}
	}

	return (unsigned long)pages;
}

#ifdef XEN_NETBACK_FIXUP_CSUM
/*
 * Fill in a transport checksum that the stack left for later ("delayed")
 * computation.  The mbuf must start with an Ethernet header immediately
 * followed by an IPv4 header.
 *
 * CSUM_TCP selects the TCP path, CSUM_SCTP (when SCTP is compiled in) the
 * SCTP path, and anything else is treated as UDP.  The flag that was
 * handled is cleared from m->m_pkthdr.csum_flags.
 */
static void
fixup_checksum(struct mbuf *m)
{
	struct ether_header *eh = mtod(m, struct ether_header *);
	struct ip *ip = (struct ip *)(eh + 1);
	int iphlen = ip->ip_hl << 2;
	int iplen = ntohs(ip->ip_len);

	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);

		/* Seed with the pseudo-header sum, then checksum the segment. */
		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			htons(IPPROTO_TCP + (iplen - iphlen)));
		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
#ifdef SCTP
	} else if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
		/*
		 * The flag lives in the mbuf packet header; this function has
		 * no "sw_csum" variable.  The offset skips the Ethernet header
		 * as well as the IP header, matching the TCP and UDP branches.
		 * NOTE(review): confirm the offset convention against
		 * sctp_delayed_cksum() in this tree.
		 */
		sctp_delayed_cksum(m, sizeof(*eh) + iphlen);
		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
#endif
	} else {
		u_short csum;
		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);

		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			htons(IPPROTO_UDP + (iplen - iphlen)));
		/* A computed UDP checksum of zero is sent as 0xffff. */
		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
			csum = 0xffff;
		uh->uh_sum = csum;
		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
	}
}
#endif

/*
 * Add interface ifp as a member of the bridge interface named by
 * "bridge".
 *
 * Returns ENOENT if no interface with that name exists, otherwise the
 * result of the BRDGADD request issued through bridge_ioctl_kern().
 */
static int
add_to_bridge(struct ifnet *ifp, char *bridge)
{
	struct ifdrv ifd;
	struct ifbreq ifb;
	struct ifnet *ifp_bridge = ifunit(bridge);

	if (!ifp_bridge)
		return ENOENT;

	bzero(&ifd, sizeof(ifd));
	bzero(&ifb, sizeof(ifb));

	/* Bounded copies: the destination name fields have a fixed size. */
	strlcpy(ifb.ifbr_ifsname, ifp->if_xname, sizeof(ifb.ifbr_ifsname));
	strlcpy(ifd.ifd_name, ifp->if_xname, sizeof(ifd.ifd_name));
	ifd.ifd_cmd = BRDGADD;
	ifd.ifd_len = sizeof(ifb);
	ifd.ifd_data = &ifb;

	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
}

static int
netif_create(int handle, struct xenbus_device *xdev, char *bridge)
{
	netif_t *netif;
	struct ifnet *ifp;

	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!netif)
		return ENOMEM;

	netif->ref_cnt = 1;
	netif->handle = handle;
	netif->domid = xdev->otherend_id;
	netif->xdev = xdev;
	netif->bridge = bridge;
	xdev->data = netif;

	/* Set up ifnet structure */
	ifp = netif->ifp = if_alloc(IFT_ETHER);
	if (!ifp) {
		if (bridge)
			free(bridge, M_DEVBUF);
		free(netif, M_DEVBUF);
		return ENOMEM;
	}

	ifp->if_softc = netif;
	if_initname(ifp, "vif",
		atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
	ifp->if_output = ether_output;
	ifp->if_start = netback_start;
	ifp->if_ioctl = netback_ioctl;
	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
	
	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);

	return 0;
}

/* Take one additional reference on netif. */
static void
netif_get(netif_t *netif)
{
	int *refs = &netif->ref_cnt;

	atomic_add_int(refs, 1);
}

/*
 * Drop one reference on netif.  When the last reference goes away the
 * rings are disconnected and the ifnet, the bridge name and the netif
 * itself are freed.
 */
static void
netif_put(netif_t *netif)
{
	/* A pre-decrement value other than 1 means references remain. */
	if (atomic_fetchadd_int(&netif->ref_cnt, -1) != 1)
		return;

	DPRINTF("%s\n", IFNAME(netif));
	disconnect_rings(netif);

	if (netif->ifp != NULL) {
		if_free(netif->ifp);
		netif->ifp = NULL;
	}

	if (netif->bridge != NULL)
		free(netif->bridge, M_DEVBUF);

	free(netif, M_DEVBUF);
}

/*
 * Interface ioctl handler.  SIOCSIFFLAGS is accepted and reported as
 * success without further action; every other request is passed to
 * ether_ioctl().
 */
static int
netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	if (cmd == SIOCSIFFLAGS) {
		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
		return 0;
	}

	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);

	return ether_ioctl(ifp, cmd, data);
}

/*
 * Queue the TX task if fewer than half of the pending slots are in use
 * and at least one interface is waiting on the TX schedule list.
 */
static inline void
maybe_schedule_tx_action(void)
{
	smp_mb();

	if (NR_PENDING_REQS >= (MAX_PENDING_REQS/2))
		return;
	if (STAILQ_EMPTY(&tx_sched_list))
		return;

	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
}

/*
 * Pop the interface at the head of the TX schedule list.
 *
 * Returns NULL when the list is empty.  The reference that was taken when
 * the interface was queued is NOT dropped here; the caller must call
 * netif_put().
 */
static netif_t *
remove_from_tx_schedule_list(void)
{
	netif_t *head;

	mtx_lock(&tx_sched_list_lock);

	head = STAILQ_FIRST(&tx_sched_list);
	if (head != NULL) {
		STAILQ_REMOVE_HEAD(&tx_sched_list, next_tx);
		STAILQ_NEXT(head, next_tx) = NULL;
		head->on_tx_sched_list = 0;
	}

	mtx_unlock(&tx_sched_list_lock);

	return head;
}

/*
 * Append netif to the TX schedule list, taking a reference with
 * netif_get().  Nothing happens if it is already queued or if its
 * interface is not marked IFF_DRV_RUNNING.
 */
static void
add_to_tx_schedule_list_tail(netif_t *netif)
{
	/* Cheap check without the lock; repeated below with the lock held. */
	if (netif->on_tx_sched_list != 0)
		return;

	mtx_lock(&tx_sched_list_lock);
	if (netif->on_tx_sched_list == 0) {
		if ((netif->ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
			netif_get(netif);
			STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
			netif->on_tx_sched_list = 1;
		}
	}
	mtx_unlock(&tx_sched_list_lock);
}

/*
 * Queue netif for TX processing if its ring still holds requests.
 *
 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
 * If this driver is pipelining transmit requests then we can be very
 * aggressive in avoiding new-packet notifications -- the frontend only
 * needs to send a notification if there are no outstanding unreceived
 * responses.  If we may be buffering transmit buffers for any reason then
 * we must be rather more conservative and treat this as the final check
 * for pending work.
 */
static void
netif_schedule_tx_work(netif_t *netif)
{
	int pending_work;

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	pending_work = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
#else
	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, pending_work);
#endif

	if (!pending_work)
		return;

	DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
	add_to_tx_schedule_list_tail(netif);
	maybe_schedule_tx_action();
}

/* Spin mutex guarding the producer side of dealloc_ring. */
static struct mtx dealloc_lock;
MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);

/*
 * Record that the mapped page for pending_idx is no longer needed and
 * queue the TX task, which unmaps it and sends the response from
 * net_tx_action_dealloc().
 */
static void
netif_idx_release(uint16_t pending_idx)
{
	mtx_lock_spin(&dealloc_lock);
	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
	dealloc_prod++;
	mtx_unlock_spin(&dealloc_lock);

	taskqueue_enqueue(taskqueue_swi, &net_tx_task);
}

/*
 * Place one response on netif's TX ring, push it to the frontend and
 * raise the event channel if the ring macros report that a notification
 * is needed.
 *
 * id - request id being answered
 * st - status code for the response (e.g. NETIF_RSP_OKAY)
 */
static void
make_tx_response(netif_t *netif, uint16_t id, int8_t st)
{
	RING_IDX slot = netif->tx.rsp_prod_pvt;
	netif_tx_response_t *rsp = RING_GET_RESPONSE(&netif->tx, slot);
	int kick;

	rsp->id     = id;
	rsp->status = st;

	slot++;
	netif->tx.rsp_prod_pvt = slot;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, kick);
	if (kick)
		notify_remote_via_irq(netif->irq);

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	/* Every consumed request is answered: look for late arrivals. */
	if (slot == netif->tx.req_cons) {
		int pending_work;

		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, pending_work);
		if (pending_work)
			add_to_tx_schedule_list_tail(netif);
	}
#endif
}

static inline void
net_tx_action_dealloc(void)
{
	gnttab_unmap_grant_ref_t *gop;
	uint16_t pending_idx;
	PEND_RING_IDX dc, dp;
	netif_t *netif;
	int ret;

	dc = dealloc_cons;
	dp = dealloc_prod;

	/*
	 * Free up any grants we have finished using
	 */
	gop = tx_unmap_ops;
	while (dc != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
		gop->host_addr    = MMAP_VADDR(pending_idx);
		gop->dev_bus_addr = 0;
		gop->handle       = grant_tx_handle[pending_idx];
		gop++;
	}
	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
	BUG_ON(ret);

	while (dealloc_cons != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

		netif = pending_tx_info[pending_idx].netif;

		make_tx_response(netif, pending_tx_info[pending_idx].req.id, 
				 NETIF_RSP_OKAY);
        
		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

		netif_put(netif);
	}
}

/*
 * External-storage free routine installed by MEXTADD() in
 * net_tx_action().  It runs when the mbuf that borrows a mapped guest
 * page is freed.
 *
 * buf  - address of the external buffer (unused)
 * args - the pending index, smuggled through the void * argument
 */
static void
netif_page_release(void *buf, void *args)
{
	/* uintptr_t is the integer type meant for pointer round-trips. */
	uint16_t pending_idx = (uint16_t)(uintptr_t)args;

	DDPRINTF("pending_idx=%u\n", pending_idx);

	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));

	netif_idx_release(pending_idx);
}

/*
 * TX task: move packets that guests have placed on their TX rings into
 * the local network stack.
 *
 * The work happens in three stages:
 *   1. finish any deferred unmaps (net_tx_action_dealloc());
 *   2. pull one request at a time from scheduled interfaces, validate
 *      it, allocate mbufs and queue a grant-map operation for its page;
 *   3. issue all map operations in one hypercall, then copy or attach
 *      the data for each packet and hand it to the interface's if_input.
 *
 * context and pending are the taskqueue arguments and are not used.
 */
static void
net_tx_action(void *context, int pending)
{
	struct mbuf *m;
	netif_t *netif;
	netif_tx_request_t txreq;
	uint16_t pending_idx;
	RING_IDX i;
	gnttab_map_grant_ref_t *mop;
	int ret, work_to_do;
	struct mbuf *txq = NULL, *txq_last = NULL;	/* packets awaiting the map hypercall */

	if (dealloc_cons != dealloc_prod)
		net_tx_action_dealloc();

	mop = tx_map_ops;
	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {

		/* Get a netif from the list with work to do. */
		netif = remove_from_tx_schedule_list();

		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
				IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);

		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
		if (!work_to_do) {
			netif_put(netif);
			continue;
		}

		i = netif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		/* Work on a private copy of the request taken from the shared ring. */
		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));

		/* If we want credit-based scheduling, could add it here - WORK */

		netif->tx.req_cons++;

		/* Re-queue the interface if it still has requests outstanding. */
		netif_schedule_tx_work(netif);

		if (unlikely(txreq.size < ETHER_HDR_LEN) || 
		    unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
			WPRINTF("Bad packet size: %d\n", txreq.size);
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			continue; 
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
			WPRINTF("txreq.offset: %x, size: %u, end: %u\n", 
				txreq.offset, txreq.size, 
				(txreq.offset & PAGE_MASK) + txreq.size);
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			continue;
		}

		/* Peek at the next free pending slot; it is consumed further down. */
		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (!m) {
			WPRINTF("Failed to allocate mbuf\n");
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			break;
		}
		m->m_pkthdr.rcvif = netif->ifp;

		/*
		 * Packets longer than PKT_PROT_LEN get a second mbuf that will
		 * later point into the mapped guest page; shorter ones fit
		 * entirely in the header mbuf.
		 */
		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
			struct mbuf *n;
			MGET(n, M_DONTWAIT, MT_DATA);
			if (!(m->m_next = n)) {
				m_freem(m);
				WPRINTF("Failed to allocate second mbuf\n");
				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
				netif_put(netif);
				break;
			}
			n->m_len = txreq.size - PKT_PROT_LEN;
			m->m_len = PKT_PROT_LEN;
		} else
			m->m_len = txreq.size;

		/* Queue a read-only mapping of the guest page at this slot's VA. */
		mop->host_addr = MMAP_VADDR(pending_idx);
		mop->dom       = netif->domid;
		mop->ref       = txreq.gref;
		mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
		mop++;

		memcpy(&pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		pending_tx_info[pending_idx].netif = netif;
		/* Stash the slot index in the not-yet-filled data area of the mbuf. */
		*((uint16_t *)m->m_data) = pending_idx;

		if (txq_last)
			txq_last->m_nextpkt = m;
		else
			txq = m;
		txq_last = m;

		pending_cons++;

		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
			break;
	}

	if (!txq)
		return;

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
	BUG_ON(ret);

	/* Second pass: txq and tx_map_ops are walked in the same order. */
	mop = tx_map_ops;
	while ((m = txq) != NULL) {
		caddr_t data;

		txq = m->m_nextpkt;
		m->m_nextpkt = NULL;

		/* Recover the slot index stashed in the first pass. */
		pending_idx = *((uint16_t *)m->m_data);
		netif       = pending_tx_info[pending_idx].netif;
		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));

		/* Check the remap error code. */
		if (unlikely(mop->status)) {
			WPRINTF("#### netback grant fails\n");
			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
			netif_put(netif);
			m_freem(m);
			mop++;
			/* Nothing was mapped, so the slot goes straight back to the free ring. */
			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
			continue;
		}

#if 0
		/* Can't do this in FreeBSD since vtophys() returns the pfn */
		/* of the remote domain who loaned us the machine page - DPT */
		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
			mop->dev_bus_addr >> PAGE_SHIFT;
#endif
		/* Remember the handle so the page can be unmapped later. */
		grant_tx_handle[pending_idx] = mop->handle;

		/* Setup data in mbuf (lengths are already set) */
		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
		bcopy(data, m->m_data, m->m_len);
		if (m->m_next) {
			struct mbuf *n = m->m_next;
			/*
			 * The remainder of the packet stays in the mapped page;
			 * netif_page_release() runs when the mbuf is freed.
			 */
			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
			n->m_data = &data[PKT_PROT_LEN];
		} else {
			/* Schedule a response immediately. */
			netif_idx_release(pending_idx);
		}

		if ((txreq.flags & NETTXF_data_validated)) {
			/* Tell the stack the checksums are okay */
			m->m_pkthdr.csum_flags |=
				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			m->m_pkthdr.csum_data = 0xffff;
		}

		/* If necessary, inform stack to compute the checksums if it forwards the packet */
		if ((txreq.flags & NETTXF_csum_blank)) {
			struct ether_header *eh = mtod(m, struct ether_header *);
			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
				/* The IP header follows the 14-byte Ethernet header. */
				struct ip *ip = (struct ip *)&m->m_data[14];
				if (ip->ip_p == IPPROTO_TCP)
					m->m_pkthdr.csum_flags |= CSUM_TCP;
				else if (ip->ip_p == IPPROTO_UDP)
					m->m_pkthdr.csum_flags |= CSUM_UDP;
			}
		}

		netif->ifp->if_ibytes += m->m_pkthdr.len;
		netif->ifp->if_ipackets++;

		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
		DPRINTF_MBUF_LEN(m, 128);

		(*netif->ifp->if_input)(netif->ifp, m);

		mop++;
	}
}

/* Handle interrupt from a frontend */
static void
netback_intr(void *arg)
{
	netif_t *netif = arg;
	DDPRINTF("%s\n", IFNAME(netif));
	add_to_tx_schedule_list_tail(netif);
	maybe_schedule_tx_action();
}

/*
 * Pop the interface at the head of the RX schedule list.
 *
 * Returns NULL when the list is empty.  The reference that was taken when
 * the interface was queued is NOT dropped here; the caller must call
 * netif_put().
 */
static netif_t *
remove_from_rx_schedule_list(void)
{
	netif_t *head;

	mtx_lock(&rx_sched_list_lock);

	head = STAILQ_FIRST(&rx_sched_list);
	if (head != NULL) {
		STAILQ_REMOVE_HEAD(&rx_sched_list, next_rx);
		STAILQ_NEXT(head, next_rx) = NULL;
		head->on_rx_sched_list = 0;
	}

	mtx_unlock(&rx_sched_list_lock);

	return head;
}

/*
 * Append netif to the RX schedule list, taking a reference with
 * netif_get().  Nothing happens if it is already queued or if its
 * interface is not marked IFF_DRV_RUNNING.
 */
static void
add_to_rx_schedule_list_tail(netif_t *netif)
{
	/* Cheap check without the lock; repeated below with the lock held. */
	if (netif->on_rx_sched_list != 0)
		return;

	mtx_lock(&rx_sched_list_lock);
	if (netif->on_rx_sched_list == 0) {
		if ((netif->ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
			netif_get(netif);
			STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
			netif->on_rx_sched_list = 1;
		}
	}
	mtx_unlock(&rx_sched_list_lock);
}

/*
 * Place one response on netif's RX ring and push it to the frontend.
 *
 * id     - request id being answered
 * st     - status; a negative value is reported as-is, otherwise the
 *          response status carries the packet size
 * offset - offset of the packet data within the transferred page
 * size   - packet length in bytes
 * flags  - NETRXF_* flags for the frontend
 *
 * Returns non-zero if the frontend needs an event-channel notification;
 * sending it is left to the caller.
 */
static int
make_rx_response(netif_t *netif, uint16_t id, int8_t st,
				 uint16_t offset, uint16_t size, uint16_t flags)
{
	RING_IDX slot = netif->rx.rsp_prod_pvt;
	netif_rx_response_t *rsp = RING_GET_RESPONSE(&netif->rx, slot);
	int kick;

	rsp->offset = offset;
	rsp->flags  = flags;
	rsp->id     = id;
	rsp->status = (st < 0) ? (int16_t)st : (int16_t)size;

	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
		slot, rsp->offset, rsp->flags, rsp->id, rsp->status);

	netif->rx.rsp_prod_pvt = slot + 1;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, kick);

	return kick;
}

/*
 * Deliver packets queued on the interface's send queue to the guest by
 * page flipping: the page holding each packet is transferred to the
 * guest with GNTTABOP_transfer and replaced locally by a fresh machine
 * frame from alloc_mfn().
 *
 * First loop: dequeue packets while the guest has posted RX requests,
 * copy any packet that is not a single, writable, unshared cluster, and
 * queue the hypercall arguments.  Then issue the batched hypercalls.
 * Second loop: check the result of each transfer and post a response.
 *
 * Returns the number of packets taken off if_snd, including dropped ones.
 */
static int
netif_rx(netif_t *netif)
{
	struct ifnet *ifp = netif->ifp;
	struct mbuf *m;
	multicall_entry_t *mcl;
	mmu_update_t *mmu;
	gnttab_transfer_t *gop;
	unsigned long vdata, old_mfn, new_mfn;
	struct mbuf *rxq = NULL, *rxq_last = NULL;	/* packets awaiting the hypercalls */
	int ret, notify = 0, pkts_dequeued = 0;

	DDPRINTF("%s\n", IFNAME(netif));

	mcl = rx_mcl;
	mmu = rx_mmu;
	gop = grant_rx_op;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		
		/* Quit if the target domain has no receive buffers */
		if (netif->rx.req_cons == netif->rx.sring->req_prod)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		pkts_dequeued++;

		/* Check if we need to copy the data */
		/*
		 * A copy is made unless the packet is one mbuf with writable
		 * external storage that nobody else references.
		 */
		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
			struct mbuf *n;
				
			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
				m->m_flags,
				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
				(unsigned int)m->m_next);

			/* Make copy */
			MGETHDR(n, M_DONTWAIT, MT_DATA);
			if (!n)
				goto drop;

			MCLGET(n, M_DONTWAIT);
			if (!(n->m_flags & M_EXT)) {
				m_freem(n);
				goto drop;
			}

			/* Leave space at front and keep current alignment */
			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);

			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
				WPRINTF("pkt to big %d\n", m->m_pkthdr.len);
				m_freem(n);
				goto drop;
			}
			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
			/* Only the delayed-checksum flags are carried over to the copy. */
			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
			m_freem(m);
			m = n;
		}

		/* Machine frame currently backing the packet data. */
		vdata = (unsigned long)m->m_data;
		old_mfn = vtomach(vdata) >> PAGE_SHIFT;

		/* Replacement frame for the page we are about to give away. */
		if ((new_mfn = alloc_mfn()) == 0)
			goto drop;

#ifdef XEN_NETBACK_FIXUP_CSUM
		/* Check if we need to compute a checksum.  This happens */
		/* when bridging from one domain to another. */
		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ||
			(m->m_pkthdr.csum_flags & CSUM_SCTP))
			fixup_checksum(m);
#endif

		/* Point the pseudo-physical page at the replacement frame. */
		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;

		/* Remap the data VA onto the replacement frame. */
		mcl->op = __HYPERVISOR_update_va_mapping;
		mcl->args[0] = vdata;
		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
		mcl->args[2] = 0;
		mcl->args[3] = 0;
		mcl++;

		/* Transfer the old frame to the guest using the gref from its RX request. */
		gop->mfn = old_mfn;
		gop->domid = netif->domid;
		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
		netif->rx.req_cons++;
		gop++;

		/* Matching machine-to-physical update for the replacement frame. */
		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
		mmu->val = vtophys(vdata) >> PAGE_SHIFT;  
		mmu++;

		if (rxq_last)
			rxq_last->m_nextpkt = m;
		else
			rxq = m;
		rxq_last = m;

		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
		DPRINTF_MBUF_LEN(m, 128);

		/* Filled the batch queue? */
		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
			break;		

		continue;
	drop:
		DDPRINTF("dropping pkt\n");
		ifp->if_oerrors++;
		m_freem(m);
	}

	/* Nothing was queued for transfer. */
	if (mcl == rx_mcl)
		return pkts_dequeued;

	/* Append one mmu_update call covering every queued M2P entry. */
	mcl->op = __HYPERVISOR_mmu_update;
	mcl->args[0] = (unsigned long)rx_mmu;
	mcl->args[1] = mmu - rx_mmu;
	mcl->args[2] = 0;
	mcl->args[3] = DOMID_SELF;
	mcl++;

	/* mcl[-2] is the last update_va_mapping entry: make it flush the TLBs. */
	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
	BUG_ON(ret != 0);

	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
	BUG_ON(ret != 0);

	/* rxq, rx_mcl and grant_rx_op are walked in the same order. */
	mcl = rx_mcl;
	gop = grant_rx_op;

	while ((m = rxq) != NULL) {
		int8_t status;
		uint16_t id, flags = 0;

		rxq = m->m_nextpkt;
		m->m_nextpkt = NULL;

		/* Rederive the machine addresses. */
		new_mfn = mcl->args[1] >> PAGE_SHIFT;
		old_mfn = gop->mfn;

		ifp->if_obytes += m->m_pkthdr.len;
		ifp->if_opackets++;

		/* The update_va_mapping() must not fail. */
		BUG_ON(mcl->result != 0);

		/* Setup flags */
		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
			flags |= NETRXF_csum_blank | NETRXF_data_validated;
		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
			flags |= NETRXF_data_validated;

		/* Check the reassignment error code. */
		status = NETIF_RSP_OKAY;
		if (gop->status != 0) { 
			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
				gop->status, netif->domid);
			/*
			 * Page no longer belongs to us unless GNTST_bad_page,
			 * but that should be a fatal error anyway.
			 */
			BUG_ON(gop->status == GNTST_bad_page);
			status = NETIF_RSP_ERROR; 
		}
		/* The response reuses the id of the request in the same ring slot. */
		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
		notify |= make_rx_response(netif, id, status,
					(unsigned long)m->m_data & PAGE_MASK,
					m->m_pkthdr.len, flags);

		m_freem(m);
		mcl++;
		gop++;
	}

	if (notify)
		notify_remote_via_irq(netif->irq);

	return pkts_dequeued;
}

/*
 * Callout handler armed by net_rx_action(): queue the RX task again.
 * arg is unused.
 */
static void
rx_task_timer(void *arg)
{
	DDPRINTF("\n");

	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
}

/*
 * RX task: service interfaces on the RX schedule list in turn.
 *
 * An interface that still has queued packets after its turn is put back
 * on the tail of the list.  The first interface whose turn dequeued
 * nothing is remembered; if it comes round again before any other
 * interface made progress, the pass ends and a one-tick callout retries
 * later.
 *
 * context and pending are the taskqueue arguments and are not used.
 */
static void
net_rx_action(void *context, int pending)
{
	netif_t *cur;
	netif_t *first_idle = NULL;

	DDPRINTF("\n");

	for (;;) {
		struct ifnet *ifp;

		cur = remove_from_rx_schedule_list();
		if (cur == NULL)
			break;
		ifp = cur->ifp;

		if (cur == first_idle) {
			/* Full circle with no progress: back off until the callout fires. */
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				add_to_rx_schedule_list_tail(cur);
			netif_put(cur);
			if (!STAILQ_EMPTY(&rx_sched_list))
				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
			break;
		}

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
			if (netif_rx(cur) != 0)
				first_idle = NULL;
			else if (first_idle == NULL)
				first_idle = cur;

			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				add_to_rx_schedule_list_tail(cur);
		}

		netif_put(cur);
	}
}

static void
netback_start(struct ifnet *ifp)
{
	netif_t *netif = (netif_t *)ifp->if_softc;

	DDPRINTF("%s\n", IFNAME(netif));

	add_to_rx_schedule_list_tail(netif);
	taskqueue_enqueue(taskqueue_swi, &net_rx_task); 
}

/*
 * Map a ring page granted by the frontend into kernel address space.
 *
 * ref  - grant reference published by the frontend
 * dom  - frontend domain id
 * ring - receives the mapping's VA, grant handle and bus address
 *
 * Returns 0 on success, ENOMEM if no kernel VA could be reserved, or
 * EACCES if the grant could not be mapped.  On failure ring->va is 0.
 */
static int
map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
{
	struct gnttab_map_grant_ref op;
	int rc;

	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	if (ring->va == 0)
		return ENOMEM;

	/* Zero the request so op.status is never read uninitialised. */
	bzero(&op, sizeof(op));
	op.host_addr = ring->va;
	op.flags = GNTMAP_host_map;
	op.ref = ref;
	op.dom = dom;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	/* The hypercall itself can fail, as can the individual map request. */
	if (rc != 0 || op.status != 0) {
		WPRINTF("grant table op err=%d\n", rc != 0 ? rc : op.status);
		kmem_free(kernel_map, ring->va, PAGE_SIZE);
		ring->va = 0;
		return EACCES;
	}

	ring->handle = op.handle;
	ring->bus_addr = op.dev_bus_addr;

	return 0;
}

/*
 * Undo map_ring(): unmap the granted ring page and release its kernel
 * VA.  A failed unmap is logged; the VA is released either way, as
 * before.
 */
static void
unmap_ring(struct ring_ref *ring)
{
	struct gnttab_unmap_grant_ref op;
	int rc;

	/* Zero the request so op.status is never read uninitialised. */
	bzero(&op, sizeof(op));
	op.host_addr = ring->va;
	op.dev_bus_addr = ring->bus_addr;
	op.handle = ring->handle;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
	/* Report a failure of the hypercall itself as well as of the request. */
	if (rc != 0 || op.status != 0)
		WPRINTF("grant table op err=%d\n", rc != 0 ? rc : op.status);

	kmem_free(kernel_map, ring->va, PAGE_SIZE);
	ring->va = 0;
}

/*
 * Connect to the frontend's shared rings and event channel.
 *
 * Reads the ring references and the event-channel port from the
 * frontend's xenstore directory, maps both ring pages, binds the
 * interdomain event channel and attaches netback_intr() to it.
 *
 * Returns 0 on success.  On failure the error is also reported through
 * xenbus_dev_fatal() and any ring mapped so far is unmapped.
 *
 * NOTE(review): the value returned by bind_evtchn_to_irqhandler() is
 * stored without being checked.
 */
static int
connect_rings(netif_t *netif)
{
	struct xenbus_device *xdev = netif->xdev;
	netif_tx_sring_t *tx_shared;
	netif_rx_sring_t *rx_shared;
	unsigned long tx_gref, rx_gref;
	evtchn_port_t remote_port;
	evtchn_op_t bind_op = { .cmd = EVTCHNOP_bind_interdomain };
	int error;

	/* Fetch the frontend's connection details from xenstore. */
	error = xenbus_gather(NULL, xdev->otherend,
	    "tx-ring-ref", "%lu", &tx_gref,
	    "rx-ring-ref", "%lu", &rx_gref,
	    "event-channel", "%u", &remote_port, NULL);
	if (error != 0) {
		xenbus_dev_fatal(xdev, error,
			"reading %s/ring-ref and event-channel",
			xdev->otherend);
		return error;
	}

	/* TX ring. */
	error = map_ring(tx_gref, netif->domid, &netif->tx_ring_ref);
	if (error != 0) {
		xenbus_dev_fatal(xdev, error, "mapping tx ring");
		return error;
	}
	tx_shared = (netif_tx_sring_t *)netif->tx_ring_ref.va;
	BACK_RING_INIT(&netif->tx, tx_shared, PAGE_SIZE);

	/* RX ring. */
	error = map_ring(rx_gref, netif->domid, &netif->rx_ring_ref);
	if (error != 0) {
		unmap_ring(&netif->tx_ring_ref);
		xenbus_dev_fatal(xdev, error, "mapping rx ring");
		return error;
	}
	rx_shared = (netif_rx_sring_t *)netif->rx_ring_ref.va;
	BACK_RING_INIT(&netif->rx, rx_shared, PAGE_SIZE);

	/* Event channel. */
	bind_op.u.bind_interdomain.remote_dom = netif->domid;
	bind_op.u.bind_interdomain.remote_port = remote_port;
	error = HYPERVISOR_event_channel_op(&bind_op);
	if (error != 0) {
		unmap_ring(&netif->tx_ring_ref);
		unmap_ring(&netif->rx_ring_ref);
		xenbus_dev_fatal(xdev, error, "binding event channel");
		return error;
	}
	netif->evtchn = bind_op.u.bind_interdomain.local_port;

	/* Route events on the local port to netback_intr(). */
	netif->irq = bind_evtchn_to_irqhandler(netif->evtchn, "netback",
	    netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);

	netif->rings_connected = 1;

	DPRINTF("%s connected! evtchn=%d irq=%d\n",
		IFNAME(netif), netif->evtchn, netif->irq);

	return 0;
}

/*
 * Tear down what connect_rings() set up: detach the interrupt handler
 * and unmap both rings.  Does nothing if the rings are not connected.
 */
static void
disconnect_rings(netif_t *netif)
{
	DPRINTF("\n");

	if (!netif->rings_connected)
		return;

	unbind_from_irqhandler(netif->irq, netif->irq_cookie);
	netif->irq = 0;

	unmap_ring(&netif->tx_ring_ref);
	unmap_ring(&netif->rx_ring_ref);

	netif->rings_connected = 0;
}

/*
 * Bring the interface up once every precondition holds: the xenbus
 * device is still present, the vif device is attached and the frontend
 * has reached XenbusStateConnected.  If the rings connect, the backend
 * switches to Connected and the ifnet is marked running and up.
 */
static void
connect(netif_t *netif)
{
	if (netif->xdev == NULL)
		return;
	if (!netif->attached)
		return;
	if (netif->frontend_state != XenbusStateConnected)
		return;

	if (connect_rings(netif) != 0)
		return;

	xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);

	/* Turn on interface */
	netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	netif->ifp->if_flags |= IFF_UP;
}

/*
 * xenbus remove callback: detach the vif device (if any), break the link
 * between the xenbus device and the netif, and drop the reference that
 * netif_create() set up.
 *
 * Always returns 0.  A device with no backend state attached (for
 * example after a failed probe) is ignored.
 */
static int
netback_remove(struct xenbus_device *xdev)
{
	netif_t *netif = xdev->data;
	device_t ndev;

	DPRINTF("remove %s\n", xdev->nodename);

	/* Nothing to tear down if no netif was ever attached. */
	if (netif == NULL)
		return 0;

	if ((ndev = netif->ndev)) {
		netif->ndev = NULL;
		mtx_lock(&Giant);
		device_detach(ndev);
		mtx_unlock(&Giant);
	}

	xdev->data = NULL;
	netif->xdev = NULL;
	netif_put(netif);

	return 0;
}

/**
 * Entry point to this code when a new backend device is created.
 *
 * Reads the device's "handle" (and the optional "bridge" name) from its
 * xenbus node, switches the backend to the InitWait state, creates the
 * netif for it and finally adds the newbus vif device.
 *
 * Returns 0 on success.  On failure the error of the failing step is
 * reported through xenbus_dev_fatal() and returned.
 */
static int
netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
{
	int err;
	long handle;
	char *bridge;

	DPRINTF("node=%s\n", xdev->nodename);

	/* Grab the handle */
	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
	if (err != 1) {
		xenbus_dev_fatal(xdev, err, "reading handle");
		return err;
	}

	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
	if (err) {
		xenbus_dev_fatal(xdev, err, "writing switch state");
		return err;
	}

	/*
	 * Check for bridge.  The name is optional: a failed read simply means
	 * "no bridge".  It is read only after the state switch above so that
	 * the early return there cannot drop the returned string on the floor.
	 */
	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
	if (IS_ERR(bridge))
		bridge = NULL;

	err = netif_create(handle, xdev, bridge);
	if (err) {
		/*
		 * NOTE(review): whether 'bridge' must be released here depends
		 * on what netif_create() does with it on failure -- confirm.
		 */
		xenbus_dev_fatal(xdev, err, "creating netif");
		return err;
	}

	err = vif_add_dev(xdev);
	if (err) {
		/* Drop the netif that netif_create() hung off xdev->data. */
		netif_put((netif_t *)xdev->data);
		xenbus_dev_fatal(xdev, err, "adding vif device");
		return err;
	}

	return 0;
}

/**
 * xenbus "resume" callback for the backend.
 *
 * Currently a placeholder: it emits a debug trace with the node name and
 * reports success without touching the netif.
 */
static int
netback_resume(struct xenbus_device *xdev)
{
	DPRINTF("node=%s\n", xdev->nodename);

	return (0);
}


/**
 * Callback received when the frontend's state changes.
 *
 * The new state is recorded in the netif and then acted upon:
 *  - Connected:  try to connect the rings and bring the interface up;
 *  - Closing:    follow the frontend into the Closing state;
 *  - Closed:     remove the xenbus device;
 *  - Unknown / InitWait: not expected from a frontend, reported as fatal;
 *  - anything else: ignored.
 */
static void
frontend_changed(struct xenbus_device *xdev, XenbusState frontend_state)
{
	netif_t *netif = xdev->data;

	DPRINTF("state=%d\n", frontend_state);

	netif->frontend_state = frontend_state;

	switch (frontend_state) {
	case XenbusStateConnected:
		connect(netif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xenbus_remove_device(xdev);
		break;

	case XenbusStateUnknown:
	case XenbusStateInitWait:
		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
		    frontend_state);
		break;

	case XenbusStateInitialising:
	case XenbusStateInitialised:
	default:
		/* Nothing to do for these states. */
		break;
	}
}

/* ** Driver registration ** */

/*
 * Device types handled by this backend driver: only "vif".  The final
 * empty-string entry presumably acts as the list terminator.
 */
static struct xenbus_device_id netback_ids[] = {
	{ "vif" },
	{ "" }
};

/*
 * xenbus driver description for the network backend; handed to
 * xenbus_register_backend() from netback_init().
 */
static struct xenbus_driver netback = {
	.name = "netback",
	.ids = netback_ids,			/* device types we claim */
	.probe = netback_probe,			/* new backend device appeared */
	.remove = netback_remove,		/* backend device going away */
	.resume= netback_resume,		/* currently a no-op */
	.otherend_changed = frontend_changed,	/* frontend state transitions */
};

/*
 * One-time initialisation of the netback driver's global state, followed
 * by registration of the backend with xenbus.  The argument is unused.
 */
static void
netback_init(void *unused)
{
	callout_init(&rx_task_callout, CALLOUT_MPSAFE);

	/* Reserve an empty page range, one page per possible pending request. */
	mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
	BUG_ON(!mmap_vstart);

	/* Seed the pending ring so that slot i initially holds index i. */
	pending_cons = 0;
	pending_prod = 0;
	while (pending_prod < MAX_PENDING_REQS) {
		pending_ring[pending_prod] = pending_prod;
		pending_prod++;
	}

	/* Locks protecting the tx/rx scheduling lists. */
	mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
	mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);

	/* Deferred-work tasks for the transmit and receive paths. */
	TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
	TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);

	DPRINTF("registering %s\n", netback.name);

	xenbus_register_backend(&netback);
}

/* Hook netback_init() into kernel startup at the SI_SUB_PSEUDO stage. */
SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)

static int
vif_add_dev(struct xenbus_device *xdev)
{
	netif_t *netif = xdev->data;
	device_t nexus, ndev;
	devclass_t dc;
	int err = 0;

	mtx_lock(&Giant);

	/* We will add a vif device as a child of nexus0 (for now) */
	if (!(dc = devclass_find("nexus")) ||
		!(nexus = devclass_get_device(dc, 0))) {
		WPRINTF("could not find nexus0!\n");
		err = ENOENT;
		goto done;
	}


	/* Create a newbus device representing the vif */
	ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
	if (!ndev) {
		WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
		err = EFAULT;
		goto done;
	}
	
	netif_get(netif);
	device_set_ivars(ndev, netif);
	netif->ndev = ndev;

	device_probe_and_attach(ndev);

 done:

	mtx_unlock(&Giant);

	return err;
}

/*
 * Selectors passed as arg2 to vif_sysctl_handler() to identify which
 * per-vif sysctl node is being read.
 */
enum {
	VIF_SYSCTL_DOMID,	/* netif->domid, as an int */
	VIF_SYSCTL_HANDLE,	/* netif->handle, as an int */
	VIF_SYSCTL_TXRING,	/* textual dump of the tx ring indices */
	VIF_SYSCTL_RXRING,	/* textual dump of the rx ring indices */
};

/*
 * Format a one-line, human-readable snapshot of one of the netif's rings.
 *
 * cmd selects the ring: VIF_SYSCTL_TXRING dumps the tx ring, any other
 * value dumps the rx ring.  When the rings are not connected the text
 * "rings not connected\n" is produced instead.
 *
 * Returns a NUL-terminated string allocated from M_DEVBUF; the caller is
 * responsible for releasing it with free(..., M_DEVBUF).
 */
static char *
vif_sysctl_ring_info(netif_t *netif, int cmd)
{
	enum { VIF_RING_INFO_LEN = 256 };
	char *buf;

	/* M_WAITOK allocations are not expected to fail; the check is kept
	 * as a cheap guard so a NULL is never written through. */
	buf = malloc(VIF_RING_INFO_LEN, M_DEVBUF, M_WAITOK);
	if (buf == NULL)
		return (NULL);

	/* Every write below is bounded by the size of the allocation. */
	if (!netif->rings_connected) {
		snprintf(buf, VIF_RING_INFO_LEN, "rings not connected\n");
	} else if (cmd == VIF_SYSCTL_TXRING) {
		netif_tx_back_ring_t *tx = &netif->tx;

		snprintf(buf, VIF_RING_INFO_LEN, "nr_ents=%x req_cons=%x"
		    " req_prod=%x req_event=%x"
		    " rsp_prod=%x rsp_event=%x",
		    tx->nr_ents, tx->req_cons,
		    tx->sring->req_prod, tx->sring->req_event,
		    tx->sring->rsp_prod, tx->sring->rsp_event);
	} else {
		netif_rx_back_ring_t *rx = &netif->rx;

		snprintf(buf, VIF_RING_INFO_LEN, "nr_ents=%x req_cons=%x"
		    " req_prod=%x req_event=%x"
		    " rsp_prod=%x rsp_event=%x",
		    rx->nr_ents, rx->req_cons,
		    rx->sring->req_prod, rx->sring->req_event,
		    rx->sring->rsp_prod, rx->sring->rsp_event);
	}

	return (buf);
}

static int
vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	device_t dev = (device_t)arg1;
	netif_t *netif = (netif_t *)device_get_ivars(dev);
	const char *value;
	char *buf = NULL;
	int err;

	switch (arg2) {
	case VIF_SYSCTL_DOMID:
		return sysctl_handle_int(oidp, NULL, netif->domid, req);
	case VIF_SYSCTL_HANDLE:
		return sysctl_handle_int(oidp, NULL, netif->handle, req);
	case VIF_SYSCTL_TXRING:
	case VIF_SYSCTL_RXRING:
		value = buf = vif_sysctl_ring_info(netif, arg2);
		break;
	default:
		return (EINVAL);
	}

	err = SYSCTL_OUT(req, value, strlen(value));
	if (buf != NULL)
		free(buf, M_DEVBUF);

	return err;
}

/*
 * Newbus vif device driver probe.
 *
 * Accepts every device unconditionally: it only emits a debug trace with
 * the unit number and returns 0.
 */
static int
vif_probe(device_t dev)
{
	DDPRINTF("vif%d\n", device_get_unit(dev));

	return (0);
}

/*
 * Newbus vif device driver attach.
 *
 * Publishes the per-device sysctl nodes, attaches the netif's ifnet to the
 * Ethernet layer with a fixed MAC address, tries to connect to the
 * frontend and, when a bridge name was configured, adds the interface to
 * that bridge (a failure there is only logged).
 *
 * Returns the result of bus_generic_attach().
 */
static int
vif_attach(device_t dev)
{
	netif_t *netif = (netif_t *)device_get_ivars(dev);
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *parent;
	uint8_t mac[ETHER_ADDR_LEN];
	int err;

	DDPRINTF("%s\n", IFNAME(netif));

	/* All nodes hang off this device's own sysctl tree. */
	ctx = device_get_sysctl_ctx(dev);
	parent = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));

	SYSCTL_ADD_PROC(ctx, parent,
	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
	    dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
	    "domid of frontend");
	SYSCTL_ADD_PROC(ctx, parent,
	    OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
	    dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
	    "handle of frontend");
#ifdef XEN_NETBACK_DEBUG
	SYSCTL_ADD_PROC(ctx, parent,
	    OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD,
	    dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
	    "tx ring info");
	SYSCTL_ADD_PROC(ctx, parent,
	    OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD,
	    dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
	    "rx ring info");
#endif

	/*
	 * The interface gets the address fe:ff:ff:ff:ff:ff: all ones with
	 * the low bit of the first octet (the group/multicast bit) cleared.
	 */
	memset(mac, 0xff, sizeof(mac));
	mac[0] &= ~0x01;

	ether_ifattach(netif->ifp, mac);
	netif->attached = 1;

	/* The frontend may already be Connected; if so, come up right now. */
	connect(netif);

	if (netif->bridge != NULL) {
		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
		err = add_to_bridge(netif->ifp, netif->bridge);
		if (err != 0) {
			WPRINTF("Error adding %s to %s; err=%d\n",
				IFNAME(netif), netif->bridge, err);
		}
	}

	return bus_generic_attach(dev);
}

/*
 * Newbus vif device driver detach.
 *
 * Marks the interface as no longer running, detaches it from the Ethernet
 * layer, detaches any child devices, clears the netif's "attached" flag
 * and drops one reference on the netif (presumably the one taken in
 * vif_add_dev() when the netif became this device's ivars).
 *
 * Always returns 0.
 */
static int
vif_detach(device_t dev)
{
	netif_t *netif = (netif_t *)device_get_ivars(dev);

	DDPRINTF("%s\n", IFNAME(netif));

	/* Tell the stack that the interface is no longer active */
	netif->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	ether_ifdetach(netif->ifp);
	bus_generic_detach(dev);

	netif->attached = 0;
	netif_put(netif);

	return (0);
}

/*
 * Newbus method table for the vif driver.  Probe/attach/detach are
 * implemented in this file; shutdown/suspend/resume use the generic bus
 * implementations.  The {0, 0} entry ends the table.
 */
static device_method_t vif_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		vif_probe),
	DEVMETHOD(device_attach, 	vif_attach),
	DEVMETHOD(device_detach,	vif_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),
	{0, 0}
};

/* Device class for vif devices; passed to DRIVER_MODULE() below. */
static devclass_t vif_devclass;

/*
 * Newbus driver description: driver name "vif", the method table above,
 * and a third field of 0 (presumably the per-device softc size, i.e. no
 * softc -- the netif is carried in the device's ivars instead).
 */
static driver_t vif_driver = {
	"vif",
	vif_methods,
	0,
};

/* Register the vif driver on the nexus bus. */
DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */