Diffstat (limited to 'sys')
-rw-r--r-- | sys/dev/xen/balloon/balloon.c      |  446
-rw-r--r-- | sys/dev/xen/blkback/blkback.c      | 1349
-rw-r--r-- | sys/dev/xen/blkfront/blkfront.c    | 1021
-rw-r--r-- | sys/dev/xen/blkfront/block.h       |   97
-rw-r--r-- | sys/dev/xen/console/console.c      |  564
-rw-r--r-- | sys/dev/xen/console/xencons_ring.c |  154
-rw-r--r-- | sys/dev/xen/console/xencons_ring.h |   20
-rw-r--r-- | sys/dev/xen/evtchn/evtchn_dev.c    |  394
-rw-r--r-- | sys/dev/xen/netback/netback.c      | 1585
-rw-r--r-- | sys/dev/xen/netfront/mbufq.h       |  123
-rw-r--r-- | sys/dev/xen/netfront/netfront.c    | 1829
-rw-r--r-- | sys/dev/xen/pcifront/pcifront.c    |  688
12 files changed, 8270 insertions(+), 0 deletions(-)
diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c new file mode 100644 index 0000000..fa49196 --- /dev/null +++ b/sys/dev/xen/balloon/balloon.c @@ -0,0 +1,446 @@ +/****************************************************************************** + * balloon.c + * + * Xen balloon driver - enables returning/claiming memory to/from Xen. + * + * Copyright (c) 2003, B Dragovic + * Copyright (c) 2003-2004, M Williamson, K Fraser + * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#include <machine/hypervisor-ifs.h> +#include <machine/xen-os.h> +#include <machine/xenbus.h> + +/* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and driver_pages, and + * balloon lists. + */ +struct mtx balloon_lock; +#ifdef notyet + +/* We aim for 'current allocation' == 'target allocation'. */ +static unsigned long current_pages; +static unsigned long target_pages; + +/* VM /proc information for memory */ +extern unsigned long totalram_pages; + +/* We may hit the hard limit in Xen. If we do then we remember it. */ +static unsigned long hard_limit; + +/* + * Drivers may alter the memory reservation independently, but they must + * inform the balloon driver so that we can avoid hitting the hard limit. + */ +static unsigned long driver_pages; + +struct balloon_entry { + vm_page_t page; + STAILQ_ENTRY(balloon_entry) list; +}; + +/* List of ballooned pages, threaded through the mem_map array. */ +static STAILQ_HEAD(,balloon_entry) ballooned_pages; + +static unsigned long balloon_low, balloon_high; + + +/* Main work function, always executed in process context. */ +static void balloon_process(void *unused); + +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_mem: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "xen_mem: " fmt, ##args) + +/* balloon_append: add the given page to the balloon. 
*/ +static void +balloon_append(vm_page_t page) +{ + struct balloon_entry *entry; + + entry = malloc(sizeof(struct balloon_entry), M_WAITOK); + + STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); + balloon_low++; +} + +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ +static vm_page_t +balloon_retrieve(void) +{ + vm_page_t page; + struct balloon_entry *entry; + + if (STAILQ_EMPTY(&ballooned_pages)) + return NULL; + + entry = STAILQ_FIRST(&ballooned_pages); + STAILQ_REMOVE_HEAD(&ballooned_pages, list); + + page = entry->page; + free(entry, M_DEVBUF); + + balloon_low--; + + return page; +} + +static void +balloon_alarm(unsigned long unused) +{ + wakeup(balloon_process); +} + +static unsigned long +current_target(void) +{ + unsigned long target = min(target_pages, hard_limit); + if (target > (current_pages + balloon_low + balloon_high)) + target = current_pages + balloon_low + balloon_high; + return target; +} + +static int +increase_reservation(unsigned long nr_pages) +{ + unsigned long *mfn_list, pfn, i, flags; + struct page *page; + long rc; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > (PAGE_SIZE / sizeof(unsigned long))) + nr_pages = PAGE_SIZE / sizeof(unsigned long); + + mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (mfn_list == NULL) + return ENOMEM; + + + reservation.extent_start = mfn_list; + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); + if (rc < nr_pages) { + int ret; + /* We hit the Xen hard limit: reprobe. */ + reservation.extent_start = mfn_list; + reservation.nr_extents = rc; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + PANIC_IF(ret != rc); + hard_limit = current_pages + rc - driver_pages; + goto out; + } + + for (i = 0; i < nr_pages; i++) { + page = balloon_retrieve(); + PANIC_IF(page == NULL); + + pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); + PANIC_IF(phys_to_machine_mapping_valid(pfn)); + + /* Update P->M and M->P tables. */ + PFNTOMFN(pfn) = mfn_list[i]; + xen_machphys_update(mfn_list[i], pfn); + + /* Relinquish the page back to the allocator. */ + ClearPageReserved(page); + set_page_count(page, 1); + vm_page_free(page); + } + + current_pages += nr_pages; + totalram_pages = current_pages; + + out: + balloon_unlock(flags); + + free((mfn_list); + + return 0; +} + +static int +decrease_reservation(unsigned long nr_pages) +{ + unsigned long *mfn_list, pfn, i, flags; + struct page *page; + void *v; + int need_sleep = 0; + int ret; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > (PAGE_SIZE / sizeof(unsigned long))) + nr_pages = PAGE_SIZE / sizeof(unsigned long); + + mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (mfn_list == NULL) + return ENOMEM; + + for (i = 0; i < nr_pages; i++) { + int color = 0; + if ((page = vm_page_alloc(NULL, color++, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { + nr_pages = i; + need_sleep = 1; + break; + } + pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); + mfn_list[i] = PFNTOMFN(pfn); + } + + balloon_lock(flags); + + /* No more mappings: invalidate P2M and add to balloon. 
*/ + for (i = 0; i < nr_pages; i++) { + pfn = MFNTOPFN(mfn_list[i]); + PFNTOMFN(pfn) = INVALID_P2M_ENTRY; + balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT)); + } + + reservation.extent_start = mfn_list; + reservation.nr_extents = nr_pages; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + PANIC_IF(ret != nr_pages); + + current_pages -= nr_pages; + totalram_pages = current_pages; + + balloon_unlock(flags); + + free(mfn_list, M_DEVBUF); + + return need_sleep; +} + +/* + * We avoid multiple worker processes conflicting via the balloon mutex. + * We may of course race updates of the target counts (which are protected + * by the balloon lock), or with changes to the Xen hard limit, but we will + * recover from these in time. + */ +static void +balloon_process(void *unused) +{ + int need_sleep = 0; + long credit; + + for (;;) { + do { + credit = current_target() - current_pages; + if (credit > 0) + need_sleep = (increase_reservation(credit) != 0); + if (credit < 0) + need_sleep = (decrease_reservation(-credit) != 0); + +#ifndef CONFIG_PREEMPT + if (need_resched()) + schedule(); +#endif + } while ((credit != 0) && !need_sleep); + + /* Schedule more work if there is some still to be done. */ + if (current_target() != current_pages) + timeout(balloon_alarm, NULL, ticks + HZ); + + msleep(balloon_process, balloon_lock, 0, "balloon", -1); + } + +} + +/* Resets the Xen limit, sets new target, and kicks off processing. */ +static void +set_new_target(unsigned long target) +{ + /* No need for lock. Not read-modify-write updates. */ + hard_limit = ~0UL; + target_pages = target; + wakeup(balloon_process); +} + +static struct xenbus_watch target_watch = +{ + .node = "memory/target" +}; + +/* React to a change in the target key */ +static void +watch_target(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + unsigned long long new_target; + int err; + + err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target); + if (err != 1) { + /* This is ok (for domain0 at least) - so just return */ + return; + } + + /* The given memory/target value is in KiB, so it needs converting to + pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. + */ + set_new_target(new_target >> (PAGE_SHIFT - 10)); + +} + +static void +balloon_init_watcher(void *) +{ + int err; + + err = register_xenbus_watch(&target_watch); + if (err) + printf("Failed to set balloon watcher\n"); + +} + +static void +balloon_init(void *) +{ + unsigned long pfn; + struct page *page; + + IPRINTK("Initialising balloon driver.\n"); + + if (xen_init() < 0) + return -1; + + current_pages = min(xen_start_info->nr_pages, max_pfn); + target_pages = current_pages; + balloon_low = 0; + balloon_high = 0; + driver_pages = 0UL; + hard_limit = ~0UL; + + init_timer(&balloon_timer); + balloon_timer.data = 0; + balloon_timer.function = balloon_alarm; + + /* Initialise the balloon with excess memory space. 
*/ + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); + balloon_append(page); + } + + target_watch.callback = watch_target; + + return 0; +} + +void +balloon_update_driver_allowance(long delta) +{ + unsigned long flags; + + balloon_lock(flags); + driver_pages += delta; + balloon_unlock(flags); +} + +#if 0 +static int dealloc_pte_fn( + pte_t *pte, struct page *pte_page, unsigned long addr, void *data) +{ + unsigned long mfn = pte_mfn(*pte); + int ret; + struct xen_memory_reservation reservation = { + .extent_start = &mfn, + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_pte_at(&init_mm, addr, pte, __pte_ma(0)); + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + PANIC_IF(ret != 1); + return 0; +} + +#endif +vm_page_t +balloon_alloc_empty_page_range(unsigned long nr_pages) +{ + unsigned long flags; + vm_page_t pages; + int i; + unsigned long *mfn_list; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4) + if (pages == NULL) + return NULL; + + mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK); + + for (i = 0; i < nr_pages; i++) { + mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT); + PFNTOMFN(i) = INVALID_P2M_ENTRY; + reservation.extent_start = mfn_list; + reservation.nr_extents = nr_pages; + PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != nr_pages); + } + + current_pages -= nr_pages; + + wakeup(balloon_process); + + return pages; +} + +void +balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages) +{ + unsigned long i, flags; + + for (i = 0; i < nr_pages; i++) + balloon_append(page + i); + + wakeup(balloon_process); +} + +#endif diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c new file mode 100644 index 0000000..630a0bd --- /dev/null +++ b/sys/dev/xen/blkback/blkback.c @@ -0,0 +1,1349 @@ +/* + * Copyright (c) 2006, Cisco Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/filedesc.h> +#include <sys/vnode.h> +#include <sys/fcntl.h> +#include <sys/disk.h> +#include <sys/bio.h> + +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/sysctl.h> + +#include <geom/geom.h> + +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> + +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/hypervisor-ifs.h> +#include <machine/xen_intr.h> +#include <machine/evtchn.h> +#include <machine/xenbus.h> +#include <machine/gnttab.h> +#include <machine/xen-public/memory.h> +#include <dev/xen/xenbus/xenbus_comms.h> + + +#if XEN_BLKBACK_DEBUG +#define DPRINTF(fmt, args...) \ + printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) ((void)0) +#endif + +#define WPRINTF(fmt, args...) \ + printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define BLKBACK_INVALID_HANDLE (~0) + +struct ring_ref { + vm_offset_t va; + grant_handle_t handle; + uint64_t bus_addr; +}; + +typedef struct blkback_info { + + /* Schedule lists */ + STAILQ_ENTRY(blkback_info) next_req; + int on_req_sched_list; + + struct xenbus_device *xdev; + XenbusState frontend_state; + + domid_t domid; + + int state; + int ring_connected; + struct ring_ref rr; + blkif_back_ring_t ring; + evtchn_port_t evtchn; + int irq; + void *irq_cookie; + + int ref_cnt; + + int handle; + char *mode; + char *type; + char *dev_name; + + struct vnode *vn; + struct cdev *cdev; + struct cdevsw *csw; + u_int sector_size; + int sector_size_shift; + off_t media_size; + u_int media_num_sectors; + int major; + int minor; + int read_only; + + struct mtx blk_ring_lock; + + device_t ndev; + + /* Stats */ + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_err_req; +} blkif_t; + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. + * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs); + +static int mmap_pages; + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. 
+ */ +typedef struct pending_req { + blkif_t *blkif; + uint64_t id; + int nr_pages; + int pendcnt; + unsigned short operation; + int status; + STAILQ_ENTRY(pending_req) free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free = + STAILQ_HEAD_INITIALIZER(pending_free); +static struct mtx pending_free_lock; + +static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list = + STAILQ_HEAD_INITIALIZER(req_sched_list); +static struct mtx req_sched_list_lock; + +static unsigned long mmap_vstart; +static unsigned long *pending_vaddrs; +static grant_handle_t *pending_grant_handles; + +static struct task blk_req_task; + +/* Protos */ +static void disconnect_ring(blkif_t *blkif); +static int vbd_add_dev(struct xenbus_device *xdev); + +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + return pending_vaddrs[vaddr_pagenr(req, seg)]; +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + +static unsigned long +alloc_empty_page_range(unsigned long nr_pages) +{ + void *pages; + int i = 0, j = 0; + multicall_entry_t mcl[17]; + unsigned long mfn_list[16]; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = 0, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pages == NULL) + return 0; + + memset(mcl, 0, sizeof(mcl)); + + while (i < nr_pages) { + unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + + mcl[j].op = __HYPERVISOR_update_va_mapping; + mcl[j].args[0] = va; + + mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; + + xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; + + if (j == 16 || i == nr_pages) { + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + + reservation.nr_extents = j; + + mcl[j].op = __HYPERVISOR_memory_op; + mcl[j].args[0] = XENMEM_decrease_reservation; + mcl[j].args[1] = (unsigned long)&reservation; + + (void)HYPERVISOR_multicall(mcl, j+1); + + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; + j = 0; + } + } + + return (unsigned long)pages; +} + +static pending_req_t * +alloc_req(void) +{ + pending_req_t *req; + mtx_lock(&pending_free_lock); + if ((req = STAILQ_FIRST(&pending_free))) { + STAILQ_REMOVE(&pending_free, req, pending_req, free_list); + STAILQ_NEXT(req, free_list) = NULL; + } + mtx_unlock(&pending_free_lock); + return req; +} + +static void +free_req(pending_req_t *req) +{ + int was_empty; + + mtx_lock(&pending_free_lock); + was_empty = STAILQ_EMPTY(&pending_free); + STAILQ_INSERT_TAIL(&pending_free, req, free_list); + mtx_unlock(&pending_free_lock); + if (was_empty) + taskqueue_enqueue(taskqueue_swi, &blk_req_task); +} + +static void +fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + unmap[invcount].host_addr = vaddr(req, i); + unmap[invcount].dev_bus_addr = 0; + unmap[invcount].handle = handle; + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + PANIC_IF(ret); +} + +static void 
+blkif_get(blkif_t *blkif) +{ + atomic_add_int(&blkif->ref_cnt, 1); +} + +static void +blkif_put(blkif_t *blkif) +{ + if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) { + DPRINTF("Removing %x\n", (unsigned int)blkif); + disconnect_ring(blkif); + if (blkif->mode) + free(blkif->mode, M_DEVBUF); + if (blkif->type) + free(blkif->type, M_DEVBUF); + if (blkif->dev_name) + free(blkif->dev_name, M_DEVBUF); + free(blkif, M_DEVBUF); + } +} + +static int +blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params) +{ + blkif_t *blkif; + + blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!blkif) + return ENOMEM; + + DPRINTF("Created %x\n", (unsigned int)blkif); + + blkif->ref_cnt = 1; + blkif->domid = xdev->otherend_id; + blkif->handle = handle; + blkif->mode = mode; + blkif->type = type; + blkif->dev_name = params; + blkif->xdev = xdev; + xdev->data = blkif; + + mtx_init(&blkif->blk_ring_lock, "blk_ring_ock", "blkback ring lock", MTX_DEF); + + if (strcmp(mode, "w")) + blkif->read_only = 1; + + return 0; +} + +static void +add_to_req_schedule_list_tail(blkif_t *blkif) +{ + if (!blkif->on_req_sched_list) { + mtx_lock(&req_sched_list_lock); + if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) { + blkif_get(blkif); + STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); + blkif->on_req_sched_list = 1; + taskqueue_enqueue(taskqueue_swi, &blk_req_task); + } + mtx_unlock(&req_sched_list_lock); + } +} + +/* This routine does not call blkif_get(), does not schedule the blk_req_task to run, + and assumes that the state is connected */ +static void +add_to_req_schedule_list_tail2(blkif_t *blkif) +{ + mtx_lock(&req_sched_list_lock); + if (!blkif->on_req_sched_list) { + STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); + blkif->on_req_sched_list = 1; + } + mtx_unlock(&req_sched_list_lock); +} + +/* Removes blkif from front of list and does not call blkif_put() (caller must) */ +static blkif_t * +remove_from_req_schedule_list(void) +{ + blkif_t *blkif; + + mtx_lock(&req_sched_list_lock); + + if ((blkif = STAILQ_FIRST(&req_sched_list))) { + STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req); + STAILQ_NEXT(blkif, next_req) = NULL; + blkif->on_req_sched_list = 0; + } + + mtx_unlock(&req_sched_list_lock); + + return blkif; +} + +static void +make_response(blkif_t *blkif, uint64_t id, + unsigned short op, int st) +{ + blkif_response_t *resp; + blkif_back_ring_t *blk_ring = &blkif->ring; + int more_to_do = 0; + int notify; + + mtx_lock(&blkif->blk_ring_lock); + + + /* Place on the response ring for the relevant domain. */ + resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); + resp->id = id; + resp->operation = op; + resp->status = st; + blk_ring->rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify); + + if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) + more_to_do = 1; + + mtx_unlock(&blkif->blk_ring_lock); + + if (more_to_do) + add_to_req_schedule_list_tail(blkif); + + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static void +end_block_io_op(struct bio *bio) +{ + pending_req_t *pending_req = bio->bio_caller2; + + if (bio->bio_error) { + DPRINTF("BIO returned error %d for operation on device %s\n", + bio->bio_error, pending_req->blkif->dev_name); + pending_req->status = BLKIF_RSP_ERROR; + pending_req->blkif->st_err_req++; + } + +#if 0 + printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n", + (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags); +#endif + + if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) { + fast_flush_area(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } + + g_destroy_bio(bio); +} + +static void +dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct { + unsigned long buf; unsigned int nsec; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg = req->nr_segments, nr_sects = 0; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int operation, ret, i, nbio = 0; + + /* Check that number of segments is sane. */ + if (unlikely(nseg == 0) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTF("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + if (req->operation == BLKIF_OP_WRITE) { + if (blkif->read_only) { + DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name); + goto fail_response; + } + operation = BIO_WRITE; + } else + operation = BIO_READ; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + seg[i].nsec = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (seg[i].nsec <= 0)) + goto fail_response; + nr_sects += seg[i].nsec; + + map[i].host_addr = vaddr(pending_req, i); + map[i].dom = blkif->domid; + map[i].ref = req->seg[i].gref; + map[i].flags = GNTMAP_host_map; + if (operation == BIO_WRITE) + map[i].flags |= GNTMAP_readonly; + } + + /* Convert to the disk's sector size */ + nr_sects = (nr_sects << 9) >> blkif->sector_size_shift; + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + PANIC_IF(ret); + + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTF("invalid buffer -- could not remap it\n"); + goto fail_flush; + } + + pending_handle(pending_req, i) = map[i].handle; +#if 0 + /* Can't do this in FreeBSD since vtophys() returns the pfn */ + /* of the remote domain who loaned us the machine page - DPT */ + xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] = + map[i]dev_bus_addr >> PAGE_SHIFT; +#endif + seg[i].buf = map[i].dev_bus_addr | + (req->seg[i].first_sect << 9); + } + + if (req->sector_number + nr_sects > blkif->media_num_sectors) { + DPRINTF("%s of [%llu,%llu] extends past end of device %s\n", + operation == BIO_READ ? 
"read" : "write", + req->sector_number, + req->sector_number + nr_sects, blkif->dev_name); + goto fail_flush; + } + + for (i = 0; i < nseg; i++) { + struct bio *bio; + + if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) { + DPRINTF("Misaligned I/O request from domain %d", blkif->domid); + goto fail_put_bio; + } + + bio = biolist[nbio++] = g_new_bio(); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bio_cmd = operation; + bio->bio_offset = req->sector_number << blkif->sector_size_shift; + bio->bio_length = seg[i].nsec << 9; + bio->bio_bcount = bio->bio_length; + bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK)); + bio->bio_done = end_block_io_op; + bio->bio_caller2 = pending_req; + bio->bio_dev = blkif->cdev; + + req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift; +#if 0 + printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n", + (unsigned int)bio, req->operation, req->sector_number, seg[i].nsec, + blkif->cdev->si_iosize_max, seg[i].buf); +#endif + } + + pending_req->pendcnt = nbio; + blkif_get(blkif); + + for (i = 0; i < nbio; i++) + (*blkif->csw->d_strategy)(biolist[i]); + + return; + + fail_put_bio: + for (i = 0; i < (nbio-1); i++) + g_destroy_bio(biolist[i]); + fail_flush: + fast_flush_area(pending_req); + fail_response: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); +} + +static void +blk_req_action(void *context, int pending) +{ + blkif_t *blkif; + + DPRINTF("\n"); + + while (!STAILQ_EMPTY(&req_sched_list)) { + blkif_back_ring_t *blk_ring; + RING_IDX rc, rp; + + blkif = remove_from_req_schedule_list(); + + blk_ring = &blkif->ring; + rc = blk_ring->req_cons; + rp = blk_ring->sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) { + blkif_request_t *req; + pending_req_t *pending_req; + + pending_req = alloc_req(); + if (pending_req == NULL) + goto out_of_preqs; + + req = RING_GET_REQUEST(blk_ring, rc); + blk_ring->req_cons = ++rc; /* before make_response() */ + + switch (req->operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, req, pending_req); + break; + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, req, pending_req); + break; + default: + blkif->st_err_req++; + DPRINTF("error: unknown block io operation [%d]\n", + req->operation); + make_response(blkif, req->id, req->operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + } + + blkif_put(blkif); + } + + return; + + out_of_preqs: + /* We ran out of pending req structs */ + /* Just requeue interface and wait to be rescheduled to run when one is freed */ + add_to_req_schedule_list_tail2(blkif); + blkif->st_oo_req++; +} + +/* Handle interrupt from a frontend */ +static void +blkback_intr(void *arg) +{ + blkif_t *blkif = arg; + DPRINTF("%x\n", (unsigned int)blkif); + add_to_req_schedule_list_tail(blkif); +} + +/* Map grant ref for ring */ +static int +map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) +{ + struct gnttab_map_grant_ref op; + + ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); + if (ring->va == 0) + return ENOMEM; + + op.host_addr = ring->va; + op.flags = GNTMAP_host_map; + op.ref = ref; + op.dom = dom; + HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (op.status) { + WPRINTF("grant table op err=%d\n", op.status); + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; + return EACCES; + } + + ring->handle = op.handle; + ring->bus_addr = op.dev_bus_addr; + + return 0; +} + +/* Unmap grant ref for ring */ +static void +unmap_ring(struct ring_ref *ring) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = ring->va; + op.dev_bus_addr = ring->bus_addr; + op.handle = ring->handle; + HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (op.status) + WPRINTF("grant table op err=%d\n", op.status); + + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; +} + +static int +connect_ring(blkif_t *blkif) +{ + struct xenbus_device *xdev = blkif->xdev; + blkif_sring_t *ring; + unsigned long ring_ref; + evtchn_port_t evtchn; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + if (blkif->ring_connected) + return 0; + + // Grab FE data and map his memory + err = xenbus_gather(NULL, xdev->otherend, + "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(xdev, err, + "reading %s/ring-ref and event-channel", + xdev->otherend); + return err; + } + + err = map_ring(ring_ref, blkif->domid, &blkif->rr); + if (err) { + xenbus_dev_fatal(xdev, err, "mapping ring"); + return err; + } + ring = (blkif_sring_t *)blkif->rr.va; + BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE); + + op.u.bind_interdomain.remote_dom = blkif->domid; + op.u.bind_interdomain.remote_port = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_ring(&blkif->rr); + xenbus_dev_fatal(xdev, err, "binding event channel"); + return err; + } + blkif->evtchn = op.u.bind_interdomain.local_port; + + /* bind evtchn to irq handler */ + blkif->irq = + bind_evtchn_to_irqhandler(blkif->evtchn, "blkback", + blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie); + + blkif->ring_connected = 1; + + DPRINTF("%x rings connected! 
evtchn=%d irq=%d\n", + (unsigned int)blkif, blkif->evtchn, blkif->irq); + + return 0; +} + +static void +disconnect_ring(blkif_t *blkif) +{ + DPRINTF("\n"); + + if (blkif->ring_connected) { + unbind_from_irqhandler(blkif->irq, blkif->irq_cookie); + blkif->irq = 0; + unmap_ring(&blkif->rr); + blkif->ring_connected = 0; + } +} + +static void +connect(blkif_t *blkif) +{ + struct xenbus_transaction *xbt; + struct xenbus_device *xdev = blkif->xdev; + int err; + + if (!blkif->ring_connected || + blkif->vn == NULL || + blkif->state == XenbusStateConnected) + return; + + DPRINTF("%s\n", xdev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + xbt = xenbus_transaction_start(); + if (IS_ERR(xbt)) { + xenbus_dev_fatal(xdev, PTR_ERR(xbt), + "Error writing configuration for backend " + "(start transaction)"); + return; + } + + err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u", + blkif->media_num_sectors); + if (err) { + xenbus_dev_fatal(xdev, err, "writing %s/sectors", + xdev->nodename); + goto abort; + } + + err = xenbus_printf(xbt, xdev->nodename, "info", "%u", + blkif->read_only ? VDISK_READONLY : 0); + if (err) { + xenbus_dev_fatal(xdev, err, "writing %s/info", + xdev->nodename); + goto abort; + } + err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u", + blkif->sector_size); + if (err) { + xenbus_dev_fatal(xdev, err, "writing %s/sector-size", + xdev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(xdev, err, "ending transaction"); + + err = xenbus_switch_state(xdev, NULL, XenbusStateConnected); + if (err) + xenbus_dev_fatal(xdev, err, "switching to Connected state", + xdev->nodename); + + blkif->state = XenbusStateConnected; + + return; + + abort: + xenbus_transaction_end(xbt, 1); +} + +static int +blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) +{ + int err; + char *p, *mode = NULL, *type = NULL, *params = NULL; + long handle; + + DPRINTF("node=%s\n", xdev->nodename); + + p = strrchr(xdev->otherend, '/') + 1; + handle = strtoul(p, NULL, 0); + + mode = xenbus_read(NULL, xdev->nodename, "mode", NULL); + if (IS_ERR(mode)) { + xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode"); + err = PTR_ERR(mode); + goto error; + } + + type = xenbus_read(NULL, xdev->nodename, "type", NULL); + if (IS_ERR(type)) { + xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type"); + err = PTR_ERR(type); + goto error; + } + + params = xenbus_read(NULL, xdev->nodename, "params", NULL); + if (IS_ERR(type)) { + xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params"); + err = PTR_ERR(params); + goto error; + } + + err = blkif_create(xdev, handle, mode, type, params); + if (err) { + xenbus_dev_fatal(xdev, err, "creating blkif"); + goto error; + } + + err = vbd_add_dev(xdev); + if (err) { + blkif_put((blkif_t *)xdev->data); + xenbus_dev_fatal(xdev, err, "adding vbd device"); + } + + return err; + + error: + if (mode) + free(mode, M_DEVBUF); + if (type) + free(type, M_DEVBUF); + if (params) + free(params, M_DEVBUF); + return err; +} + +static int +blkback_remove(struct xenbus_device *xdev) +{ + blkif_t *blkif = xdev->data; + device_t ndev; + + DPRINTF("node=%s\n", xdev->nodename); + + blkif->state = XenbusStateClosing; + + if ((ndev = blkif->ndev)) { + blkif->ndev = NULL; + mtx_lock(&Giant); + device_detach(ndev); + mtx_unlock(&Giant); + } + + xdev->data = NULL; + blkif->xdev = NULL; + blkif_put(blkif); + + return 0; +} + +static int +blkback_resume(struct 
xenbus_device *xdev) +{ + DPRINTF("node=%s\n", xdev->nodename); + return 0; +} + +static void +frontend_changed(struct xenbus_device *xdev, + XenbusState frontend_state) +{ + blkif_t *blkif = xdev->data; + + DPRINTF("state=%d\n", frontend_state); + + blkif->frontend_state = frontend_state; + + switch (frontend_state) { + case XenbusStateInitialising: + break; + case XenbusStateInitialised: + case XenbusStateConnected: + connect_ring(blkif); + connect(blkif); + break; + case XenbusStateClosing: + xenbus_switch_state(xdev, NULL, XenbusStateClosing); + break; + case XenbusStateClosed: + xenbus_remove_device(xdev); + break; + case XenbusStateUnknown: + case XenbusStateInitWait: + xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + +/* ** Driver registration ** */ + +static struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + +static struct xenbus_driver blkback = { + .name = "blkback", + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .resume = blkback_resume, + .otherend_changed = frontend_changed, +}; + +static void +blkback_init(void *unused) +{ + int i; + + TASK_INIT(&blk_req_task, 0, blk_req_action, NULL); + mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF); + + mtx_init(&pending_free_lock, "blk_pending_req_ock", "blkback pending request lock", MTX_DEF); + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + pending_reqs = malloc(sizeof(pending_reqs[0]) * + blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT); + pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) * + mmap_pages, M_DEVBUF, M_NOWAIT); + pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) * + mmap_pages, M_DEVBUF, M_NOWAIT); + mmap_vstart = alloc_empty_page_range(mmap_pages); + if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) { + if (pending_reqs) + free(pending_reqs, M_DEVBUF); + if (pending_grant_handles) + free(pending_grant_handles, M_DEVBUF); + if (pending_vaddrs) + free(pending_vaddrs, M_DEVBUF); + WPRINTF("out of memory\n"); + return; + } + + for (i = 0; i < mmap_pages; i++) { + pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT); + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + } + + for (i = 0; i < blkif_reqs; i++) { + STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list); + } + + DPRINTF("registering %s\n", blkback.name); + xenbus_register_backend(&blkback); +} + +SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL) + +static void +close_device(blkif_t *blkif) +{ + DPRINTF("closing dev=%s\n", blkif->dev_name); + if (blkif->vn) { + int flags = FREAD; + + if (!blkif->read_only) + flags |= FWRITE; + + if (blkif->csw) { + dev_relthread(blkif->cdev); + blkif->csw = NULL; + } + + (void)vn_close(blkif->vn, flags, NOCRED, curthread); + blkif->vn = NULL; + } +} + +static int +open_device(blkif_t *blkif) +{ + struct nameidata nd; + struct vattr vattr; + struct cdev *dev; + struct cdevsw *devsw; + int flags = FREAD, err = 0; + + DPRINTF("opening dev=%s\n", blkif->dev_name); + + if (!blkif->read_only) + flags |= FWRITE; + + if (!curthread->td_proc->p_fd->fd_cdir) { + curthread->td_proc->p_fd->fd_cdir = rootvnode; + VREF(rootvnode); + } + if (!curthread->td_proc->p_fd->fd_rdir) { + curthread->td_proc->p_fd->fd_rdir = rootvnode; + VREF(rootvnode); + } + if (!curthread->td_proc->p_fd->fd_jdir) { + curthread->td_proc->p_fd->fd_jdir = rootvnode; + VREF(rootvnode); + } + + again: + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, 
curthread); + err = vn_open(&nd, &flags, 0, -1); + if (err) { + if (blkif->dev_name[0] != '/') { + char *dev_path = "/dev/"; + char *dev_name; + + /* Try adding device path at beginning of name */ + dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT); + if (dev_name) { + sprintf(dev_name, "%s%s", dev_path, blkif->dev_name); + free(blkif->dev_name, M_DEVBUF); + blkif->dev_name = dev_name; + goto again; + } + } + xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name); + return err; + } + NDFREE(&nd, NDF_ONLY_PNBUF); + + blkif->vn = nd.ni_vp; + + /* We only support disks for now */ + if (!vn_isdisk(blkif->vn, &err)) { + xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name); + VOP_UNLOCK(blkif->vn, 0, curthread); + goto error; + } + + blkif->cdev = blkif->vn->v_rdev; + blkif->csw = dev_refthread(blkif->cdev); + PANIC_IF(blkif->csw == NULL); + + err = VOP_GETATTR(blkif->vn, &vattr, NOCRED, curthread); + if (err) { + xenbus_dev_fatal(blkif->xdev, err, + "error getting vnode attributes for device %s", blkif->dev_name); + VOP_UNLOCK(blkif->vn, 0, curthread); + goto error; + } + + VOP_UNLOCK(blkif->vn, 0, curthread); + + dev = blkif->vn->v_rdev; + devsw = dev->si_devsw; + if (!devsw->d_ioctl) { + err = ENODEV; + xenbus_dev_fatal(blkif->xdev, err, + "no d_ioctl for device %s!", blkif->dev_name); + goto error; + } + + err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread); + if (err) { + xenbus_dev_fatal(blkif->xdev, err, + "error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name); + goto error; + } + blkif->sector_size_shift = fls(blkif->sector_size) - 1; + + err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread); + if (err) { + xenbus_dev_fatal(blkif->xdev, err, + "error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name); + goto error; + } + blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift; + + blkif->major = umajor(vattr.va_rdev); + blkif->minor = uminor(vattr.va_rdev); + + DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n", + blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size); + + return 0; + + error: + close_device(blkif); + return err; +} + +static int +vbd_add_dev(struct xenbus_device *xdev) +{ + blkif_t *blkif = xdev->data; + device_t nexus, ndev; + devclass_t dc; + int err = 0; + + mtx_lock(&Giant); + + /* We will add a vbd device as a child of nexus0 (for now) */ + if (!(dc = devclass_find("nexus")) || + !(nexus = devclass_get_device(dc, 0))) { + WPRINTF("could not find nexus0!\n"); + err = ENOENT; + goto done; + } + + + /* Create a newbus device representing the vbd */ + ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle); + if (!ndev) { + WPRINTF("could not create newbus device vbd%d!\n", blkif->handle); + err = EFAULT; + goto done; + } + + blkif_get(blkif); + device_set_ivars(ndev, blkif); + blkif->ndev = ndev; + + device_probe_and_attach(ndev); + + done: + + mtx_unlock(&Giant); + + return err; +} + +enum { + VBD_SYSCTL_DOMID, + VBD_SYSCTL_ST_RD_REQ, + VBD_SYSCTL_ST_WR_REQ, + VBD_SYSCTL_ST_OO_REQ, + VBD_SYSCTL_ST_ERR_REQ, + VBD_SYSCTL_RING, +}; + +static char * +vbd_sysctl_ring_info(blkif_t *blkif, int cmd) +{ + char *buf = malloc(256, M_DEVBUF, M_WAITOK); + if (buf) { + if (!blkif->ring_connected) + sprintf(buf, "ring not connected\n"); + else { + blkif_back_ring_t *ring = &blkif->ring; + sprintf(buf, "nr_ents=%x req_cons=%x" + 
" req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + ring->nr_ents, ring->req_cons, + ring->sring->req_prod, ring->sring->req_event, + ring->sring->rsp_prod, ring->sring->rsp_event); + } + } + return buf; +} + +static int +vbd_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + device_t dev = (device_t)arg1; + blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + const char *value; + char *buf = NULL; + int err; + + switch (arg2) { + case VBD_SYSCTL_DOMID: + return sysctl_handle_int(oidp, NULL, blkif->domid, req); + case VBD_SYSCTL_ST_RD_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req); + case VBD_SYSCTL_ST_WR_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req); + case VBD_SYSCTL_ST_OO_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req); + case VBD_SYSCTL_ST_ERR_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req); + case VBD_SYSCTL_RING: + value = buf = vbd_sysctl_ring_info(blkif, arg2); + break; + default: + return (EINVAL); + } + + err = SYSCTL_OUT(req, value, strlen(value)); + if (buf != NULL) + free(buf, M_DEVBUF); + + return err; +} + +/* Newbus vbd device driver probe */ +static int +vbd_probe(device_t dev) +{ + DPRINTF("vbd%d\n", device_get_unit(dev)); + return 0; +} + +/* Newbus vbd device driver attach */ +static int +vbd_attach(device_t dev) +{ + blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + + DPRINTF("%s\n", blkif->dev_name); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I", + "domid of frontend"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I", + "number of read reqs"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I", + "number of write reqs"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I", + "number of deferred reqs"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I", + "number of reqs that returned error"); +#if XEN_BLKBACK_DEBUG + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "ring", CTLFLAG_RD, + dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A", + "req ring info"); +#endif + + if (!open_device(blkif)) + connect(blkif); + + return bus_generic_attach(dev); +} + +/* Newbus vbd device driver detach */ +static int +vbd_detach(device_t dev) +{ + blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + + DPRINTF("%s\n", blkif->dev_name); + + close_device(blkif); + + bus_generic_detach(dev); + + blkif_put(blkif); + + return 0; +} + +static device_method_t vbd_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, vbd_probe), + DEVMETHOD(device_attach, vbd_attach), + DEVMETHOD(device_detach, vbd_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + {0, 0} +}; + +static devclass_t vbd_devclass; + +static driver_t vbd_driver = { + "vbd", + 
vbd_methods, + 0, +}; + +DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0); + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c new file mode 100644 index 0000000..c448b81 --- /dev/null +++ b/sys/dev/xen/blkfront/blkfront.c @@ -0,0 +1,1021 @@ +/*- + * All rights reserved. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * XenoBSD block device driver + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <sys/bio.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/module.h> + +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <machine/intr_machdep.h> +#include <machine/vmparam.h> + +#include <machine/xen/hypervisor.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/xen_intr.h> +#include <machine/xen/xenbus.h> +#include <machine/xen/evtchn.h> +#include <xen/interface/grant_table.h> + +#include <geom/geom_disk.h> +#include <machine/xen/xenfunc.h> +#include <xen/gnttab.h> + +#include <dev/xen/blkfront/block.h> + +#define ASSERT(S) KASSERT(S, (#S)) +/* prototypes */ +struct xb_softc; +static void xb_startio(struct xb_softc *sc); +static void connect(struct blkfront_info *); +static void blkfront_closing(struct xenbus_device *); +static int blkfront_remove(struct xenbus_device *); +static int talk_to_backend(struct xenbus_device *, struct blkfront_info *); +static int setup_blkring(struct xenbus_device *, struct blkfront_info *); +static void blkif_int(void *); +#if 0 +static void blkif_restart_queue(void *arg); +#endif +static void blkif_recover(struct blkfront_info *); +static void blkif_completion(struct blk_shadow *); +static void blkif_free(struct blkfront_info *, int); + +#define GRANT_INVALID_REF 0 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + +LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; + +/* Control whether runtime update of vbds is enabled. 
*/ +#define ENABLE_VBD_UPDATE 0 + +#if ENABLE_VBD_UPDATE +static void vbd_update(void); +#endif + + +#define BLKIF_STATE_DISCONNECTED 0 +#define BLKIF_STATE_CONNECTED 1 +#define BLKIF_STATE_SUSPENDED 2 + +#ifdef notyet +static char *blkif_state_name[] = { + [BLKIF_STATE_DISCONNECTED] = "disconnected", + [BLKIF_STATE_CONNECTED] = "connected", + [BLKIF_STATE_SUSPENDED] = "closed", +}; + +static char * blkif_status_name[] = { + [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", + [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", + [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", + [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", +}; +#endif +#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) +#if 0 +#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args) +#else +#define DPRINTK(fmt, args...) +#endif + +static grant_ref_t gref_head; +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) + +static void kick_pending_request_queues(struct blkfront_info *); +static int blkif_open(struct disk *dp); +static int blkif_close(struct disk *dp); +static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); +static int blkif_queue_request(struct bio *bp); +static void xb_strategy(struct bio *bp); + + + +/* XXX move to xb_vbd.c when VBD update support is added */ +#define MAX_VBDS 64 + +#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ +#define XBD_SECTOR_SHFT 9 + +static struct mtx blkif_io_lock; + +static unsigned long +pfn_to_mfn(unsigned long pfn) +{ + return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); +} + + +int +xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size, + struct blkfront_info *info) +{ + struct xb_softc *sc; + int error = 0; + + sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); + sc->xb_unit = unit; + sc->xb_info = info; + info->sc = sc; + + memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); + sc->xb_disk = disk_alloc(); + sc->xb_disk->d_unit = unit; + sc->xb_disk->d_open = blkif_open; + sc->xb_disk->d_close = blkif_close; + sc->xb_disk->d_ioctl = blkif_ioctl; + sc->xb_disk->d_strategy = xb_strategy; + sc->xb_disk->d_name = "xbd"; + sc->xb_disk->d_drv1 = sc; + sc->xb_disk->d_sectorsize = sector_size; + + /* XXX */ + sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; +#if 0 + sc->xb_disk->d_maxsize = DFLTPHYS; +#else /* XXX: xen can't handle large single i/o requests */ + sc->xb_disk->d_maxsize = 4096; +#endif +#ifdef notyet + XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", + xb_diskinfo[sc->xb_unit].device, sc->xb_unit, + sc->xb_disk->d_mediasize); +#endif + sc->xb_disk->d_flags = 0; + disk_create(sc->xb_disk, DISK_VERSION_00); + bioq_init(&sc->xb_bioq); + + return error; +} + +void +xlvbd_del(struct blkfront_info *info) +{ + struct xb_softc *sc; + + sc = info->sc; + disk_destroy(sc->xb_disk); +} +/************************ end VBD support *****************/ + +/* + * Read/write routine for a buffer. Finds the proper unit, place it on + * the sortq and kick the controller. + */ +static void +xb_strategy(struct bio *bp) +{ + struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; + + /* bogus disk? 
*/ + if (sc == NULL) { + bp->bio_error = EINVAL; + bp->bio_flags |= BIO_ERROR; + goto bad; + } + + DPRINTK(""); + + /* + * Place it in the queue of disk activities for this disk + */ + mtx_lock(&blkif_io_lock); + bioq_disksort(&sc->xb_bioq, bp); + + xb_startio(sc); + mtx_unlock(&blkif_io_lock); + return; + + bad: + /* + * Correctly set the bio to indicate a failed tranfer. + */ + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return; +} + + +/* Setup supplies the backend dir, virtual device. + +We place an event channel and shared frame entries. +We watch backend to wait if it's ok. */ +static int blkfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err, vdevice, i; + struct blkfront_info *info; + + /* FIXME: Use dynamic device id if this is not set. */ + err = xenbus_scanf(XBT_NIL, dev->nodename, + "virtual-device", "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } + + info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO); + if (info == NULL) { + xenbus_dev_fatal(dev, ENOMEM, "allocating info structure"); + return ENOMEM; + } + + /* + * XXX debug only + */ + for (i = 0; i < sizeof(*info); i++) + if (((uint8_t *)info)[i] != 0) + panic("non-null memory"); + + info->shadow_free = 0; + info->xbdev = dev; + info->vdevice = vdevice; + info->connected = BLKIF_STATE_DISCONNECTED; + + /* work queue needed ? */ + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Front end dir is a number, which is used as the id. */ + info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); + dev->dev_driver_data = info; + + err = talk_to_backend(dev, info); + if (err) { + free(info, M_DEVBUF); + dev->dev_driver_data = NULL; + return err; + } + + return 0; +} + + +static int blkfront_resume(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev_driver_data; + int err; + + DPRINTK("blkfront_resume: %s\n", dev->nodename); + + blkif_free(info, 1); + + err = talk_to_backend(dev, info); + if (!err) + blkif_recover(info); + + return err; +} + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct blkfront_info *info) +{ + const char *message = NULL; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. 
*/ + err = setup_blkring(dev, info); + if (err) + goto out; + + again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_blkring; + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-ref","%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", irq_to_evtchn_port(info->irq)); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_blkring; + } + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_fatal(dev, err, "%s", message); + destroy_blkring: + blkif_free(info, 0); + out: + return err; +} + +static int +setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) +{ + blkif_sring_t *sring; + int err; + + info->ring_ref = GRANT_INVALID_REF; + + sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (sring == NULL) { + xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); + return ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT)); + if (err < 0) { + free(sring, M_DEVBUF); + info->ring.sring = NULL; + goto fail; + } + info->ring_ref = err; + + err = bind_listening_port_to_irqhandler(dev->otherend_id, + "xbd", (driver_intr_t *)blkif_int, info, + INTR_TYPE_BIO | INTR_MPSAFE, NULL); + if (err <= 0) { + xenbus_dev_fatal(dev, err, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + info->irq = err; + + return 0; + fail: + blkif_free(info, 0); + return err; +} + + +/** + * Callback received when the backend's state changes. + */ +static void backend_changed(struct xenbus_device *dev, + XenbusState backend_state) +{ + struct blkfront_info *info = dev->dev_driver_data; + + DPRINTK("blkfront:backend_changed.\n"); + + switch (backend_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + connect(info); + break; + + case XenbusStateClosing: + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); +#ifdef notyet + bd = bdget(info->dev); + if (bd == NULL) + xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); + + down(&bd->bd_sem); + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); + up(&bd->bd_sem); + bdput(bd); +#endif + } +} + +/* +** Invoked when the backend is finally 'ready' (and has told produced +** the details about the physical device - #sectors, size, etc). 
+*/
+static void 
+connect(struct blkfront_info *info)
+{
+	unsigned long sectors, sector_size;
+	unsigned int binfo;
+	int err;
+
+	if( (info->connected == BLKIF_STATE_CONNECTED) || 
+	    (info->connected == BLKIF_STATE_SUSPENDED) )
+		return;
+
+	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "sectors", "%lu", &sectors,
+			    "info", "%u", &binfo,
+			    "sector-size", "%lu", &sector_size,
+			    NULL);
+	if (err) {
+		xenbus_dev_fatal(info->xbdev, err,
+				 "reading backend fields at %s",
+				 info->xbdev->otherend);
+		return;
+	}
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-barrier", "%lu", &info->feature_barrier,
+			    NULL);
+	if (err)
+		info->feature_barrier = 0;
+
+	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+
+	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+	/* Kick pending requests. */
+	mtx_lock(&blkif_io_lock);
+	info->connected = BLKIF_STATE_CONNECTED;
+	kick_pending_request_queues(info);
+	mtx_unlock(&blkif_io_lock);
+
+#if 0
+	add_disk(info->gd);
+#endif
+}
+
+/**
+ * Handle the change of state of the backend to Closing.  We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend.  Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void blkfront_closing(struct xenbus_device *dev)
+{
+	struct blkfront_info *info = dev->dev_driver_data;
+
+	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
+
+	if (info->mi) {
+		DPRINTK("Calling xlvbd_del\n");
+		xlvbd_del(info);
+		info->mi = NULL;
+	}
+
+	xenbus_switch_state(dev, XenbusStateClosed);
+}
+
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+	struct blkfront_info *info = dev->dev_driver_data;
+
+	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
+
+	blkif_free(info, 0);
+
+	free(info, M_DEVBUF);
+
+	return 0;
+}
+
+
+static inline int 
+GET_ID_FROM_FREELIST(struct blkfront_info *info)
+{
+	unsigned long nfree = info->shadow_free;
+
+	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
+	info->shadow_free = info->shadow[nfree].req.id;
+	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
+	return nfree;
+}
+
+static inline void 
+ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
+{
+	info->shadow[id].req.id = info->shadow_free;
+	info->shadow[id].request = 0;
+	info->shadow_free = id;
+}
+
+static inline void 
+flush_requests(struct blkfront_info *info)
+{
+	int notify;
+
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+
+	if (notify)
+		notify_remote_via_irq(info->irq);
+}
+
+static void 
+kick_pending_request_queues(struct blkfront_info *info)
+{
+	/* XXX check if we can't simplify */
+#if 0
+	if (!RING_FULL(&info->ring)) {
+		/* Re-enable calldowns. */
+		blk_start_queue(info->rq);
+		/* Kick things off immediately. */
+		do_blkif_request(info->rq);
+	}
+#endif
+	if (!RING_FULL(&info->ring)) {
+#if 0
+		sc = LIST_FIRST(&xbsl_head);
+		LIST_REMOVE(sc, entry);
+		/* Re-enable calldowns. */
+		blk_start_queue(di->rq);
+#endif
+		/* Kick things off immediately. */
+		xb_startio(info->sc);
+	}
+}
+
+#if 0
+/* XXX */
+static void blkif_restart_queue(void *arg) 
+{
+	struct blkfront_info *info = (struct blkfront_info *)arg;
+
+	mtx_lock(&blkif_io_lock);
+	kick_pending_request_queues(info);
+	mtx_unlock(&blkif_io_lock);
+}
+#endif
+
+static void blkif_restart_queue_callback(void *arg)
+{
+#if 0
+	struct blkfront_info *info = (struct blkfront_info *)arg;
+	/* XXX BSD equiv ?
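	   (The Linux schedule_work() call that follows defers the queue
	   restart to process context.  A FreeBSD equivalent would be a
	   taskqueue, which the netback driver later in this change already
	   uses; a minimal sketch, with the task and handler names being
	   illustrative only:

		static struct task xb_restart_task;

		static void
		xb_restart_task_fn(void *arg, int pending)
		{
			struct blkfront_info *info = arg;

			mtx_lock(&blkif_io_lock);
			kick_pending_request_queues(info);
			mtx_unlock(&blkif_io_lock);
		}

	   TASK_INIT() would be run once with the info pointer as the task
	   context, and this callback would then reduce to
	   taskqueue_enqueue(taskqueue_swi, &xb_restart_task).)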
*/ + + schedule_work(&info->work); +#endif +} + +static int +blkif_open(struct disk *dp) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) { + printk("xb%d: not found", sc->xb_unit); + return (ENXIO); + } + + sc->xb_flags |= XB_OPEN; + sc->xb_info->users++; + return (0); +} + +static int +blkif_close(struct disk *dp) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) + return (ENXIO); + sc->xb_flags &= ~XB_OPEN; + if (--(sc->xb_info->users) == 0) { + /* Check whether we have been instructed to close. We will + have ignored this request initially, as the device was + still mounted. */ + struct xenbus_device * dev = sc->xb_info->xbdev; + XenbusState state = xenbus_read_driver_state(dev->otherend); + + if (state == XenbusStateClosing) + blkfront_closing(dev); + } + return (0); +} + +static int +blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) + return (ENXIO); + + return (ENOTTY); +} + + +/* + * blkif_queue_request + * + * request block io + * + * id: for guest use only. + * operation: BLKIF_OP_{READ,WRITE,PROBE} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + */ +static int blkif_queue_request(struct bio *bp) +{ + caddr_t alignbuf; + unsigned long buffer_ma; + blkif_request_t *ring_req; + unsigned long id; + unsigned int fsect, lsect; + struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; + struct blkfront_info *info = sc->xb_info; + int ref; + + if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Check if the buffer is properly aligned */ + if ((vm_offset_t)bp->bio_data & PAGE_MASK) { + int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : + PAGE_SIZE; + caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, + M_NOWAIT); + + alignbuf = (char *)roundup2((u_long)newbuf, align); + + /* save a copy of the current buffer */ + bp->bio_driver1 = newbuf; + bp->bio_driver2 = alignbuf; + + /* Copy the data for a write */ + if (bp->bio_cmd == BIO_WRITE) + bcopy(bp->bio_data, alignbuf, bp->bio_bcount); + } else + alignbuf = bp->bio_data; + + /* Fill out a communications ring structure. */ + ring_req = RING_GET_REQUEST(&info->ring, + info->ring.req_prod_pvt); + id = GET_ID_FROM_FREELIST(info); + info->shadow[id].request = (unsigned long)bp; + + ring_req->id = id; + ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : + BLKIF_OP_WRITE; + + ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; + ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; + + ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer + * chaining is not supported. + */ + + buffer_ma = vtomach(alignbuf); + fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; + lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + KASSERT( ref != -ENOSPC, ("grant_reference failed") ); + + gnttab_grant_foreign_access_ref( + ref, + info->xbdev->otherend_id, + buffer_ma >> PAGE_SHIFT, + ring_req->operation & 1 ); /* ??? 
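	   (The last argument is the read-only flag.  BLKIF_OP_READ is 0 and
	   BLKIF_OP_WRITE is 1 in the blkif interface, so the buffer page is
	   granted read-only exactly when this is a write (the backend only
	   reads it) and read-write for a read (the backend must fill it).
	   A more explicit spelling of the same expression, assuming those
	   opcode values:

		int readonly = (ring_req->operation == BLKIF_OP_WRITE);

		gnttab_grant_foreign_access_ref(ref, info->xbdev->otherend_id,
		    buffer_ma >> PAGE_SHIFT, readonly);
	   )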
*/ + info->shadow[id].frame[ring_req->nr_segments] = + buffer_ma >> PAGE_SHIFT; + + ring_req->seg[ring_req->nr_segments] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + + ring_req->nr_segments++; + KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, + ("XEN buffer must be sector aligned")); + KASSERT(lsect <= 7, + ("XEN disk driver data cannot cross a page boundary")); + + buffer_ma &= ~PAGE_MASK; + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. */ + info->shadow[id].req = *ring_req; + + gnttab_free_grant_references(gref_head); + + return 0; +} + + + +/* + * Dequeue buffers and place them in the shared communication ring. + * Return when no more requests can be accepted or all buffers have + * been queued. + * + * Signal XEN once the ring has been filled out. + */ +static void +xb_startio(struct xb_softc *sc) +{ + struct bio *bp; + int queued = 0; + struct blkfront_info *info = sc->xb_info; + DPRINTK(""); + + mtx_assert(&blkif_io_lock, MA_OWNED); + + while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { + + if (RING_FULL(&info->ring)) + goto wait; + + if (blkif_queue_request(bp)) { + wait: + bioq_insert_head(&sc->xb_bioq, bp); + break; + } + queued++; + } + + if (queued != 0) + flush_requests(sc->xb_info); +} + +static void +blkif_int(void *xsc) +{ + struct xb_softc *sc = NULL; + struct bio *bp; + blkif_response_t *bret; + RING_IDX i, rp; + struct blkfront_info *info = xsc; + DPRINTK(""); + + TRACE_ENTER; + + mtx_lock(&blkif_io_lock); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + mtx_unlock(&blkif_io_lock); + return; + } + + again: + rp = info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + bp = (struct bio *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + ADD_ID_TO_FREELIST(info, id); + + switch (bret->operation) { + case BLKIF_OP_READ: + /* had an unaligned buffer that needs to be copied */ + if (bp->bio_driver1) + bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); + /* FALLTHROUGH */ + case BLKIF_OP_WRITE: + + /* free the copy buffer */ + if (bp->bio_driver1) { + free(bp->bio_driver1, M_DEVBUF); + bp->bio_driver1 = NULL; + } + + if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { + XENPRINTF("Bad return from blkdev data request: %x\n", + bret->status); + bp->bio_flags |= BIO_ERROR; + } + + sc = (struct xb_softc *)bp->bio_disk->d_drv1; + + if (bp->bio_flags & BIO_ERROR) + bp->bio_error = EIO; + else + bp->bio_resid = 0; + + biodone(bp); + break; + default: + panic("received invalid operation"); + break; + } + } + + info->ring.rsp_cons = i; + + if (i != info->ring.req_prod_pvt) { + int more_to_do; + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + if (more_to_do) + goto again; + } else { + info->ring.sring->rsp_event = i + 1; + } + + kick_pending_request_queues(info); + + mtx_unlock(&blkif_io_lock); +} + +static void +blkif_free(struct blkfront_info *info, int suspend) +{ + +/* Prevent new requests being issued until we fix things up. */ + mtx_lock(&blkif_io_lock); + info->connected = suspend ? + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; + mtx_unlock(&blkif_io_lock); + + /* Free resources associated with old device channel. 
*/
+	if (info->ring_ref != GRANT_INVALID_REF) {
+		gnttab_end_foreign_access(info->ring_ref, 0,
+					  info->ring.sring);
+		info->ring_ref = GRANT_INVALID_REF;
+		info->ring.sring = NULL;
+	}
+	if (info->irq)
+		unbind_from_irqhandler(info->irq, info); 
+	info->irq = 0;
+
+}
+
+static void 
+blkif_completion(struct blk_shadow *s)
+{
+	int i;
+
+	for (i = 0; i < s->req.nr_segments; i++)
+		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+}
+
+static void 
+blkif_recover(struct blkfront_info *info)
+{
+	int i, j;
+	blkif_request_t *req;
+	struct blk_shadow *copy;
+
+	/* Stage 1: Make a safe copy of the shadow state. */
+	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
+	PANIC_IF(copy == NULL);
+	memcpy(copy, info->shadow, sizeof(info->shadow));
+
+	/* Stage 2: Set up free list. */
+	memset(&info->shadow, 0, sizeof(info->shadow));
+	for (i = 0; i < BLK_RING_SIZE; i++)
+		info->shadow[i].req.id = i+1;
+	info->shadow_free = info->ring.req_prod_pvt;
+	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+	/* Stage 3: Find pending requests and requeue them. */
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		/* Not in use? */
+		if (copy[i].request == 0)
+			continue;
+
+		/* Grab a request slot and copy shadow state into it. */
+		req = RING_GET_REQUEST(
+			&info->ring, info->ring.req_prod_pvt);
+		*req = copy[i].req;
+
+		/* We get a new request id, and must reset the shadow state. */
+		req->id = GET_ID_FROM_FREELIST(info);
+		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+		/* Rewrite any grant references invalidated by suspend/resume. */
+		for (j = 0; j < req->nr_segments; j++)
+			gnttab_grant_foreign_access_ref(
+				req->seg[j].gref,
+				info->xbdev->otherend_id,
+				pfn_to_mfn(info->shadow[req->id].frame[j]),
+				0 /* assume not readonly */);
+
+		info->shadow[req->id].req = *req;
+
+		info->ring.req_prod_pvt++;
+	}
+
+	free(copy, M_DEVBUF);
+
+	xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+	/* Now safe for us to use the shared ring */
+	mtx_lock(&blkif_io_lock);
+	info->connected = BLKIF_STATE_CONNECTED;
+	mtx_unlock(&blkif_io_lock);
+
+	/* Send off requeued requests */
+	mtx_lock(&blkif_io_lock);
+	flush_requests(info);
+
+	/* Kick any other new requests queued since we resumed */
+	kick_pending_request_queues(info);
+	mtx_unlock(&blkif_io_lock);
+}
+
+static int
+blkfront_is_ready(struct xenbus_device *dev)
+{
+	struct blkfront_info *info = dev->dev_driver_data;
+
+	return info->is_ready;
+}
+
+static struct xenbus_device_id blkfront_ids[] = {
+	{ "vbd" },
+	{ "" }
+};
+
+
+static struct xenbus_driver blkfront = {
+	.name             = "vbd",
+	.ids              = blkfront_ids,
+	.probe            = blkfront_probe,
+	.remove           = blkfront_remove,
+	.resume           = blkfront_resume,
+	.otherend_changed = backend_changed,
+	.is_ready         = blkfront_is_ready,
+};
+
+
+
+static void 
+xenbus_init(void)
+{
+	xenbus_register_frontend(&blkfront);
+}
+
+MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock?
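	   (MTX_SYSINIT() is the usual way: it registers blkif_io_lock so the
	   mutex is initialised automatically during early SYSINIT processing,
	   before the SI_SUB_PSEUDO hook below runs.  The open-coded
	   equivalent would look roughly like the following, with the function
	   and SYSINIT names being illustrative only:

		static void
		blkif_lock_init(void *arg)
		{
			mtx_init(&blkif_io_lock, "BIO LOCK", NULL, MTX_NOWITNESS);
		}
		SYSINIT(blkif_lock, SI_SUB_LOCK, SI_ORDER_ANY, blkif_lock_init, NULL);
	   )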
*/ +SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL); + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 8 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h new file mode 100644 index 0000000..0d14459 --- /dev/null +++ b/sys/dev/xen/blkfront/block.h @@ -0,0 +1,97 @@ +/* + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + + +#ifndef __XEN_DRIVERS_BLOCK_H__ +#define __XEN_DRIVERS_BLOCK_H__ +#include <xen/interface/io/blkif.h> + +struct xlbd_type_info +{ + int partn_shift; + int disks_per_major; + char *devname; + char *diskname; +}; + +struct xlbd_major_info +{ + int major; + int index; + int usage; + struct xlbd_type_info *type; +}; + +struct blk_shadow { + blkif_request_t req; + unsigned long request; + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + + +struct xb_softc { + device_t xb_dev; + struct disk *xb_disk; /* disk params */ + struct bio_queue_head xb_bioq; /* sort queue */ + int xb_unit; + int xb_flags; + struct blkfront_info *xb_info; + LIST_ENTRY(xb_softc) entry; +#define XB_OPEN (1<<0) /* drive is open (can't shut down) */ +}; + + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. They + * hang in private_data off the gendisk structure. We may end up + * putting all kinds of interesting stuff here :-) + */ +struct blkfront_info +{ + struct xenbus_device *xbdev; + dev_t dev; + struct gendisk *gd; + int vdevice; + blkif_vdev_t handle; + int connected; + int ring_ref; + blkif_front_ring_t ring; + unsigned int irq; + struct xlbd_major_info *mi; +#if 0 + request_queue_t *rq; + struct work_struct work; +#endif + struct gnttab_free_callback callback; + struct blk_shadow shadow[BLK_RING_SIZE]; + unsigned long shadow_free; + struct xb_softc *sc; + int feature_barrier; + int is_ready; + /** + * The number of people holding this device open. We won't allow a + * hot-unplug unless this is 0. + */ + int users; +}; +/* Note that xlvbd_add doesn't call add_disk for you: you're expected + to call add_disk on info->gd once the disk is properly connected + up. 
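   (The add_disk()/gendisk wording is carried over from the Linux driver;
   there is no add_disk() on FreeBSD.  In this port the caller is connect()
   in blkfront.c, which, once the backend has published the disk geometry,
   does roughly:

	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

   where sectors, binfo and sector_size have just been read from the
   backend's xenstore directory.)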
*/ +int xlvbd_add(blkif_sector_t capacity, int device, + uint16_t vdisk_info, uint16_t sector_size, struct blkfront_info *info); +void xlvbd_del(struct blkfront_info *info); + +#endif /* __XEN_DRIVERS_BLOCK_H__ */ + diff --git a/sys/dev/xen/console/console.c b/sys/dev/xen/console/console.c new file mode 100644 index 0000000..dc9fe6f --- /dev/null +++ b/sys/dev/xen/console/console.c @@ -0,0 +1,564 @@ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/consio.h> +#include <sys/proc.h> +#include <sys/uio.h> +#include <sys/tty.h> +#include <sys/systm.h> +#include <sys/taskqueue.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <machine/stdarg.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/xen_intr.h> +#include <sys/cons.h> +#include <sys/priv.h> +#include <sys/proc.h> + +#include <dev/xen/console/xencons_ring.h> +#include <xen/interface/io/console.h> + + +#include "opt_ddb.h" +#ifdef DDB +#include <ddb/ddb.h> +#endif + +static char driver_name[] = "xc"; +devclass_t xc_devclass; /* do not make static */ +static void xcstart (struct tty *); +static int xcparam (struct tty *, struct termios *); +static void xcstop (struct tty *, int); +static void xc_timeout(void *); +static void __xencons_tx_flush(void); +static boolean_t xcons_putc(int c); + +/* switch console so that shutdown can occur gracefully */ +static void xc_shutdown(void *arg, int howto); +static int xc_mute; + +static void xcons_force_flush(void); +static void xencons_priv_interrupt(void *); + +static cn_probe_t xccnprobe; +static cn_init_t xccninit; +static cn_getc_t xccngetc; +static cn_putc_t xccnputc; +static cn_putc_t xccnputc_dom0; +static cn_checkc_t xccncheckc; + +#define XC_POLLTIME (hz/10) + +CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc, + xccncheckc, xccnputc, NULL); + +static int xen_console_up; +static boolean_t xc_start_needed; +static struct callout xc_callout; +struct mtx cn_mtx; + +#define RBUF_SIZE 1024 +#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) +#define WBUF_SIZE 4096 +#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) +static char wbuf[WBUF_SIZE]; +static char rbuf[RBUF_SIZE]; +static int rc, rp; +static unsigned int cnsl_evt_reg; +static unsigned int wc, wp; /* write_cons, write_prod */ + +#define CDEV_MAJOR 12 +#define XCUNIT(x) (minor(x)) +#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) +#define CN_LOCK_INIT(x, _name) \ + mtx_init(&x, _name, NULL, MTX_SPIN|MTX_RECURSE) + +#define CN_LOCK(l) \ + do { \ + if (panicstr == NULL) \ + mtx_lock_spin(&(l)); \ + } while (0) +#define CN_UNLOCK(l) \ + do { \ + if (panicstr == NULL) \ + mtx_unlock_spin(&(l)); \ + } while (0) +#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) +#define CN_LOCK_DESTROY(x) mtx_destroy(&x) + + +static struct tty *xccons; + +struct xc_softc { + int xc_unit; + struct cdev *xc_dev; +}; + + +static d_open_t xcopen; +static d_close_t xcclose; +static d_ioctl_t xcioctl; + +static struct cdevsw xc_cdevsw = { + .d_version = D_VERSION, + .d_flags = D_TTY | D_NEEDGIANT, + .d_name = driver_name, + .d_open = xcopen, + .d_close = xcclose, + .d_read = ttyread, + .d_write = ttywrite, + .d_ioctl = xcioctl, + .d_poll = ttypoll, + .d_kqfilter = ttykqfilter, +}; + +static void +xccnprobe(struct consdev *cp) +{ + cp->cn_pri = CN_REMOTE; + cp->cn_tp = xccons; + sprintf(cp->cn_name, "%s0", driver_name); +} + + +static void +xccninit(struct consdev *cp) +{ + CN_LOCK_INIT(cn_mtx,"XCONS 
LOCK"); + +} +int +xccngetc(struct consdev *dev) +{ + int c; + if (xc_mute) + return 0; + do { + if ((c = xccncheckc(dev)) == -1) { + /* polling without sleeping in Xen doesn't work well. + * Sleeping gives other things like clock a chance to + * run + */ + tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep", + XC_POLLTIME); + } + } while(c == -1); + return c; +} + +int +xccncheckc(struct consdev *dev) +{ + int ret = (xc_mute ? 0 : -1); + if (xencons_has_input()) + xencons_handle_input(NULL); + + CN_LOCK(cn_mtx); + if ((rp - rc)) { + /* we need to return only one char */ + ret = (int)rbuf[RBUF_MASK(rc)]; + rc++; + } + CN_UNLOCK(cn_mtx); + return(ret); +} + +static void +xccnputc(struct consdev *dev, int c) +{ + xcons_putc(c); +} + +static void +xccnputc_dom0(struct consdev *dev, int c) +{ + HYPERVISOR_console_io(CONSOLEIO_write, 1, (char *)&c); +} + +extern int db_active; +static boolean_t +xcons_putc(int c) +{ + int force_flush = xc_mute || +#ifdef DDB + db_active || +#endif + panicstr; /* we're not gonna recover, so force + * flush + */ + + if ((wp-wc) < (WBUF_SIZE-1)) { + if ((wbuf[WBUF_MASK(wp++)] = c) == '\n') { + wbuf[WBUF_MASK(wp++)] = '\r'; +#ifdef notyet + if (force_flush) + xcons_force_flush(); +#endif + } + } else if (force_flush) { +#ifdef notyet + xcons_force_flush(); +#endif + } + if (cnsl_evt_reg) + __xencons_tx_flush(); + + /* inform start path that we're pretty full */ + return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; +} + +static void +xc_identify(driver_t *driver, device_t parent) +{ + device_t child; + child = BUS_ADD_CHILD(parent, 0, driver_name, 0); + device_set_driver(child, driver); + device_set_desc(child, "Xen Console"); +} + +static int +xc_probe(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + + sc->xc_unit = device_get_unit(dev); + return (0); +} + +static int +xc_attach(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + + + if (xen_start_info->flags & SIF_INITDOMAIN) { + xc_consdev.cn_putc = xccnputc_dom0; + } + + sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0); + xccons = ttyalloc(); + + sc->xc_dev->si_drv1 = (void *)sc; + sc->xc_dev->si_tty = xccons; + + xccons->t_oproc = xcstart; + xccons->t_param = xcparam; + xccons->t_stop = xcstop; + xccons->t_dev = sc->xc_dev; + + callout_init(&xc_callout, 0); + + xencons_ring_init(); + + cnsl_evt_reg = 1; + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); + + if (xen_start_info->flags & SIF_INITDOMAIN) { + PANIC_IF(bind_virq_to_irqhandler( + VIRQ_CONSOLE, + 0, + "console", + NULL, + xencons_priv_interrupt, + INTR_TYPE_TTY) < 0); + + } + + + /* register handler to flush console on shutdown */ + if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, + NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) + printf("xencons: shutdown event registration failed!\n"); + + TRACE_EXIT; + return (0); +} + +/* + * return 0 for all console input, force flush all output. 
+ */ +static void +xc_shutdown(void *arg, int howto) +{ + xc_mute = 1; + xcons_force_flush(); +} + +void +xencons_rx(char *buf, unsigned len) +{ + int i; + struct tty *tp = xccons; + + for (i = 0; i < len; i++) { + if (xen_console_up) + (*linesw[tp->t_line]->l_rint)(buf[i], tp); + else + rbuf[RBUF_MASK(rp++)] = buf[i]; + } +} + +static void +__xencons_tx_flush(void) +{ + int sz, work_done = 0; + + CN_LOCK(cn_mtx); + while (wc != wp) { + int sent; + sz = wp - wc; + if (sz > (WBUF_SIZE - WBUF_MASK(wc))) + sz = WBUF_SIZE - WBUF_MASK(wc); + if (xen_start_info->flags & SIF_INITDOMAIN) { + HYPERVISOR_console_io(CONSOLEIO_write, sz, &wbuf[WBUF_MASK(wc)]); + wc += sz; + } else { + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if (sent == 0) + break; + wc += sent; + } + work_done = 1; + } + CN_UNLOCK(cn_mtx); + + /* + * ttwakeup calls routines using blocking locks + * + */ + if (work_done && xen_console_up && curthread->td_critnest == 0) + ttwakeup(xccons); +} + +void +xencons_tx(void) +{ + __xencons_tx_flush(); +} + +static void +xencons_priv_interrupt(void *arg) +{ + + static char rbuf[16]; + int l; + + while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) + xencons_rx(rbuf, l); + + xencons_tx(); +} + +int +xcopen(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct xc_softc *sc; + int unit = XCUNIT(dev); + struct tty *tp; + int s, error; + + sc = (struct xc_softc *)device_get_softc( + devclass_get_device(xc_devclass, unit)); + if (sc == NULL) + return (ENXIO); + + TRACE_ENTER; + tp = dev->si_tty; + s = spltty(); + if (!ISTTYOPEN(tp)) { + tp->t_state |= TS_CARR_ON; + ttychars(tp); + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_cflag = TTYDEF_CFLAG|CLOCAL; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + xcparam(tp, &tp->t_termios); + ttsetwater(tp); + } else if (tp->t_state & TS_XCLUDE && priv_check(td, PRIV_ROOT)) { + splx(s); + return (EBUSY); + } + splx(s); + + xen_console_up = 1; + + error = (*linesw[tp->t_line]->l_open)(dev, tp); + TRACE_EXIT; + return error; +} + +int +xcclose(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct tty *tp = dev->si_tty; + + if (tp == NULL) + return (0); + xen_console_up = 0; + + spltty(); + (*linesw[tp->t_line]->l_close)(tp, flag); + tty_close(tp); + spl0(); + return (0); +} + + +int +xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) +{ + struct tty *tp = dev->si_tty; + int error; + + error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td); + if (error != ENOIOCTL) + return (error); + + error = ttioctl(tp, cmd, data, flag); + + if (error != ENOIOCTL) + return (error); + + return (ENOTTY); +} + +static inline int +__xencons_put_char(int ch) +{ + char _ch = (char)ch; + if ((wp - wc) == WBUF_SIZE) + return 0; + wbuf[WBUF_MASK(wp++)] = _ch; + return 1; +} + + +static void +xcstart(struct tty *tp) +{ + boolean_t cons_full = FALSE; + + CN_LOCK(cn_mtx); + if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { + CN_UNLOCK(cn_mtx); + + ttwwakeup(tp); + return; + } + + tp->t_state |= TS_BUSY; + CN_UNLOCK(cn_mtx); + + while (tp->t_outq.c_cc != 0 && !cons_full) + cons_full = xcons_putc(getc(&tp->t_outq)); + + /* if the console is close to full leave our state as busy */ + if (!cons_full) { + CN_LOCK(cn_mtx); + tp->t_state &= ~TS_BUSY; + CN_UNLOCK(cn_mtx); + ttwwakeup(tp); + } else { + /* let the timeout kick us in a bit */ + xc_start_needed = TRUE; + } + +} + +static void +xcstop(struct tty *tp, int flag) +{ + + if (tp->t_state & TS_BUSY) { 
+ if ((tp->t_state & TS_TTSTOP) == 0) { + tp->t_state |= TS_FLUSH; + } + } +} + +static void +xc_timeout(void *v) +{ + struct tty *tp; + int c; + + tp = (struct tty *)v; + + while ((c = xccncheckc(NULL)) != -1) { + if (tp->t_state & TS_ISOPEN) { + (*linesw[tp->t_line]->l_rint)(c, tp); + } + } + + if (xc_start_needed) { + xc_start_needed = FALSE; + xcstart(tp); + } + + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); +} + +/* + * Set line parameters. + */ +int +xcparam(struct tty *tp, struct termios *t) +{ + tp->t_ispeed = t->c_ispeed; + tp->t_ospeed = t->c_ospeed; + tp->t_cflag = t->c_cflag; + return (0); +} + + +static device_method_t xc_methods[] = { + DEVMETHOD(device_identify, xc_identify), + DEVMETHOD(device_probe, xc_probe), + DEVMETHOD(device_attach, xc_attach), + {0, 0} +}; + +static driver_t xc_driver = { + driver_name, + xc_methods, + sizeof(struct xc_softc), +}; + +/*** Forcibly flush console data before dying. ***/ +void +xcons_force_flush(void) +{ + int sz; + + if (xen_start_info->flags & SIF_INITDOMAIN) + return; + + /* Spin until console data is flushed through to the domain controller. */ + while (wc != wp) { + int sent = 0; + if ((sz = wp - wc) == 0) + continue; + + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if (sent > 0) + wc += sent; + } +} + +DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0); +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 8 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/sys/dev/xen/console/xencons_ring.c b/sys/dev/xen/console/xencons_ring.c new file mode 100644 index 0000000..c9b60ac --- /dev/null +++ b/sys/dev/xen/console/xencons_ring.c @@ -0,0 +1,154 @@ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/consio.h> +#include <sys/proc.h> +#include <sys/uio.h> +#include <sys/tty.h> +#include <sys/systm.h> +#include <sys/taskqueue.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <machine/stdarg.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/xen_intr.h> +#include <sys/cons.h> + + +#include <dev/xen/console/xencons_ring.h> +#include <machine/xen/evtchn.h> +#include <xen/interface/io/console.h> + + +#define console_evtchn console.domU.evtchn +extern char *console_page; + +static inline struct xencons_interface * +xencons_interface(void) +{ + return (struct xencons_interface *)console_page; +} + + +int +xencons_has_input(void) +{ + struct xencons_interface *intf; + + intf = xencons_interface(); + + return (intf->in_cons != intf->in_prod); +} + + +int +xencons_ring_send(const char *data, unsigned len) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + int sent; + + intf = xencons_interface(); + cons = intf->out_cons; + prod = intf->out_prod; + sent = 0; + + mb(); + PANIC_IF((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); + intf->out_prod = prod; + + notify_remote_via_evtchn(xen_start_info->console_evtchn); + + return sent; + +} + + +static xencons_receiver_func *xencons_receiver; + +void +xencons_handle_input(void *unused) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + intf = xencons_interface(); + + cons = intf->in_cons; + prod = intf->in_prod; + + /* XXX needs locking */ + while (cons != prod) { + xencons_rx(intf->in + MASK_XENCONS_IDX(cons, 
intf->in), 1); + cons++; + } + + mb(); + intf->in_cons = cons; + + notify_remote_via_evtchn(xen_start_info->console_evtchn); + + xencons_tx(); +} + +void +xencons_ring_register_receiver(xencons_receiver_func *f) +{ + xencons_receiver = f; +} + +int +xencons_ring_init(void) +{ + int err; + + if (!xen_start_info->console_evtchn) + return 0; + + err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn, + "xencons", xencons_handle_input, NULL, + INTR_TYPE_MISC | INTR_MPSAFE, NULL); + if (err) { + XENPRINTF("XEN console request irq failed %i\n", err); + return err; + } + + return 0; +} +#ifdef notyet +void +xencons_suspend(void) +{ + + if (!xen_start_info->console_evtchn) + return; + + unbind_evtchn_from_irqhandler(xen_start_info->console_evtchn, NULL); +} + +void +xencons_resume(void) +{ + + (void)xencons_ring_init(); +} +#endif +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 8 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/sys/dev/xen/console/xencons_ring.h b/sys/dev/xen/console/xencons_ring.h new file mode 100644 index 0000000..fc97d95 --- /dev/null +++ b/sys/dev/xen/console/xencons_ring.h @@ -0,0 +1,20 @@ +/* + * $FreeBSD$ + * + */ +#ifndef _XENCONS_RING_H +#define _XENCONS_RING_H + +int xencons_ring_init(void); +int xencons_ring_send(const char *data, unsigned len); +void xencons_rx(char *buf, unsigned len); +void xencons_tx(void); + + +typedef void (xencons_receiver_func)(char *buf, unsigned len); +void xencons_ring_register_receiver(xencons_receiver_func *f); + +void xencons_handle_input(void *unused); +int xencons_has_input(void); + +#endif /* _XENCONS_RING_H */ diff --git a/sys/dev/xen/evtchn/evtchn_dev.c b/sys/dev/xen/evtchn/evtchn_dev.c new file mode 100644 index 0000000..a206708 --- /dev/null +++ b/sys/dev/xen/evtchn/evtchn_dev.c @@ -0,0 +1,394 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/selinfo.h> +#include <sys/poll.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/ioccom.h> + +#include <machine/cpufunc.h> +#include <machine/intr_machdep.h> +#include <machine/xen-os.h> +#include <machine/xen_intr.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <machine/synch_bitops.h> + +#include <machine/hypervisor.h> + + +typedef struct evtchn_sotfc { + + struct selinfo ev_rsel; +} evtchn_softc_t; + + +#ifdef linuxcrap +/* NB. This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; +#endif + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. */ + +#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ + +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) +static uint16_t *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Which ports is user-space bound to? 
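   (bound_ports is a bitmap with one bit per event-channel port, 32 words of
   32 bits each, set and cleared from the EVTCHN_BIND/EVTCHN_UNBIND ioctls
   below using the synch_* bit operations.  An illustrative helper, not part
   of this driver, showing how a port would be tested:

	static int
	port_is_bound(int port)
	{
		return test_bit(port, &bound_ports[0]);
	}
   )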
*/
+static uint32_t bound_ports[32];
+
+/* Unique address for processes to sleep on */
+static void *evtchn_waddr = &ring;
+
+static struct mtx lock, upcall_lock;
+
+static d_read_t      evtchn_read;
+static d_write_t     evtchn_write;
+static d_ioctl_t     evtchn_ioctl;
+static d_poll_t      evtchn_poll;
+static d_open_t      evtchn_open;
+static d_close_t     evtchn_close;
+
+
+void 
+evtchn_device_upcall(int port)
+{
+	mtx_lock(&upcall_lock);
+
+	mask_evtchn(port);
+	clear_evtchn(port);
+
+	if ( ring != NULL ) {
+		if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
+			ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
+			if ( ring_cons == ring_prod++ ) {
+				wakeup(evtchn_waddr);
+			}
+		}
+		else {
+			ring_overflow = 1;
+		}
+	}
+
+	mtx_unlock(&upcall_lock);
+}
+
+static void 
+__evtchn_reset_buffer_ring(void)
+{
+	/* Initialise the ring to empty. Clear errors. */
+	ring_cons = ring_prod = ring_overflow = 0;
+}
+
+static int
+evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int rc;
+	unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0;
+	count = uio->uio_resid;
+
+	count &= ~1; /* even number of bytes */
+
+	if ( count == 0 )
+	{
+		rc = 0;
+		goto out;
+	}
+
+	if ( count > PAGE_SIZE )
+		count = PAGE_SIZE;
+
+	for ( ; ; ) {
+		if ( (c = ring_cons) != (p = ring_prod) )
+			break;
+
+		if ( ring_overflow ) {
+			rc = EFBIG;
+			goto out;
+		}
+
+		if (sst != 0) {
+			rc = EINTR;
+			goto out;
+		}
+
+		/* PCATCH == check for signals before and after sleeping
+		 * PWAIT == priority of waiting on resource
+		 */
+		sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10);
+	}
+
+	/* Byte lengths of two chunks.  Chunk split (if any) is at ring wrap. */
+	if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
+	}
+	else {
+		bytes1 = (p - c) * sizeof(uint16_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count.
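	   (The split above is at the ring wrap: c and p are free-running
	   indices, so when ((c ^ p) & EVTCHN_RING_SIZE) is non-zero the
	   consumer and producer are on different laps of the 2048-entry ring
	   and the valid data wraps around.  A worked example, assuming
	   EVTCHN_RING_SIZE is 2048 as defined above: with c = 2040 and
	   p = 2054, bytes1 = (2048 - 2040) * sizeof(uint16_t) = 16 and
	   bytes2 = (2054 & 2047) * sizeof(uint16_t) = 12.  The code below
	   then clips bytes1 and bytes2 so no more than the caller's byte
	   count is copied out.)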
*/ + if ( bytes1 > count ) { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) { + bytes2 = count - bytes1; + } + + if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || + ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) + /* keeping this around as its replacement is not equivalent + * copyout(&ring[0], &buf[bytes1], bytes2) + */ + { + rc = EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); + + rc = bytes1 + bytes2; + + out: + + return rc; +} + +static int +evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc, i, count; + + count = uio->uio_resid; + + uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + + if ( kbuf == NULL ) + return ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( uiomove(kbuf, count, uio) != 0 ) { + rc = EFAULT; + goto out; + } + + mtx_lock_spin(&lock); + for ( i = 0; i < (count/2); i++ ) + if ( test_bit(kbuf[i], &bound_ports[0]) ) + unmask_evtchn(kbuf[i]); + mtx_unlock_spin(&lock); + + rc = count; + + out: + free(kbuf, M_DEVBUF); + return rc; +} + +static int +evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, + int mode, struct thread *td __unused) +{ + int rc = 0; + + mtx_lock_spin(&lock); + + switch ( cmd ) + { + case EVTCHN_RESET: + __evtchn_reset_buffer_ring(); + break; + case EVTCHN_BIND: + if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) ) + unmask_evtchn((int)arg); + else + rc = EINVAL; + break; + case EVTCHN_UNBIND: + if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) ) + mask_evtchn((int)arg); + else + rc = EINVAL; + break; + default: + rc = ENOSYS; + break; + } + + mtx_unlock_spin(&lock); + + return rc; +} + +static int +evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) +{ + + evtchn_softc_t *sc; + unsigned int mask = POLLOUT | POLLWRNORM; + + sc = dev->si_drv1; + + if ( ring_cons != ring_prod ) + mask |= POLLIN | POLLRDNORM; + else if ( ring_overflow ) + mask = POLLERR; + else + selrecord(td, &sc->ev_rsel); + + + return mask; +} + + +static int +evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + uint16_t *_ring; + + if (flag & O_NONBLOCK) + return EBUSY; + + if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) + return EBUSY; + + if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) + return ENOMEM; + + mtx_lock_spin(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + mtx_unlock_spin(&lock); + + + return 0; +} + +static int +evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) +{ + int i; + + mtx_lock_spin(&lock); + if (ring != NULL) { + free(ring, M_DEVBUF); + ring = NULL; + } + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) + mask_evtchn(i); + mtx_unlock_spin(&lock); + + evtchn_dev_inuse = 0; + + return 0; +} + +static struct cdevsw evtchn_devsw = { + d_version: D_VERSION, + d_open: evtchn_open, + d_close: evtchn_close, + d_read: evtchn_read, + d_write: evtchn_write, + d_ioctl: evtchn_ioctl, + d_poll: evtchn_poll, + d_name: "evtchn", + d_flags: 0, +}; + + +/* XXX - if this device is ever supposed to support use by more than one process + * this global static will have to go away + */ +static struct cdev *evtchn_dev; + + + +static int +evtchn_init(void *dummy __unused) +{ + /* XXX I believe we don't need these leaving them here for now until we + * have some semblance of it working + */ + 
mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); + + /* (DEVFS) create '/dev/misc/evtchn'. */ + evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); + + mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); + + evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); + bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); + + /* XXX I don't think we need any of this rubbish */ +#if 0 + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. */ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); +#endif + printk("Event-channel device installed.\n"); + + return 0; +} + + +SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL); + + diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c new file mode 100644 index 0000000..950a68c --- /dev/null +++ b/sys/dev/xen/netback/netback.c @@ -0,0 +1,1585 @@ +/* + * Copyright (c) 2006, Cisco Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sockio.h> +#include <sys/mbuf.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> + +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/if_arp.h> +#include <net/if_types.h> +#include <net/ethernet.h> +#include <net/if_bridgevar.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> + +#include <machine/in_cksum.h> +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/hypervisor-ifs.h> +#include <machine/xen_intr.h> +#include <machine/evtchn.h> +#include <machine/xenbus.h> +#include <machine/gnttab.h> +#include <machine/xen-public/memory.h> +#include <dev/xen/xenbus/xenbus_comms.h> + + +#ifdef XEN_NETBACK_DEBUG +#define DPRINTF(fmt, args...) \ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) ((void)0) +#endif + +#ifdef XEN_NETBACK_DEBUG_LOTS +#define DDPRINTF(fmt, args...) \ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#define DPRINTF_MBUF(_m) print_mbuf(_m, 0) +#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len) +#else +#define DDPRINTF(fmt, args...) ((void)0) +#define DPRINTF_MBUF(_m) ((void)0) +#define DPRINTF_MBUF_LEN(_m, _len) ((void)0) +#endif + +#define WPRINTF(fmt, args...) \ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define BUG_ON PANIC_IF + +#define IFNAME(_np) (_np)->ifp->if_xname + +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) + +struct ring_ref { + vm_offset_t va; + grant_handle_t handle; + uint64_t bus_addr; +}; + +typedef struct netback_info { + + /* Schedule lists */ + STAILQ_ENTRY(netback_info) next_tx; + STAILQ_ENTRY(netback_info) next_rx; + int on_tx_sched_list; + int on_rx_sched_list; + + struct xenbus_device *xdev; + XenbusState frontend_state; + + domid_t domid; + int handle; + char *bridge; + + int rings_connected; + struct ring_ref tx_ring_ref; + struct ring_ref rx_ring_ref; + netif_tx_back_ring_t tx; + netif_rx_back_ring_t rx; + evtchn_port_t evtchn; + int irq; + void *irq_cookie; + + struct ifnet *ifp; + int ref_cnt; + + device_t ndev; + int attached; +} netif_t; + + +#define MAX_PENDING_REQS 256 +#define PKT_PROT_LEN 64 + +static struct { + netif_tx_request_t req; + netif_t *netif; +} pending_tx_info[MAX_PENDING_REQS]; +static uint16_t pending_ring[MAX_PENDING_REQS]; +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +static unsigned long mmap_vstart; +#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + +/* Freed TX mbufs get batched on this ring before return to pending_ring. 
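   (Both pending_ring and dealloc_ring use free-running producer/consumer
   indices that are reduced modulo the ring size only at access time via
   MASK_PEND_IDX(), and NR_PENDING_REQS above is the number of requests
   currently in flight.  The producer side of this batching is
   netif_idx_release() further down, essentially:

	mtx_lock_spin(&dealloc_lock);
	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
	mtx_unlock_spin(&dealloc_lock);

   net_tx_action_dealloc() later drains dealloc_cons up to dealloc_prod and
   hands the freed indices back to pending_ring.)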
*/ +static uint16_t dealloc_ring[MAX_PENDING_REQS]; +static PEND_RING_IDX dealloc_prod, dealloc_cons; + +static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; +static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; +static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; + +static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; +static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; + +static struct task net_tx_task, net_rx_task; +static struct callout rx_task_callout; + +static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list = + STAILQ_HEAD_INITIALIZER(tx_sched_list); +static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list = + STAILQ_HEAD_INITIALIZER(rx_sched_list); +static struct mtx tx_sched_list_lock; +static struct mtx rx_sched_list_lock; + +static int vif_unit_maker = 0; + +/* Protos */ +static void netback_start(struct ifnet *ifp); +static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); +static int vif_add_dev(struct xenbus_device *xdev); +static void disconnect_rings(netif_t *netif); + +#ifdef XEN_NETBACK_DEBUG_LOTS +/* Debug code to display the contents of an mbuf */ +static void +print_mbuf(struct mbuf *m, int max) +{ + int i, j=0; + printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len); + for (; m; m = m->m_next) { + unsigned char *d = m->m_data; + for (i=0; i < m->m_len; i++) { + if (max && j == max) + break; + if ((j++ % 16) == 0) + printf("\n%04x:", j); + printf(" %02x", d[i]); + } + } + printf("\n"); +} +#endif + + +#define MAX_MFN_ALLOC 64 +static unsigned long mfn_list[MAX_MFN_ALLOC]; +static unsigned int alloc_index = 0; + +static unsigned long +alloc_mfn(void) +{ + unsigned long mfn = 0; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = MAX_MFN_ALLOC, + .extent_order = 0, + .domid = DOMID_SELF + }; + if ( unlikely(alloc_index == 0) ) + alloc_index = HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); + if ( alloc_index != 0 ) + mfn = mfn_list[--alloc_index]; + return mfn; +} + +static unsigned long +alloc_empty_page_range(unsigned long nr_pages) +{ + void *pages; + int i = 0, j = 0; + multicall_entry_t mcl[17]; + unsigned long mfn_list[16]; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = 0, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pages == NULL) + return 0; + + memset(mcl, 0, sizeof(mcl)); + + while (i < nr_pages) { + unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + + mcl[j].op = __HYPERVISOR_update_va_mapping; + mcl[j].args[0] = va; + + mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; + + xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; + + if (j == 16 || i == nr_pages) { + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + + reservation.nr_extents = j; + + mcl[j].op = __HYPERVISOR_memory_op; + mcl[j].args[0] = XENMEM_decrease_reservation; + mcl[j].args[1] = (unsigned long)&reservation; + + (void)HYPERVISOR_multicall(mcl, j+1); + + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; + j = 0; + } + } + + return (unsigned long)pages; +} + +#ifdef XEN_NETBACK_FIXUP_CSUM +static void +fixup_checksum(struct mbuf *m) +{ + struct ether_header *eh = mtod(m, struct ether_header *); + struct ip *ip = (struct ip *)(eh + 1); + int iphlen = ip->ip_hl << 2; + int iplen = ntohs(ip->ip_len); + + if ((m->m_pkthdr.csum_flags & CSUM_TCP)) { + struct 
tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen); + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(IPPROTO_TCP + (iplen - iphlen))); + th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + } else { + u_short csum; + struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen); + uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(IPPROTO_UDP + (iplen - iphlen))); + if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0) + csum = 0xffff; + uh->uh_sum = csum; + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + } +} +#endif + +/* Add the interface to the specified bridge */ +static int +add_to_bridge(struct ifnet *ifp, char *bridge) +{ + struct ifdrv ifd; + struct ifbreq ifb; + struct ifnet *ifp_bridge = ifunit(bridge); + + if (!ifp_bridge) + return ENOENT; + + bzero(&ifd, sizeof(ifd)); + bzero(&ifb, sizeof(ifb)); + + strcpy(ifb.ifbr_ifsname, ifp->if_xname); + strcpy(ifd.ifd_name, ifp->if_xname); + ifd.ifd_cmd = BRDGADD; + ifd.ifd_len = sizeof(ifb); + ifd.ifd_data = &ifb; + + return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd); + +} + +static int +netif_create(int handle, struct xenbus_device *xdev, char *bridge) +{ + netif_t *netif; + struct ifnet *ifp; + + netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!netif) + return ENOMEM; + + netif->ref_cnt = 1; + netif->handle = handle; + netif->domid = xdev->otherend_id; + netif->xdev = xdev; + netif->bridge = bridge; + xdev->data = netif; + + /* Set up ifnet structure */ + ifp = netif->ifp = if_alloc(IFT_ETHER); + if (!ifp) { + if (bridge) + free(bridge, M_DEVBUF); + free(netif, M_DEVBUF); + return ENOMEM; + } + + ifp->if_softc = netif; + if_initname(ifp, "vif", + atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ ); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_output = ether_output; + ifp->if_start = netback_start; + ifp->if_ioctl = netback_ioctl; + ifp->if_mtu = ETHERMTU; + ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; + + DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle); + + return 0; +} + +static void +netif_get(netif_t *netif) +{ + atomic_add_int(&netif->ref_cnt, 1); +} + +static void +netif_put(netif_t *netif) +{ + if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) { + DPRINTF("%s\n", IFNAME(netif)); + disconnect_rings(netif); + if (netif->ifp) { + if_free(netif->ifp); + netif->ifp = NULL; + } + if (netif->bridge) + free(netif->bridge, M_DEVBUF); + free(netif, M_DEVBUF); + } +} + +static int +netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + switch (cmd) { + case SIOCSIFFLAGS: + DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n", + IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags); + return 0; + } + + DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd); + + return ether_ioctl(ifp, cmd, data); +} + +static inline void +maybe_schedule_tx_action(void) +{ + smp_mb(); + if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list)) + taskqueue_enqueue(taskqueue_swi, &net_tx_task); +} + +/* Removes netif from front of list and does not call netif_put() (caller must) */ +static netif_t * +remove_from_tx_schedule_list(void) +{ + netif_t *netif; + + mtx_lock(&tx_sched_list_lock); + + if ((netif = STAILQ_FIRST(&tx_sched_list))) { + STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx); + STAILQ_NEXT(netif, next_tx) = NULL; + netif->on_tx_sched_list = 0; + } + + 
mtx_unlock(&tx_sched_list_lock); + + return netif; +} + +/* Adds netif to end of list and calls netif_get() */ +static void +add_to_tx_schedule_list_tail(netif_t *netif) +{ + if (netif->on_tx_sched_list) + return; + + mtx_lock(&tx_sched_list_lock); + if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { + netif_get(netif); + STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx); + netif->on_tx_sched_list = 1; + } + mtx_unlock(&tx_sched_list_lock); +} + +/* + * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: + * If this driver is pipelining transmit requests then we can be very + * aggressive in avoiding new-packet notifications -- frontend only needs to + * send a notification if there are no outstanding unreceived responses. + * If we may be buffer transmit buffers for any reason then we must be rather + * more conservative and treat this as the final check for pending work. + */ +static void +netif_schedule_tx_work(netif_t *netif) +{ + int more_to_do; + +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); +#else + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); +#endif + + if (more_to_do) { + DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif)); + add_to_tx_schedule_list_tail(netif); + maybe_schedule_tx_action(); + } +} + +static struct mtx dealloc_lock; +MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS); + +static void +netif_idx_release(uint16_t pending_idx) +{ + mtx_lock_spin(&dealloc_lock); + dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; + mtx_unlock_spin(&dealloc_lock); + + taskqueue_enqueue(taskqueue_swi, &net_tx_task); +} + +static void +make_tx_response(netif_t *netif, + uint16_t id, + int8_t st) +{ + RING_IDX i = netif->tx.rsp_prod_pvt; + netif_tx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->tx, i); + resp->id = id; + resp->status = st; + + netif->tx.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); + if (notify) + notify_remote_via_irq(netif->irq); + +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + if (i == netif->tx.req_cons) { + int more_to_do; + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); + if (more_to_do) + add_to_tx_schedule_list_tail(netif); + } +#endif +} + +inline static void +net_tx_action_dealloc(void) +{ + gnttab_unmap_grant_ref_t *gop; + uint16_t pending_idx; + PEND_RING_IDX dc, dp; + netif_t *netif; + int ret; + + dc = dealloc_cons; + dp = dealloc_prod; + + /* + * Free up any grants we have finished using + */ + gop = tx_unmap_ops; + while (dc != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; + gop->host_addr = MMAP_VADDR(pending_idx); + gop->dev_bus_addr = 0; + gop->handle = grant_tx_handle[pending_idx]; + gop++; + } + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); + BUG_ON(ret); + + while (dealloc_cons != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; + + netif = pending_tx_info[pending_idx].netif; + + make_tx_response(netif, pending_tx_info[pending_idx].req.id, + NETIF_RSP_OKAY); + + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + + netif_put(netif); + } +} + +static void +netif_page_release(void *buf, void *args) +{ + uint16_t pending_idx = (unsigned int)args; + + DDPRINTF("pending_idx=%u\n", pending_idx); + + KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx)); + + netif_idx_release(pending_idx); +} + +static void +net_tx_action(void 
*context, int pending) +{ + struct mbuf *m; + netif_t *netif; + netif_tx_request_t txreq; + uint16_t pending_idx; + RING_IDX i; + gnttab_map_grant_ref_t *mop; + int ret, work_to_do; + struct mbuf *txq = NULL, *txq_last = NULL; + + if (dealloc_cons != dealloc_prod) + net_tx_action_dealloc(); + + mop = tx_map_ops; + while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) { + + /* Get a netif from the list with work to do. */ + netif = remove_from_tx_schedule_list(); + + DDPRINTF("Processing %s (prod=%u, cons=%u)\n", + IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons); + + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); + if (!work_to_do) { + netif_put(netif); + continue; + } + + i = netif->tx.req_cons; + rmb(); /* Ensure that we see the request before we copy it. */ + memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); + + /* If we want credit-based scheduling, coud add it here - WORK */ + + netif->tx.req_cons++; + + netif_schedule_tx_work(netif); + + if (unlikely(txreq.size < ETHER_HDR_LEN) || + unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) { + WPRINTF("Bad packet size: %d\n", txreq.size); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + continue; + } + + /* No crossing a page as the payload mustn't fragment. */ + if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { + WPRINTF("txreq.offset: %x, size: %u, end: %u\n", + txreq.offset, txreq.size, + (txreq.offset & PAGE_MASK) + txreq.size); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + continue; + } + + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (!m) { + WPRINTF("Failed to allocate mbuf\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + break; + } + m->m_pkthdr.rcvif = netif->ifp; + + if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) { + struct mbuf *n; + MGET(n, M_DONTWAIT, MT_DATA); + if (!(m->m_next = n)) { + m_freem(m); + WPRINTF("Failed to allocate second mbuf\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + break; + } + n->m_len = txreq.size - PKT_PROT_LEN; + m->m_len = PKT_PROT_LEN; + } else + m->m_len = txreq.size; + + mop->host_addr = MMAP_VADDR(pending_idx); + mop->dom = netif->domid; + mop->ref = txreq.gref; + mop->flags = GNTMAP_host_map | GNTMAP_readonly; + mop++; + + memcpy(&pending_tx_info[pending_idx].req, + &txreq, sizeof(txreq)); + pending_tx_info[pending_idx].netif = netif; + *((uint16_t *)m->m_data) = pending_idx; + + if (txq_last) + txq_last->m_nextpkt = m; + else + txq = m; + txq_last = m; + + pending_cons++; + + if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) + break; + } + + if (!txq) + return; + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); + BUG_ON(ret); + + mop = tx_map_ops; + while ((m = txq) != NULL) { + caddr_t data; + + txq = m->m_nextpkt; + m->m_nextpkt = NULL; + + pending_idx = *((uint16_t *)m->m_data); + netif = pending_tx_info[pending_idx].netif; + memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq)); + + /* Check the remap error code. 
*/ + if (unlikely(mop->status)) { + WPRINTF("#### netback grant fails\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + m_freem(m); + mop++; + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + continue; + } + +#if 0 + /* Can't do this in FreeBSD since vtophys() returns the pfn */ + /* of the remote domain who loaned us the machine page - DPT */ + xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] = + mop->dev_bus_addr >> PAGE_SHIFT; +#endif + grant_tx_handle[pending_idx] = mop->handle; + + /* Setup data in mbuf (lengths are already set) */ + data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset); + bcopy(data, m->m_data, m->m_len); + if (m->m_next) { + struct mbuf *n = m->m_next; + MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release, + (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV); + n->m_data = &data[PKT_PROT_LEN]; + } else { + /* Schedule a response immediately. */ + netif_idx_release(pending_idx); + } + + if ((txreq.flags & NETTXF_data_validated)) { + /* Tell the stack the checksums are okay */ + m->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + + /* If necessary, inform stack to compute the checksums if it forwards the packet */ + if ((txreq.flags & NETTXF_csum_blank)) { + struct ether_header *eh = mtod(m, struct ether_header *); + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + struct ip *ip = (struct ip *)&m->m_data[14]; + if (ip->ip_p == IPPROTO_TCP) + m->m_pkthdr.csum_flags |= CSUM_TCP; + else if (ip->ip_p == IPPROTO_UDP) + m->m_pkthdr.csum_flags |= CSUM_UDP; + } + } + + netif->ifp->if_ibytes += m->m_pkthdr.len; + netif->ifp->if_ipackets++; + + DDPRINTF("RECV %d bytes from %s (cflags=%x)\n", + m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags); + DPRINTF_MBUF_LEN(m, 128); + + (*netif->ifp->if_input)(netif->ifp, m); + + mop++; + } +} + +/* Handle interrupt from a frontend */ +static void +netback_intr(void *arg) +{ + netif_t *netif = arg; + DDPRINTF("%s\n", IFNAME(netif)); + add_to_tx_schedule_list_tail(netif); + maybe_schedule_tx_action(); +} + +/* Removes netif from front of list and does not call netif_put() (caller must) */ +static netif_t * +remove_from_rx_schedule_list(void) +{ + netif_t *netif; + + mtx_lock(&rx_sched_list_lock); + + if ((netif = STAILQ_FIRST(&rx_sched_list))) { + STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx); + STAILQ_NEXT(netif, next_rx) = NULL; + netif->on_rx_sched_list = 0; + } + + mtx_unlock(&rx_sched_list_lock); + + return netif; +} + +/* Adds netif to end of list and calls netif_get() */ +static void +add_to_rx_schedule_list_tail(netif_t *netif) +{ + if (netif->on_rx_sched_list) + return; + + mtx_lock(&rx_sched_list_lock); + if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { + netif_get(netif); + STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx); + netif->on_rx_sched_list = 1; + } + mtx_unlock(&rx_sched_list_lock); +} + +static int +make_rx_response(netif_t *netif, uint16_t id, int8_t st, + uint16_t offset, uint16_t size, uint16_t flags) +{ + RING_IDX i = netif->rx.rsp_prod_pvt; + netif_rx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->rx, i); + resp->offset = offset; + resp->flags = flags; + resp->id = id; + resp->status = (int16_t)size; + if (st < 0) + resp->status = (int16_t)st; + + DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n", + i, resp->offset, resp->flags, resp->id, resp->status); + + 
netif->rx.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); + + return notify; +} + +static int +netif_rx(netif_t *netif) +{ + struct ifnet *ifp = netif->ifp; + struct mbuf *m; + multicall_entry_t *mcl; + mmu_update_t *mmu; + gnttab_transfer_t *gop; + unsigned long vdata, old_mfn, new_mfn; + struct mbuf *rxq = NULL, *rxq_last = NULL; + int ret, notify = 0, pkts_dequeued = 0; + + DDPRINTF("%s\n", IFNAME(netif)); + + mcl = rx_mcl; + mmu = rx_mmu; + gop = grant_rx_op; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + + /* Quit if the target domain has no receive buffers */ + if (netif->rx.req_cons == netif->rx.sring->req_prod) + break; + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; + + pkts_dequeued++; + + /* Check if we need to copy the data */ + if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) || + (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) { + struct mbuf *n; + + DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n", + m->m_flags, + (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0, + (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0, + (unsigned int)m->m_next); + + /* Make copy */ + MGETHDR(n, M_DONTWAIT, MT_DATA); + if (!n) + goto drop; + + MCLGET(n, M_DONTWAIT); + if (!(n->m_flags & M_EXT)) { + m_freem(n); + goto drop; + } + + /* Leave space at front and keep current alignment */ + n->m_data += 16 + ((unsigned int)m->m_data & 0x3); + + if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) { + WPRINTF("pkt to big %d\n", m->m_pkthdr.len); + m_freem(n); + goto drop; + } + m_copydata(m, 0, m->m_pkthdr.len, n->m_data); + n->m_pkthdr.len = n->m_len = m->m_pkthdr.len; + n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA); + m_freem(m); + m = n; + } + + vdata = (unsigned long)m->m_data; + old_mfn = vtomach(vdata) >> PAGE_SHIFT; + + if ((new_mfn = alloc_mfn()) == 0) + goto drop; + +#ifdef XEN_NETBACK_FIXUP_CSUM + /* Check if we need to compute a checksum. This happens */ + /* when bridging from one domain to another. */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) + fixup_checksum(m); +#endif + + xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn; + + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = vdata; + mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A; + mcl->args[2] = 0; + mcl->args[3] = 0; + mcl++; + + gop->mfn = old_mfn; + gop->domid = netif->domid; + gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref; + netif->rx.req_cons++; + gop++; + + mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu->val = vtophys(vdata) >> PAGE_SHIFT; + mmu++; + + if (rxq_last) + rxq_last->m_nextpkt = m; + else + rxq = m; + rxq_last = m; + + DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif)); + DPRINTF_MBUF_LEN(m, 128); + + /* Filled the batch queue? 
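 * Once the grant_rx_op array is full we stop dequeuing; the batch is pushed
 * to the hypervisor below and any remaining packets stay on the interface
 * send queue for a later pass.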
*/ + if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op)) + break; + + continue; + drop: + DDPRINTF("dropping pkt\n"); + ifp->if_oerrors++; + m_freem(m); + } + + if (mcl == rx_mcl) + return pkts_dequeued; + + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)rx_mmu; + mcl->args[1] = mmu - rx_mmu; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + mcl++; + + mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; + ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); + BUG_ON(ret != 0); + + ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op); + BUG_ON(ret != 0); + + mcl = rx_mcl; + gop = grant_rx_op; + + while ((m = rxq) != NULL) { + int8_t status; + uint16_t id, flags = 0; + + rxq = m->m_nextpkt; + m->m_nextpkt = NULL; + + /* Rederive the machine addresses. */ + new_mfn = mcl->args[1] >> PAGE_SHIFT; + old_mfn = gop->mfn; + + ifp->if_obytes += m->m_pkthdr.len; + ifp->if_opackets++; + + /* The update_va_mapping() must not fail. */ + BUG_ON(mcl->result != 0); + + /* Setup flags */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) + flags |= NETRXF_csum_blank | NETRXF_data_validated; + else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) + flags |= NETRXF_data_validated; + + /* Check the reassignment error code. */ + status = NETIF_RSP_OKAY; + if (gop->status != 0) { + DPRINTF("Bad status %d from grant transfer to DOM%u\n", + gop->status, netif->domid); + /* + * Page no longer belongs to us unless GNTST_bad_page, + * but that should be a fatal error anyway. + */ + BUG_ON(gop->status == GNTST_bad_page); + status = NETIF_RSP_ERROR; + } + id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id; + notify |= make_rx_response(netif, id, status, + (unsigned long)m->m_data & PAGE_MASK, + m->m_pkthdr.len, flags); + + m_freem(m); + mcl++; + gop++; + } + + if (notify) + notify_remote_via_irq(netif->irq); + + return pkts_dequeued; +} + +static void +rx_task_timer(void *arg) +{ + DDPRINTF("\n"); + taskqueue_enqueue(taskqueue_swi, &net_rx_task); +} + +static void +net_rx_action(void *context, int pending) +{ + netif_t *netif, *last_zero_work = NULL; + + DDPRINTF("\n"); + + while ((netif = remove_from_rx_schedule_list())) { + struct ifnet *ifp = netif->ifp; + + if (netif == last_zero_work) { + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + add_to_rx_schedule_list_tail(netif); + netif_put(netif); + if (!STAILQ_EMPTY(&rx_sched_list)) + callout_reset(&rx_task_callout, 1, rx_task_timer, NULL); + break; + } + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if (netif_rx(netif)) + last_zero_work = NULL; + else if (!last_zero_work) + last_zero_work = netif; + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + add_to_rx_schedule_list_tail(netif); + } + + netif_put(netif); + } +} + +static void +netback_start(struct ifnet *ifp) +{ + netif_t *netif = (netif_t *)ifp->if_softc; + + DDPRINTF("%s\n", IFNAME(netif)); + + add_to_rx_schedule_list_tail(netif); + taskqueue_enqueue(taskqueue_swi, &net_rx_task); +} + +/* Map a grant ref to a ring */ +static int +map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) +{ + struct gnttab_map_grant_ref op; + + ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); + if (ring->va == 0) + return ENOMEM; + + op.host_addr = ring->va; + op.flags = GNTMAP_host_map; + op.ref = ref; + op.dom = dom; + HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (op.status) { + WPRINTF("grant table op err=%d\n", op.status); + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; + return EACCES; + } + + ring->handle = op.handle; + 
ring->bus_addr = op.dev_bus_addr; + + return 0; +} + +/* Unmap grant ref for a ring */ +static void +unmap_ring(struct ring_ref *ring) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = ring->va; + op.dev_bus_addr = ring->bus_addr; + op.handle = ring->handle; + HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (op.status) + WPRINTF("grant table op err=%d\n", op.status); + + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; +} + +static int +connect_rings(netif_t *netif) +{ + struct xenbus_device *xdev = netif->xdev; + netif_tx_sring_t *txs; + netif_rx_sring_t *rxs; + unsigned long tx_ring_ref, rx_ring_ref; + evtchn_port_t evtchn; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + // Grab FE data and map his memory + err = xenbus_gather(NULL, xdev->otherend, + "tx-ring-ref", "%lu", &tx_ring_ref, + "rx-ring-ref", "%lu", &rx_ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(xdev, err, + "reading %s/ring-ref and event-channel", + xdev->otherend); + return err; + } + + err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref); + if (err) { + xenbus_dev_fatal(xdev, err, "mapping tx ring"); + return err; + } + txs = (netif_tx_sring_t *)netif->tx_ring_ref.va; + BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); + + err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref); + if (err) { + unmap_ring(&netif->tx_ring_ref); + xenbus_dev_fatal(xdev, err, "mapping rx ring"); + return err; + } + rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va; + BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); + + op.u.bind_interdomain.remote_dom = netif->domid; + op.u.bind_interdomain.remote_port = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_ring(&netif->tx_ring_ref); + unmap_ring(&netif->rx_ring_ref); + xenbus_dev_fatal(xdev, err, "binding event channel"); + return err; + } + netif->evtchn = op.u.bind_interdomain.local_port; + + /* bind evtchn to irq handler */ + netif->irq = + bind_evtchn_to_irqhandler(netif->evtchn, "netback", + netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie); + + netif->rings_connected = 1; + + DPRINTF("%s connected! evtchn=%d irq=%d\n", + IFNAME(netif), netif->evtchn, netif->irq); + + return 0; +} + +static void +disconnect_rings(netif_t *netif) +{ + DPRINTF("\n"); + + if (netif->rings_connected) { + unbind_from_irqhandler(netif->irq, netif->irq_cookie); + netif->irq = 0; + unmap_ring(&netif->tx_ring_ref); + unmap_ring(&netif->rx_ring_ref); + netif->rings_connected = 0; + } +} + +static void +connect(netif_t *netif) +{ + if (!netif->xdev || + !netif->attached || + netif->frontend_state != XenbusStateConnected) { + return; + } + + if (!connect_rings(netif)) { + xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected); + + /* Turn on interface */ + netif->ifp->if_drv_flags |= IFF_DRV_RUNNING; + netif->ifp->if_flags |= IFF_UP; + } +} + +static int +netback_remove(struct xenbus_device *xdev) +{ + netif_t *netif = xdev->data; + device_t ndev; + + DPRINTF("remove %s\n", xdev->nodename); + + if ((ndev = netif->ndev)) { + netif->ndev = NULL; + mtx_lock(&Giant); + device_detach(ndev); + mtx_unlock(&Giant); + } + + xdev->data = NULL; + netif->xdev = NULL; + netif_put(netif); + + return 0; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffers for communication with the frontend. + * Switch to Connected state. 
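+ * (The probe itself only moves the xenbus state to InitWait; the switch to
+ * Connected happens later in connect(), once the frontend rings are mapped.)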
+ */ +static int +netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) +{ + int err; + long handle; + char *bridge; + + DPRINTF("node=%s\n", xdev->nodename); + + /* Grab the handle */ + err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle); + if (err != 1) { + xenbus_dev_fatal(xdev, err, "reading handle"); + return err; + } + + /* Check for bridge */ + bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL); + if (IS_ERR(bridge)) + bridge = NULL; + + err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait); + if (err) { + xenbus_dev_fatal(xdev, err, "writing switch state"); + return err; + } + + err = netif_create(handle, xdev, bridge); + if (err) { + xenbus_dev_fatal(xdev, err, "creating netif"); + return err; + } + + err = vif_add_dev(xdev); + if (err) { + netif_put((netif_t *)xdev->data); + xenbus_dev_fatal(xdev, err, "adding vif device"); + return err; + } + + return 0; +} + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our netif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int netback_resume(struct xenbus_device *xdev) +{ + DPRINTF("node=%s\n", xdev->nodename); + return 0; +} + + +/** + * Callback received when the frontend's state changes. + */ +static void frontend_changed(struct xenbus_device *xdev, + XenbusState frontend_state) +{ + netif_t *netif = xdev->data; + + DPRINTF("state=%d\n", frontend_state); + + netif->frontend_state = frontend_state; + + switch (frontend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + break; + case XenbusStateConnected: + connect(netif); + break; + case XenbusStateClosing: + xenbus_switch_state(xdev, NULL, XenbusStateClosing); + break; + case XenbusStateClosed: + xenbus_remove_device(xdev); + break; + case XenbusStateUnknown: + case XenbusStateInitWait: + xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + +/* ** Driver registration ** */ + +static struct xenbus_device_id netback_ids[] = { + { "vif" }, + { "" } +}; + +static struct xenbus_driver netback = { + .name = "netback", + .ids = netback_ids, + .probe = netback_probe, + .remove = netback_remove, + .resume= netback_resume, + .otherend_changed = frontend_changed, +}; + +static void +netback_init(void *unused) +{ + callout_init(&rx_task_callout, CALLOUT_MPSAFE); + + mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS); + BUG_ON(!mmap_vstart); + + pending_cons = 0; + for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++) + pending_ring[pending_prod] = pending_prod; + + TASK_INIT(&net_tx_task, 0, net_tx_action, NULL); + TASK_INIT(&net_rx_task, 0, net_rx_action, NULL); + mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF); + mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF); + + DPRINTF("registering %s\n", netback.name); + + xenbus_register_backend(&netback); +} + +SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL) + +static int +vif_add_dev(struct xenbus_device *xdev) +{ + netif_t *netif = xdev->data; + device_t nexus, ndev; + devclass_t dc; + int err = 0; + + mtx_lock(&Giant); + + /* We will add a vif device as a child of nexus0 (for now) */ + if (!(dc = devclass_find("nexus")) || + !(nexus = devclass_get_device(dc, 0))) { + WPRINTF("could not find nexus0!\n"); + err = ENOENT; + goto done; + } + + + /* Create a newbus 
device representing the vif */ + ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit); + if (!ndev) { + WPRINTF("could not create newbus device %s!\n", IFNAME(netif)); + err = EFAULT; + goto done; + } + + netif_get(netif); + device_set_ivars(ndev, netif); + netif->ndev = ndev; + + device_probe_and_attach(ndev); + + done: + + mtx_unlock(&Giant); + + return err; +} + +enum { + VIF_SYSCTL_DOMID, + VIF_SYSCTL_HANDLE, + VIF_SYSCTL_TXRING, + VIF_SYSCTL_RXRING, +}; + +static char * +vif_sysctl_ring_info(netif_t *netif, int cmd) +{ + char *buf = malloc(256, M_DEVBUF, M_WAITOK); + if (buf) { + if (!netif->rings_connected) + sprintf(buf, "rings not connected\n"); + else if (cmd == VIF_SYSCTL_TXRING) { + netif_tx_back_ring_t *tx = &netif->tx; + sprintf(buf, "nr_ents=%x req_cons=%x" + " req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + tx->nr_ents, tx->req_cons, + tx->sring->req_prod, tx->sring->req_event, + tx->sring->rsp_prod, tx->sring->rsp_event); + } else { + netif_rx_back_ring_t *rx = &netif->rx; + sprintf(buf, "nr_ents=%x req_cons=%x" + " req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + rx->nr_ents, rx->req_cons, + rx->sring->req_prod, rx->sring->req_event, + rx->sring->rsp_prod, rx->sring->rsp_event); + } + } + return buf; +} + +static int +vif_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + device_t dev = (device_t)arg1; + netif_t *netif = (netif_t *)device_get_ivars(dev); + const char *value; + char *buf = NULL; + int err; + + switch (arg2) { + case VIF_SYSCTL_DOMID: + return sysctl_handle_int(oidp, NULL, netif->domid, req); + case VIF_SYSCTL_HANDLE: + return sysctl_handle_int(oidp, NULL, netif->handle, req); + case VIF_SYSCTL_TXRING: + case VIF_SYSCTL_RXRING: + value = buf = vif_sysctl_ring_info(netif, arg2); + break; + default: + return (EINVAL); + } + + err = SYSCTL_OUT(req, value, strlen(value)); + if (buf != NULL) + free(buf, M_DEVBUF); + + return err; +} + +/* Newbus vif device driver probe */ +static int +vif_probe(device_t dev) +{ + DDPRINTF("vif%d\n", device_get_unit(dev)); + return 0; +} + +/* Newbus vif device driver attach */ +static int +vif_attach(device_t dev) +{ + netif_t *netif = (netif_t *)device_get_ivars(dev); + uint8_t mac[ETHER_ADDR_LEN]; + + DDPRINTF("%s\n", IFNAME(netif)); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, + dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I", + "domid of frontend"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD, + dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I", + "handle of frontend"); +#ifdef XEN_NETBACK_DEBUG + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "txring", CTLFLAG_RD, + dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A", + "tx ring info"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "rxring", CTLFLAG_RD, + dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A", + "rx ring info"); +#endif + + memset(mac, 0xff, sizeof(mac)); + mac[0] &= ~0x01; + + ether_ifattach(netif->ifp, mac); + netif->attached = 1; + + connect(netif); + + if (netif->bridge) { + DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge); + int err = add_to_bridge(netif->ifp, netif->bridge); + if (err) { + WPRINTF("Error adding %s to %s; err=%d\n", + IFNAME(netif), netif->bridge, err); + } + } + + return bus_generic_attach(dev); +} + +/* Newbus vif 
device driver detach */ +static int +vif_detach(device_t dev) +{ + netif_t *netif = (netif_t *)device_get_ivars(dev); + struct ifnet *ifp = netif->ifp; + + DDPRINTF("%s\n", IFNAME(netif)); + + /* Tell the stack that the interface is no longer active */ + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + + ether_ifdetach(ifp); + + bus_generic_detach(dev); + + netif->attached = 0; + + netif_put(netif); + + return 0; +} + +static device_method_t vif_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, vif_probe), + DEVMETHOD(device_attach, vif_attach), + DEVMETHOD(device_detach, vif_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + {0, 0} +}; + +static devclass_t vif_devclass; + +static driver_t vif_driver = { + "vif", + vif_methods, + 0, +}; + +DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0); + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/sys/dev/xen/netfront/mbufq.h b/sys/dev/xen/netfront/mbufq.h new file mode 100644 index 0000000..0d6c604 --- /dev/null +++ b/sys/dev/xen/netfront/mbufq.h @@ -0,0 +1,123 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ + +#ifndef CXGB_MBUFQ_H_ +#define CXGB_MBUFQ_H_ + +struct mbuf_head { + struct mbuf *head; + struct mbuf *tail; + uint32_t qlen; + uint32_t qsize; + struct mtx lock; +}; + +static __inline void +mbufq_init(struct mbuf_head *l) +{ + l->head = l->tail = NULL; + l->qlen = l->qsize = 0; +} + +static __inline int +mbufq_empty(struct mbuf_head *l) +{ + return (l->head == NULL); +} + +static __inline int +mbufq_len(struct mbuf_head *l) +{ + return (l->qlen); +} + +static __inline int +mbufq_size(struct mbuf_head *l) +{ + return (l->qsize); +} + +static __inline int +mbufq_head_size(struct mbuf_head *l) +{ + return (l->head ? 
l->head->m_pkthdr.len : 0); +} + +static __inline void +mbufq_tail(struct mbuf_head *l, struct mbuf *m) +{ + l->qlen++; + if (l->head == NULL) + l->head = m; + else + l->tail->m_nextpkt = m; + l->tail = m; + l->qsize += m->m_pkthdr.len; +} + +static __inline struct mbuf * +mbufq_dequeue(struct mbuf_head *l) +{ + struct mbuf *m; + + m = l->head; + if (m) { + if (m == l->tail) + l->head = l->tail = NULL; + else + l->head = m->m_nextpkt; + m->m_nextpkt = NULL; + l->qlen--; + l->qsize -= m->m_pkthdr.len; + } + + return (m); +} + +static __inline struct mbuf * +mbufq_peek(struct mbuf_head *l) +{ + return (l->head); +} + +static __inline void +mbufq_append(struct mbuf_head *a, struct mbuf_head *b) +{ + if (a->tail) + a->tail->m_nextpkt = b->head; + if (b->tail) + a->tail = b->tail; + a->qlen += b->qlen; + a->qsize += b->qsize; + + +} +#endif /* CXGB_MBUFQ_H_ */ diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c new file mode 100644 index 0000000..fd174b6 --- /dev/null +++ b/sys/dev/xen/netfront/netfront.c @@ -0,0 +1,1829 @@ +/* + * + * Copyright (c) 2004-2006 Kip Macy + * All rights reserved. + * + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sockio.h> +#include <sys/mbuf.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/sx.h> + +#include <net/if.h> +#include <net/if_arp.h> +#include <net/ethernet.h> +#include <net/if_dl.h> +#include <net/if_media.h> + +#include <net/bpf.h> + +#include <net/if_types.h> +#include <net/if.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/if_ether.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/clock.h> /* for DELAY */ +#include <machine/bus.h> +#include <machine/resource.h> +#include <machine/frame.h> + + +#include <sys/bus.h> +#include <sys/rman.h> + +#include <machine/intr_machdep.h> + +#include <machine/xen/xen-os.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/xen_intr.h> +#include <machine/xen/evtchn.h> +#include <machine/xen/xenbus.h> +#include <xen/gnttab.h> +#include <xen/interface/memory.h> +#include <dev/xen/netfront/mbufq.h> +#include <machine/xen/features.h> +#include <xen/interface/io/netif.h> + + +#define GRANT_INVALID_REF 0 + +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) + +#ifdef CONFIG_XEN +static int MODPARM_rx_copy = 0; +module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); +MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); +static int MODPARM_rx_flip = 0; +module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); +MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); +#else +static const int MODPARM_rx_copy = 1; +static const int MODPARM_rx_flip = 0; +#endif + +#define RX_COPY_THRESHOLD 256 + +#define net_ratelimit() 0 + +struct netfront_info; +struct netfront_rx_info; + +static void xn_txeof(struct netfront_info *); +static void xn_rxeof(struct netfront_info *); +static void network_alloc_rx_buffers(struct netfront_info *); + +static void xn_tick_locked(struct netfront_info *); +static void xn_tick(void *); + +static void xn_intr(void *); +static void xn_start_locked(struct ifnet *); +static void xn_start(struct ifnet *); +static int xn_ioctl(struct ifnet *, u_long, caddr_t); +static void xn_ifinit_locked(struct netfront_info *); +static void xn_ifinit(void *); +static void xn_stop(struct netfront_info *); +#ifdef notyet +static void xn_watchdog(struct ifnet *); +#endif + +static void show_device(struct netfront_info *sc); +#ifdef notyet +static void netfront_closing(struct xenbus_device *dev); +#endif +static void netif_free(struct netfront_info *info); +static int netfront_remove(struct xenbus_device *dev); + +static int talk_to_backend(struct xenbus_device *dev, struct netfront_info *info); +static int create_netdev(struct xenbus_device *dev, struct ifnet **ifp); +static void netif_disconnect_backend(struct netfront_info *info); +static int setup_device(struct xenbus_device *dev, struct netfront_info *info); +static void end_access(int ref, void *page); + +/* Xenolinux helper functions */ +static int network_connect(struct ifnet *ifp); + +static void xn_free_rx_ring(struct netfront_info *); + +static void xn_free_tx_ring(struct netfront_info *); + +static int xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf_head *list, + int *pages_flipped_p); + +#define virt_to_mfn(x) (vtomach(x) >> 
PAGE_SHIFT) + +#define INVALID_P2M_ENTRY (~0UL) + +/* + * Mbuf pointers. We need these to keep track of the virtual addresses + * of our mbuf chains since we can only convert from virtual to physical, + * not the other way around. The size must track the free index arrays. + */ +struct xn_chain_data { + struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; + struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; +}; + + +struct net_device_stats +{ + u_long rx_packets; /* total packets received */ + u_long tx_packets; /* total packets transmitted */ + u_long rx_bytes; /* total bytes received */ + u_long tx_bytes; /* total bytes transmitted */ + u_long rx_errors; /* bad packets received */ + u_long tx_errors; /* packet transmit problems */ + u_long rx_dropped; /* no space in linux buffers */ + u_long tx_dropped; /* no space available in linux */ + u_long multicast; /* multicast packets received */ + u_long collisions; + + /* detailed rx_errors: */ + u_long rx_length_errors; + u_long rx_over_errors; /* receiver ring buff overflow */ + u_long rx_crc_errors; /* recved pkt with crc error */ + u_long rx_frame_errors; /* recv'd frame alignment error */ + u_long rx_fifo_errors; /* recv'r fifo overrun */ + u_long rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + u_long tx_aborted_errors; + u_long tx_carrier_errors; + u_long tx_fifo_errors; + u_long tx_heartbeat_errors; + u_long tx_window_errors; + + /* for cslip etc */ + u_long rx_compressed; + u_long tx_compressed; +}; + +struct netfront_info { + + struct ifnet *xn_ifp; + + struct net_device_stats stats; + u_int tx_full; + + netif_tx_front_ring_t tx; + netif_rx_front_ring_t rx; + + struct mtx tx_lock; + struct mtx rx_lock; + struct sx sc_lock; + + u_int handle; + u_int irq; + u_int copying_receiver; + u_int carrier; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 32 +#define RX_MAX_TARGET NET_RX_RING_SIZE + int rx_min_target, rx_max_target, rx_target; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. 
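+ * Entry 0 holds the index of the first free slot and every free slot holds
+ * the index of the next one, cast to a pointer; see add_id_to_freelist()
+ * and get_id_from_freelist() below.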
+ */ + + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t gref_rx_head; + grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; + +#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256) + struct xenbus_device *xbdev; + int tx_ring_ref; + int rx_ring_ref; + uint8_t mac[ETHER_ADDR_LEN]; + struct xn_chain_data xn_cdata; /* mbufs */ + struct mbuf_head xn_rx_batch; /* head of the batch queue */ + + int xn_if_flags; + struct callout xn_stat_ch; + + u_long rx_pfn_array[NET_RX_RING_SIZE]; + multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; + mmu_update_t rx_mmu[NET_RX_RING_SIZE]; +}; + +#define rx_mbufs xn_cdata.xn_rx_chain +#define tx_mbufs xn_cdata.xn_tx_chain + +#define XN_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \ + mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \ + sx_init(&(_sc)->sc_lock, #_name"_rx") + +#define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) +#define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) + +#define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) +#define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) + +#define XN_LOCK(_sc) sx_xlock(&(_sc)->sc_lock); +#define XN_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_lock); + +#define XN_LOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_lock, SX_LOCKED); +#define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); +#define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); +#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \ + mtx_destroy(&(_sc)->tx_lock); \ + sx_destroy(&(_sc)->sc_lock); + +struct netfront_rx_info { + struct netif_rx_response rx; + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; + +#define netfront_carrier_on(netif) ((netif)->carrier = 1) +#define netfront_carrier_off(netif) ((netif)->carrier = 0) +#define netfront_carrier_ok(netif) ((netif)->carrier) + +/* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ + + + +/* + * Access macros for acquiring freeing slots in tx_skbs[]. + */ + +static inline void +add_id_to_freelist(struct mbuf **list, unsigned short id) +{ + list[id] = list[0]; + list[0] = (void *)(u_long)id; +} + +static inline unsigned short +get_id_from_freelist(struct mbuf **list) +{ + u_int id = (u_int)(u_long)list[0]; + list[0] = list[id]; + return (id); +} + +static inline int +xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +static inline struct mbuf * +xennet_get_rx_mbuf(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + struct mbuf *m; + + m = np->rx_mbufs[i]; + np->rx_mbufs[i] = NULL; + return (m); +} + +static inline grant_ref_t +xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) +{ + int i = xennet_rxidx(ri); + grant_ref_t ref = np->grant_rx_ref[i]; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + return ref; +} + +#ifdef DEBUG + +#endif +#define IPRINTK(fmt, args...) \ + printf("[XEN] " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printf("[XEN] " fmt, ##args) +#define DPRINTK(fmt, args...) \ + printf("[XEN] " fmt, ##args) + + +static __inline struct mbuf* +makembuf (struct mbuf *buf) +{ + struct mbuf *m = NULL; + + MGETHDR (m, M_DONTWAIT, MT_DATA); + + if (! 
m) + return 0; + + M_MOVE_PKTHDR(m, buf); + + m_cljget(m, M_DONTWAIT, MJUMPAGESIZE); + m->m_pkthdr.len = buf->m_pkthdr.len; + m->m_len = buf->m_len; + m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) ); + + m->m_ext.ext_arg1 = (caddr_t *)(uintptr_t)(vtophys(mtod(m,caddr_t)) >> PAGE_SHIFT); + + return m; +} + +/** + * Read the 'mac' node at the given device's node in the store, and parse that + * as colon-separated octets, placing result the given mac array. mac must be + * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). + * Return 0 on success, or errno on error. + */ +static int +xen_net_read_mac(struct xenbus_device *dev, uint8_t mac[]) +{ + char *s; + int i; + char *e; + char *macstr = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); + if (IS_ERR(macstr)) { + return PTR_ERR(macstr); + } + s = macstr; + for (i = 0; i < ETHER_ADDR_LEN; i++) { + mac[i] = strtoul(s, &e, 16); + if (s == e || (e[0] != ':' && e[0] != 0)) { + free(macstr, M_DEVBUF); + return ENOENT; + } + s = &e[1]; + } + free(macstr, M_DEVBUF); + return 0; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffers for communication with the backend, and + * inform the backend of the appropriate details for those. Switch to + * Connected state. + */ +static int +netfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) +{ + int err; + struct ifnet *ifp; + struct netfront_info *info; + + printf("netfront_probe() \n"); + + err = create_netdev(dev, &ifp); + if (err) { + xenbus_dev_fatal(dev, err, "creating netdev"); + return err; + } + + info = ifp->if_softc; + dev->dev_driver_data = info; + + + return 0; +} + + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our netif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int +netfront_resume(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev_driver_data; + + DPRINTK("%s\n", dev->nodename); + + netif_disconnect_backend(info); + return (0); +} + + +/* Common code used when first setting up, and when resuming. */ +static int +talk_to_backend(struct xenbus_device *dev, struct netfront_info *info) +{ + const char *message; + struct xenbus_transaction xbt; + int err; + + err = xen_net_read_mac(dev, info->mac); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); + goto out; + } + + /* Create shared ring, alloc event channel. 
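 * setup_device() grants both rings to the backend and binds the event
 * channel to the xn_intr() handler.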
*/ + err = setup_device(dev, info); + if (err) + goto out; + + again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_ring; + } + err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u", + info->tx_ring_ref); + if (err) { + message = "writing tx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u", + info->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", irq_to_evtchn_port(info->irq)); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", + info->copying_receiver); + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); + if (err) { + message = "writing feature-rx-notify"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1); + if (err) { + message = "writing feature-no-csum-offload"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } +#ifdef HAVE_TSO + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } +#endif + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_ring; + } + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); + destroy_ring: + netif_free(info); + out: + return err; +} + + +static int +setup_device(struct xenbus_device *dev, struct netfront_info *info) +{ + netif_tx_sring_t *txs; + netif_rx_sring_t *rxs; + int err; + struct ifnet *ifp; + + ifp = info->xn_ifp; + + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->rx.sring = NULL; + info->tx.sring = NULL; + info->irq = 0; + + txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (!txs) { + err = ENOMEM; + xenbus_dev_fatal(dev, err, "allocating tx ring page"); + goto fail; + } + SHARED_RING_INIT(txs); + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); + err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + if (err < 0) + goto fail; + info->tx_ring_ref = err; + + rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (!rxs) { + err = ENOMEM; + xenbus_dev_fatal(dev, err, "allocating rx ring page"); + goto fail; + } + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + if (err < 0) + goto fail; + info->rx_ring_ref = err; + +#if 0 + network_connect(ifp); +#endif + err = bind_listening_port_to_irqhandler(dev->otherend_id, + "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, NULL); + + if (err <= 0) { + xenbus_dev_fatal(dev, err, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + info->irq = err; + + show_device(info); + + return 0; + + fail: + netif_free(info); + return err; +} + +/** + * Callback received when the backend's state changes. 
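+ * InitWait is the interesting transition: it triggers network_connect() and,
+ * on success, the move to Connected.  Closing is acknowledged via
+ * xenbus_frontend_closed().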
+ */ +static void +backend_changed(struct xenbus_device *dev, + XenbusState backend_state) +{ + struct netfront_info *sc = dev->dev_driver_data; + + DPRINTK("\n"); + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateConnected: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + case XenbusStateInitWait: + if (dev->state != XenbusStateInitialising) + break; + if (network_connect(sc->xn_ifp) != 0) + break; + xenbus_switch_state(dev, XenbusStateConnected); +#ifdef notyet + (void)send_fake_arp(netdev); +#endif + break; break; + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static void +xn_free_rx_ring(struct netfront_info *sc) +{ +#if 0 + int i; + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + if (sc->xn_cdata.xn_rx_chain[i] != NULL) { + m_freem(sc->xn_cdata.xn_rx_chain[i]); + sc->xn_cdata.xn_rx_chain[i] = NULL; + } + } + + sc->rx.rsp_cons = 0; + sc->xn_rx_if->req_prod = 0; + sc->xn_rx_if->event = sc->rx.rsp_cons ; +#endif +} + +static void +xn_free_tx_ring(struct netfront_info *sc) +{ +#if 0 + int i; + + for (i = 0; i < NET_TX_RING_SIZE; i++) { + if (sc->xn_cdata.xn_tx_chain[i] != NULL) { + m_freem(sc->xn_cdata.xn_tx_chain[i]); + sc->xn_cdata.xn_tx_chain[i] = NULL; + } + } + + return; +#endif +} + +static inline int +netfront_tx_slot_available(struct netfront_info *np) +{ + return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < + (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2)); +} +static void +netif_release_tx_bufs(struct netfront_info *np) +{ + struct mbuf *m; + int i; + + for (i = 1; i <= NET_TX_RING_SIZE; i++) { + m = np->xn_cdata.xn_tx_chain[i]; + + if (((u_long)m) < KERNBASE) + continue; + gnttab_grant_foreign_access_ref(np->grant_tx_ref[i], + np->xbdev->otherend_id, virt_to_mfn(mtod(m, vm_offset_t)), + GNTMAP_readonly); + gnttab_release_grant_reference(&np->gref_tx_head, + np->grant_tx_ref[i]); + np->grant_tx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(np->tx_mbufs, i); + m_freem(m); + } +} + +static void +network_alloc_rx_buffers(struct netfront_info *sc) +{ + unsigned short id; + struct mbuf *m_new; + int i, batch_target, notify; + RING_IDX req_prod; + struct xen_memory_reservation reservation; + grant_ref_t ref; + int nr_flips; + netif_rx_request_t *req; + vm_offset_t vaddr; + u_long pfn; + + req_prod = sc->rx.req_prod_pvt; + + if (unlikely(sc->carrier == 0)) + return; + + /* + * Allocate skbuffs greedily, even though we batch updates to the + * receive ring. This creates a less bursty demand on the memory + * allocator, so should reduce the chance of failed allocation + * requests both for ourself and for other kernel subsystems. + */ + batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); + for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { + MGETHDR(m_new, M_DONTWAIT, MT_DATA); + if (m_new == NULL) + goto no_mbuf; + + m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE); + if ((m_new->m_flags & M_EXT) == 0) { + m_freem(m_new); + +no_mbuf: + if (i != 0) + goto refill; + /* + * XXX set timer + */ + break; + } + m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; + + /* queue the mbufs allocated */ + mbufq_tail(&sc->xn_rx_batch, m_new); + } + + /* Is the batch large enough to be worthwhile? */ + if (i < (sc->rx_target/2)) { + if (req_prod >sc->rx.sring->req_prod) + goto push; + return; + } + /* Adjust floating fill target if we risked running out of buffers. 
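 * The target is doubled when the ring ran low, but never grows beyond
 * rx_max_target.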
*/ + if ( ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) && + ((sc->rx_target *= 2) > sc->rx_max_target) ) + sc->rx_target = sc->rx_max_target; + +refill: + for (nr_flips = i = 0; ; i++) { + if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) + break; + + m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( + vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); + + id = xennet_rxidx(req_prod + i); + + KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL, + ("non-NULL xm_rx_chain")); + sc->xn_cdata.xn_rx_chain[id] = m_new; + + ref = gnttab_claim_grant_reference(&sc->gref_rx_head); + KASSERT((short)ref >= 0, ("negative ref")); + sc->grant_rx_ref[id] = ref; + + vaddr = mtod(m_new, vm_offset_t); + pfn = vtophys(vaddr) >> PAGE_SHIFT; + req = RING_GET_REQUEST(&sc->rx, req_prod + i); + + if (sc->copying_receiver == 0) { + gnttab_grant_foreign_transfer_ref(ref, + sc->xbdev->otherend_id, pfn); + sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page before passing + * back to Xen. + */ + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + MULTI_update_va_mapping(&sc->rx_mcl[i], + vaddr, 0, 0); + } + nr_flips++; + } else { + gnttab_grant_foreign_access_ref(ref, + sc->xbdev->otherend_id, + PFNTOMFN(pfn), 0); + } + req->id = id; + req->gref = ref; + + sc->rx_pfn_array[i] = + vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; + } + + KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ + KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed")); + /* + * We may have allocated buffers which have entries outstanding + * in the page * update queue -- make sure we flush those first! + */ + PT_UPDATES_FLUSH(); + if (nr_flips != 0) { +#ifdef notyet + /* Tell the ballon driver what is going on. */ + balloon_update_driver_allowance(i); +#endif + set_xen_guest_handle(reservation.extent_start,sc->rx_pfn_array); + reservation.nr_extents = i; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + + /* After all PTEs have been zapped, flush the TLB. */ + sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. */ + sc->rx_mcl[i].op = __HYPERVISOR_memory_op; + sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + sc->rx_mcl[i].args[1] = (u_long)&reservation; + /* Zap PTEs and give away pages in one big multicall. */ + (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); + + /* Check return status of HYPERVISOR_dom_mem_op(). */ + if (unlikely(sc->rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation) + != i) + panic("Unable to reduce memory " + "reservation\n"); + } + } else { + wmb(); + } + + /* Above is a suitable barrier to ensure backend will see requests. 
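 * Either the hypercall issued above or the explicit wmb() orders the new
 * requests before req_prod is published to the backend.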
*/ + sc->rx.req_prod_pvt = req_prod + i; +push: + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); + if (notify) + notify_remote_via_irq(sc->irq); +} + +static void +xn_rxeof(struct netfront_info *np) +{ + struct ifnet *ifp; + struct netfront_rx_info rinfo; + struct netif_rx_response *rx = &rinfo.rx; + struct netif_extra_info *extras = rinfo.extras; + RING_IDX i, rp; + multicall_entry_t *mcl; + struct mbuf *m; + struct mbuf_head rxq, errq, tmpq; + int err, pages_flipped = 0; + + XN_RX_LOCK_ASSERT(np); + if (!netfront_carrier_ok(np)) + return; + + mbufq_init(&tmpq); + mbufq_init(&errq); + mbufq_init(&rxq); + + ifp = np->xn_ifp; + + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + i = np->rx.rsp_cons; + while ((i != rp)) { + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memset(extras, 0, sizeof(rinfo.extras)); + + err = xennet_get_responses(np, &rinfo, rp, &tmpq, + &pages_flipped); + + if (unlikely(err)) { + while ((m = mbufq_dequeue(&tmpq))) + mbufq_tail(&errq, m); + np->stats.rx_errors++; + i = np->rx.rsp_cons; + continue; + } + + m = mbufq_dequeue(&tmpq); + + m->m_data += rx->offset;/* (rx->addr & PAGE_MASK); */ + m->m_pkthdr.len = m->m_len = rx->status; + m->m_pkthdr.rcvif = ifp; + + if ( rx->flags & NETRXF_data_validated ) { + /* Tell the stack the checksums are okay */ + /* + * XXX this isn't necessarily the case - need to add + * check + */ + + m->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID + | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + + np->stats.rx_packets++; + np->stats.rx_bytes += rx->status; + + mbufq_tail(&rxq, m); + np->rx.rsp_cons = ++i; + } + + if (pages_flipped) { + /* Some pages are no longer absent... */ +#ifdef notyet + balloon_update_driver_allowance(-pages_flipped); +#endif + /* Do all the remapping work, and M->P updates, in one big + * hypercall. + */ + if (!!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = np->rx_mcl + pages_flipped; + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (u_long)np->rx_mmu; + mcl->args[1] = pages_flipped; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + (void)HYPERVISOR_multicall(np->rx_mcl, + pages_flipped + 1); + } + } + + while ((m = mbufq_dequeue(&errq))) + m_freem(m); + + /* + * Process all the mbufs after the remapping is complete. + * Break the mbuf chain first though. + */ + while ((m = mbufq_dequeue(&rxq)) != NULL) { + ifp->if_ipackets++; + + /* + * Do we really need to drop the rx lock? + */ + XN_RX_UNLOCK(np); + /* Pass it up. */ + (*ifp->if_input)(ifp, m); + XN_RX_LOCK(np); + } + + np->rx.rsp_cons = i; + +#if 0 + /* If we get a callback with very few responses, reduce fill target. */ + /* NB. Note exponential increase, linear decrease. */ + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > + ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) + np->rx_target = np->rx_min_target; +#endif + + network_alloc_rx_buffers(np); + + np->rx.sring->rsp_event = i + 1; +} + +static void +xn_txeof(struct netfront_info *np) +{ + RING_IDX i, prod; + unsigned short id; + struct ifnet *ifp; + struct mbuf *m; + + XN_TX_LOCK_ASSERT(np); + + if (!netfront_carrier_ok(np)) + return; + + ifp = np->xn_ifp; + ifp->if_timer = 0; + + do { + prod = np->tx.sring->rsp_prod; + rmb(); /* Ensure we see responses up to 'rp'. 
*/ + + for (i = np->tx.rsp_cons; i != prod; i++) { + id = RING_GET_RESPONSE(&np->tx, i)->id; + m = np->xn_cdata.xn_tx_chain[id]; + + ifp->if_opackets++; + KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); + M_ASSERTVALID(m); + if (unlikely(gnttab_query_foreign_access( + np->grant_tx_ref[id]) != 0)) { + printf("network_tx_buf_gc: warning " + "-- grant still in use by backend " + "domain.\n"); + goto out; + } + gnttab_end_foreign_access_ref( + np->grant_tx_ref[id], GNTMAP_readonly); + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); + np->grant_tx_ref[id] = GRANT_INVALID_REF; + + np->xn_cdata.xn_tx_chain[id] = NULL; + add_id_to_freelist(np->xn_cdata.xn_tx_chain, id); + m_freem(m); + } + np->tx.rsp_cons = prod; + + /* + * Set a new event, then check for race with update of + * tx_cons. Note that it is essential to schedule a + * callback, no matter how few buffers are pending. Even if + * there is space in the transmit ring, higher layers may + * be blocked because too much data is outstanding: in such + * cases notification from Xen is likely to be the only kick + * that we'll get. + */ + np->tx.sring->rsp_event = + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; + + mb(); + + } while (prod != np->tx.sring->rsp_prod); + + out: + if (np->tx_full && + ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { + np->tx_full = 0; +#if 0 + if (np->user_state == UST_OPEN) + netif_wake_queue(dev); +#endif + } + +} + +static void +xn_intr(void *xsc) +{ + struct netfront_info *np = xsc; + struct ifnet *ifp = np->xn_ifp; + +#if 0 + if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && + likely(netfront_carrier_ok(np)) && + ifp->if_drv_flags & IFF_DRV_RUNNING)) + return; +#endif + if (np->tx.rsp_cons != np->tx.sring->rsp_prod) { + XN_TX_LOCK(np); + xn_txeof(np); + XN_TX_UNLOCK(np); + } + + XN_RX_LOCK(np); + xn_rxeof(np); + XN_RX_UNLOCK(np); + + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + xn_start(ifp); +} + + +static void +xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, + grant_ref_t ref) +{ + int new = xennet_rxidx(np->rx.req_prod_pvt); + + KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL")); + np->rx_mbufs[new] = m; + np->grant_rx_ref[new] = ref; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; + np->rx.req_prod_pvt++; +} + +static int +xennet_get_extras(struct netfront_info *np, + struct netif_extra_info *extras, RING_IDX rp) +{ + struct netif_extra_info *extra; + RING_IDX cons = np->rx.rsp_cons; + + int err = 0; + + do { + struct mbuf *m; + grant_ref_t ref; + + if (unlikely(cons + 1 == rp)) { +#if 0 + if (net_ratelimit()) + WPRINTK("Missing extra info\n"); +#endif + err = -EINVAL; + break; + } + + extra = (struct netif_extra_info *) + RING_GET_RESPONSE(&np->rx, ++cons); + + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { +#if 0 + if (net_ratelimit()) + WPRINTK("Invalid extra type: %d\n", + extra->type); +#endif + err = -EINVAL; + } else { + memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); + } + + m = xennet_get_rx_mbuf(np, cons); + ref = xennet_get_rx_ref(np, cons); + xennet_move_rx_slot(np, m, ref); + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + np->rx.rsp_cons = cons; + return err; +} + +static int +xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, + struct mbuf_head *list, + int *pages_flipped_p) +{ + int pages_flipped = *pages_flipped_p; + struct 
mmu_update *mmu; + struct multicall_entry *mcl; + struct netif_rx_response *rx = &rinfo->rx; + struct netif_extra_info *extras = rinfo->extras; + RING_IDX cons = np->rx.rsp_cons; + struct mbuf *m = xennet_get_rx_mbuf(np, cons); + grant_ref_t ref = xennet_get_rx_ref(np, cons); + int max = 24 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */; + int frags = 1; + int err = 0; + u_long ret; + + if (rx->flags & NETRXF_extra_info) { + err = xennet_get_extras(np, extras, rp); + cons = np->rx.rsp_cons; + } + + for (;;) { + u_long mfn; + + if (unlikely(rx->status < 0 || + rx->offset + rx->status > PAGE_SIZE)) { +#if 0 + if (net_ratelimit()) + WPRINTK("rx->offset: %x, size: %u\n", + rx->offset, rx->status); +#endif + xennet_move_rx_slot(np, m, ref); + err = -EINVAL; + goto next; + } + + /* + * This definitely indicates a bug, either in this driver or in + * the backend driver. In future this should flag the bad + * situation to the system controller to reboot the backed. + */ + if (ref == GRANT_INVALID_REF) { +#if 0 + if (net_ratelimit()) + WPRINTK("Bad rx response id %d.\n", rx->id); +#endif + err = -EINVAL; + goto next; + } + + if (!np->copying_receiver) { + /* Memory pressure, insufficient buffer + * headroom, ... + */ + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { + if (net_ratelimit()) + WPRINTK("Unfulfilled rx req " + "(id=%d, st=%d).\n", + rx->id, rx->status); + xennet_move_rx_slot(np, m, ref); + err = -ENOMEM; + goto next; + } + + if (!xen_feature( XENFEAT_auto_translated_physmap)) { + /* Remap the page. */ + void *vaddr = mtod(m, void *); + uint32_t pfn; + + mcl = np->rx_mcl + pages_flipped; + mmu = np->rx_mmu + pages_flipped; + + MULTI_update_va_mapping(mcl, (u_long)vaddr, + (mfn << PAGE_SHIFT) | PG_RW | + PG_V | PG_M | PG_A, 0); + pfn = (uint32_t)m->m_ext.ext_arg1; + mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + + set_phys_to_machine(pfn, mfn); + } + pages_flipped++; + } else { + ret = gnttab_end_foreign_access_ref(ref, 0); + KASSERT(ret, ("ret != 0")); + } + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + mbufq_tail(list, m); + +next: + if (!(rx->flags & NETRXF_more_data)) + break; + + if (cons + frags == rp) { + if (net_ratelimit()) + WPRINTK("Need more frags\n"); + err = -ENOENT; + break; + } + + rx = RING_GET_RESPONSE(&np->rx, cons + frags); + m = xennet_get_rx_mbuf(np, cons + frags); + ref = xennet_get_rx_ref(np, cons + frags); + frags++; + } + + if (unlikely(frags > max)) { + if (net_ratelimit()) + WPRINTK("Too many frags\n"); + err = -E2BIG; + } + + if (unlikely(err)) + np->rx.rsp_cons = cons + frags; + + *pages_flipped_p = pages_flipped; + + return err; +} + +static void +xn_tick_locked(struct netfront_info *sc) +{ + XN_RX_LOCK_ASSERT(sc); + callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); + + /* XXX placeholder for printing debug information */ + +} + + +static void +xn_tick(void *xsc) +{ + struct netfront_info *sc; + + sc = xsc; + XN_RX_LOCK(sc); + xn_tick_locked(sc); + XN_RX_UNLOCK(sc); + +} +static void +xn_start_locked(struct ifnet *ifp) +{ + unsigned short id; + struct mbuf *m_head, *new_m; + struct netfront_info *sc; + netif_tx_request_t *tx; + RING_IDX i; + grant_ref_t ref; + u_long mfn, tx_bytes; + int notify; + + sc = ifp->if_softc; + tx_bytes = 0; + + if (!netfront_carrier_ok(sc)) + return; + + for (i = sc->tx.req_prod_pvt; TRUE; i++) { + IF_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + + if (!netfront_tx_slot_available(sc)) { + IF_PREPEND(&ifp->if_snd, m_head); + ifp->if_drv_flags |= 
IFF_DRV_OACTIVE; + break; + } + + id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain); + + /* + * Start packing the mbufs in this chain into + * the fragment pointers. Stop when we run out + * of fragments or hit the end of the mbuf chain. + */ + new_m = makembuf(m_head); + tx = RING_GET_REQUEST(&sc->tx, i); + tx->id = id; + ref = gnttab_claim_grant_reference(&sc->gref_tx_head); + KASSERT((short)ref >= 0, ("Negative ref")); + mfn = virt_to_mfn(mtod(new_m, vm_offset_t)); + gnttab_grant_foreign_access_ref(ref, sc->xbdev->otherend_id, + mfn, GNTMAP_readonly); + tx->gref = sc->grant_tx_ref[id] = ref; + tx->size = new_m->m_pkthdr.len; +#if 0 + tx->flags = (skb->ip_summed == CHECKSUM_HW) ? NETTXF_csum_blank : 0; +#endif + tx->flags = 0; + new_m->m_next = NULL; + new_m->m_nextpkt = NULL; + + m_freem(m_head); + + sc->xn_cdata.xn_tx_chain[id] = new_m; + BPF_MTAP(ifp, new_m); + + sc->stats.tx_bytes += new_m->m_pkthdr.len; + sc->stats.tx_packets++; + } + + sc->tx.req_prod_pvt = i; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); + if (notify) + notify_remote_via_irq(sc->irq); + + xn_txeof(sc); + + if (RING_FULL(&sc->tx)) { + sc->tx_full = 1; +#if 0 + netif_stop_queue(dev); +#endif + } + + return; +} + +static void +xn_start(struct ifnet *ifp) +{ + struct netfront_info *sc; + sc = ifp->if_softc; + XN_TX_LOCK(sc); + xn_start_locked(ifp); + XN_TX_UNLOCK(sc); +} + +/* equivalent of network_open() in Linux */ +static void +xn_ifinit_locked(struct netfront_info *sc) +{ + struct ifnet *ifp; + + XN_LOCK_ASSERT(sc); + + ifp = sc->xn_ifp; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + return; + + xn_stop(sc); + + network_alloc_rx_buffers(sc); + sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); + +} + + +static void +xn_ifinit(void *xsc) +{ + struct netfront_info *sc = xsc; + + XN_LOCK(sc); + xn_ifinit_locked(sc); + XN_UNLOCK(sc); + +} + + +static int +xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct netfront_info *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *) data; + struct ifaddr *ifa = (struct ifaddr *)data; + + int mask, error = 0; + switch(cmd) { + case SIOCSIFADDR: + case SIOCGIFADDR: + XN_LOCK(sc); + if (ifa->ifa_addr->sa_family == AF_INET) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + xn_ifinit_locked(sc); + arp_ifinit(ifp, ifa); + } else + error = ether_ioctl(ifp, cmd, data); + XN_UNLOCK(sc); + break; + case SIOCSIFMTU: + /* XXX can we alter the MTU on a VN ?*/ +#ifdef notyet + if (ifr->ifr_mtu > XN_JUMBO_MTU) + error = EINVAL; + else +#endif + { + ifp->if_mtu = ifr->ifr_mtu; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + xn_ifinit(sc); + } + break; + case SIOCSIFFLAGS: + XN_LOCK(sc); + if (ifp->if_flags & IFF_UP) { + /* + * If only the state of the PROMISC flag changed, + * then just use the 'set promisc mode' command + * instead of reinitializing the entire NIC. Doing + * a full re-init means reloading the firmware and + * waiting for it to start up, which may take a + * second or two. 
+ */ +#ifdef notyet + /* No promiscuous mode with Xen */ + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + ifp->if_flags & IFF_PROMISC && + !(sc->xn_if_flags & IFF_PROMISC)) { + XN_SETBIT(sc, XN_RX_MODE, + XN_RXMODE_RX_PROMISC); + } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && + !(ifp->if_flags & IFF_PROMISC) && + sc->xn_if_flags & IFF_PROMISC) { + XN_CLRBIT(sc, XN_RX_MODE, + XN_RXMODE_RX_PROMISC); + } else +#endif + xn_ifinit_locked(sc); + } else { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + xn_stop(sc); + } + } + sc->xn_if_flags = ifp->if_flags; + XN_UNLOCK(sc); + error = 0; + break; + case SIOCSIFCAP: + mask = ifr->ifr_reqcap ^ ifp->if_capenable; + if (mask & IFCAP_HWCSUM) { + if (IFCAP_HWCSUM & ifp->if_capenable) + ifp->if_capenable &= ~IFCAP_HWCSUM; + else + ifp->if_capenable |= IFCAP_HWCSUM; + } + error = 0; + break; + case SIOCADDMULTI: + case SIOCDELMULTI: +#ifdef notyet + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + XN_LOCK(sc); + xn_setmulti(sc); + XN_UNLOCK(sc); + error = 0; + } +#endif + /* FALLTHROUGH */ + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = EINVAL; + break; + default: + error = ether_ioctl(ifp, cmd, data); + } + + return (error); +} + +static void +xn_stop(struct netfront_info *sc) +{ + struct ifnet *ifp; + + XN_LOCK_ASSERT(sc); + + ifp = sc->xn_ifp; + + callout_stop(&sc->xn_stat_ch); + + xn_free_rx_ring(sc); + xn_free_tx_ring(sc); + + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); +} + +/* START of Xenolinux helper functions adapted to FreeBSD */ +static int +network_connect(struct ifnet *ifp) +{ + struct netfront_info *np; + int i, requeue_idx, err; + grant_ref_t ref; + netif_rx_request_t *req; + u_int feature_rx_copy, feature_rx_flip; + + printf("network_connect\n"); + + np = ifp->if_softc; + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-copy", "%u", &feature_rx_copy); + if (err != 1) + feature_rx_copy = 0; + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-flip", "%u", &feature_rx_flip); + if (err != 1) + feature_rx_flip = 1; + + /* + * Copy packets on receive path if: + * (a) This was requested by user, and the backend supports it; or + * (b) Flipping was requested, but this is unsupported by the backend. + */ + np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || + (MODPARM_rx_flip && !feature_rx_flip)); + + XN_LOCK(np); + /* Recovery procedure: */ + err = talk_to_backend(np->xbdev, np); + if (err) + return (err); + + /* Step 1: Reinitialise variables. */ + netif_release_tx_bufs(np); + + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { + struct mbuf *m; + + if (np->rx_mbufs[i] == NULL) + continue; + + m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); + req = RING_GET_REQUEST(&np->rx, requeue_idx); + + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref(ref, + np->xbdev->otherend_id, + vtophys(mtod(m, vm_offset_t))); + } else { + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + vtophys(mtod(m, vm_offset_t)), 0); + } + req->gref = ref; + req->id = requeue_idx; + + requeue_idx++; + } + + np->rx.req_prod_pvt = requeue_idx; + + /* Step 3: All public and private state should now be sane. Get + * ready to start sending and receiving packets and give the driver + * domain a kick because we've probably just requeued some + * packets. 
+ */ + netfront_carrier_on(np); + notify_remote_via_irq(np->irq); + XN_TX_LOCK(np); + xn_txeof(np); + XN_TX_UNLOCK(np); + network_alloc_rx_buffers(np); + XN_UNLOCK(np); + + return (0); +} + + +static void +show_device(struct netfront_info *sc) +{ +#ifdef DEBUG + if (sc) { + IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n", + sc->xn_ifno, + be_state_name[sc->xn_backend_state], + sc->xn_user_state ? "open" : "closed", + sc->xn_evtchn, + sc->xn_irq, + sc->xn_tx_if, + sc->xn_rx_if); + } else { + IPRINTK("<vif NULL>\n"); + } +#endif +} + +static int ifno = 0; + +/** Create a network device. + * @param handle device handle + */ +static int +create_netdev(struct xenbus_device *dev, struct ifnet **ifpp) +{ + int i; + struct netfront_info *np; + int err; + struct ifnet *ifp; + + np = (struct netfront_info *)malloc(sizeof(struct netfront_info), + M_DEVBUF, M_NOWAIT); + if (np == NULL) + return (ENOMEM); + + memset(np, 0, sizeof(struct netfront_info)); + + np->xbdev = dev; + + XN_LOCK_INIT(np, xennetif); + np->rx_target = RX_MIN_TARGET; + np->rx_min_target = RX_MIN_TARGET; + np->rx_max_target = RX_MAX_TARGET; + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for (i = 0; i <= NET_TX_RING_SIZE; i++) { + np->tx_mbufs[i] = (void *) ((u_long) i+1); + np->grant_tx_ref[i] = GRANT_INVALID_REF; + } + for (i = 0; i <= NET_RX_RING_SIZE; i++) { + np->rx_mbufs[i] = NULL; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + } + /* A grant for every tx ring slot */ + if (gnttab_alloc_grant_references(TX_MAX_TARGET, + &np->gref_tx_head) < 0) { + printf("#### netfront can't alloc tx grant refs\n"); + err = ENOMEM; + goto exit; + } + /* A grant for every rx ring slot */ + if (gnttab_alloc_grant_references(RX_MAX_TARGET, + &np->gref_rx_head) < 0) { + printf("#### netfront can't alloc rx grant refs\n"); + gnttab_free_grant_references(np->gref_tx_head); + err = ENOMEM; + goto exit; + } + + err = xen_net_read_mac(dev, np->mac); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); + goto out; + } + + /* Set up ifnet structure */ + *ifpp = ifp = np->xn_ifp = if_alloc(IFT_ETHER); + ifp->if_softc = np; + if_initname(ifp, "xn", ifno++/* ifno */); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_ioctl = xn_ioctl; + ifp->if_output = ether_output; + ifp->if_start = xn_start; +#ifdef notyet + ifp->if_watchdog = xn_watchdog; +#endif + ifp->if_init = xn_ifinit; + ifp->if_mtu = ETHERMTU; + ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; + +#ifdef notyet + ifp->if_hwassist = XN_CSUM_FEATURES; + ifp->if_capabilities = IFCAP_HWCSUM; + ifp->if_capenable = ifp->if_capabilities; +#endif + + ether_ifattach(ifp, np->mac); + callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); + netfront_carrier_off(np); + + return (0); + +exit: + gnttab_free_grant_references(np->gref_tx_head); +out: + panic("do something smart"); + +} + +/** + * Handle the change of state of the backend to Closing. We must delete our + * device-layer structures now, to ensure that writes are flushed through to + * the backend. Once is this done, we can switch to Closed in + * acknowledgement. 
+ */ +#if 0 +static void netfront_closing(struct xenbus_device *dev) +{ +#if 0 + struct netfront_info *info = dev->dev_driver_data; + + DPRINTK("netfront_closing: %s removed\n", dev->nodename); + + close_netdev(info); +#endif + xenbus_switch_state(dev, XenbusStateClosed); +} +#endif + +static int netfront_remove(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev_driver_data; + + DPRINTK("%s\n", dev->nodename); + + netif_free(info); + free(info, M_DEVBUF); + + return 0; +} + + +static void netif_free(struct netfront_info *info) +{ + netif_disconnect_backend(info); +#if 0 + close_netdev(info); +#endif +} + + + +static void netif_disconnect_backend(struct netfront_info *info) +{ + xn_stop(info); + end_access(info->tx_ring_ref, info->tx.sring); + end_access(info->rx_ring_ref, info->rx.sring); + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->tx.sring = NULL; + info->rx.sring = NULL; + +#if 0 + if (info->irq) + unbind_from_irqhandler(info->irq, info->netdev); +#else + panic("FIX ME"); +#endif + info->irq = 0; +} + + +static void end_access(int ref, void *page) +{ + if (ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(ref, 0, page); +} + + +/* ** Driver registration ** */ + + +static struct xenbus_device_id netfront_ids[] = { + { "vif" }, + { "" } +}; + + +static struct xenbus_driver netfront = { + .name = "vif", + .ids = netfront_ids, + .probe = netfront_probe, + .remove = netfront_remove, + .resume = netfront_resume, + .otherend_changed = backend_changed, +}; + +static void +netif_init(void *unused) +{ + if (!is_running_on_xen()) + return; + + if (is_initial_xendomain()) + return; + + IPRINTK("Initialising virtual ethernet driver.\n"); + + xenbus_register_frontend(&netfront); +} + +SYSINIT(xennetif, SI_SUB_PSEUDO, SI_ORDER_ANY, netif_init, NULL) + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 8 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/sys/dev/xen/pcifront/pcifront.c b/sys/dev/xen/pcifront/pcifront.c new file mode 100644 index 0000000..e6c498b --- /dev/null +++ b/sys/dev/xen/pcifront/pcifront.c @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2006, Cisco Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/queue.h> + +#include <machine/vmparam.h> +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/bus.h> +#include <machine/resource.h> +#include <machine/frame.h> + +#include <sys/bus.h> +#include <sys/rman.h> + +#include <machine/intr_machdep.h> + +#include <machine/xen-os.h> +#include <machine/hypervisor.h> +#include <machine/hypervisor-ifs.h> +#include <machine/xen_intr.h> +#include <machine/evtchn.h> +#include <machine/xenbus.h> +#include <machine/gnttab.h> +#include <machine/xen-public/memory.h> +#include <machine/xen-public/io/pciif.h> + +#include <sys/pciio.h> +#include <dev/pci/pcivar.h> +#include "pcib_if.h" + +#ifdef XEN_PCIDEV_FE_DEBUG +#define DPRINTF(fmt, args...) \ + printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) ((void)0) +#endif +#define WPRINTF(fmt, args...) \ + printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define INVALID_GRANT_REF (0) +#define INVALID_EVTCHN (-1) +#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) + +struct pcifront_device { + STAILQ_ENTRY(pcifront_device) next; + + struct xenbus_device *xdev; + + int unit; + int evtchn; + int gnt_ref; + + /* Lock this when doing any operations in sh_info */ + struct mtx sh_info_lock; + struct xen_pci_sharedinfo *sh_info; + + device_t ndev; + + int ref_cnt; +}; + +static STAILQ_HEAD(pcifront_dlist, pcifront_device) pdev_list = STAILQ_HEAD_INITIALIZER(pdev_list); + +struct xpcib_softc { + int domain; + int bus; + struct pcifront_device *pdev; +}; + +/* Allocate a PCI device structure */ +static struct pcifront_device * +alloc_pdev(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev = NULL; + int err, unit; + + err = sscanf(xdev->nodename, "device/pci/%d", &unit); + if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, "Error scanning pci device instance number"); + goto out; + } + + pdev = (struct pcifront_device *)malloc(sizeof(struct pcifront_device), M_DEVBUF, M_NOWAIT); + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, "Error allocating pcifront_device struct"); + goto out; + } + pdev->unit = unit; + pdev->xdev = xdev; + pdev->ref_cnt = 1; + + pdev->sh_info = (struct xen_pci_sharedinfo *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pdev->sh_info == NULL) { + free(pdev, M_DEVBUF); + pdev = NULL; + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, "Error allocating sh_info struct"); + goto out; + } + pdev->sh_info->flags = 0; + + xdev->data = pdev; + + mtx_init(&pdev->sh_info_lock, "info_lock", "pci shared dev info lock", MTX_DEF); + + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + + STAILQ_INSERT_TAIL(&pdev_list, pdev, next); + + DPRINTF("Allocated pdev @ 0x%p (unit=%d)\n", pdev, unit); + + 
out: + return pdev; +} + +/* Hold a reference to a pcifront device */ +static void +get_pdev(struct pcifront_device *pdev) +{ + pdev->ref_cnt++; +} + +/* Release a reference to a pcifront device */ +static void +put_pdev(struct pcifront_device *pdev) +{ + if (--pdev->ref_cnt > 0) + return; + + DPRINTF("freeing pdev @ 0x%p (ref_cnt=%d)\n", pdev, pdev->ref_cnt); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); + + if (pdev->gnt_ref != INVALID_GRANT_REF) + gnttab_end_foreign_access(pdev->gnt_ref, 0, (void *)pdev->sh_info); + + pdev->xdev->data = NULL; + + free(pdev, M_DEVBUF); +} + + +/* Write to the xenbus info needed by backend */ +static int +pcifront_publish_info(struct pcifront_device *pdev) +{ + int err = 0; + struct xenbus_transaction *trans; + + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + if (err < 0) { + WPRINTF("error granting access to ring page\n"); + goto out; + } + + pdev->gnt_ref = err; + + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); + if (err) + goto out; + + do_publish: + trans = xenbus_transaction_start(); + if (IS_ERR(trans)) { + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend " + "(start transaction)"); + goto out; + } + + err = xenbus_printf(trans, pdev->xdev->nodename, + "pci-op-ref", "%u", pdev->gnt_ref); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "event-channel", "%u", pdev->evtchn); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "magic", XEN_PCI_MAGIC); + if (!err) + err = xenbus_switch_state(pdev->xdev, trans, + XenbusStateInitialised); + + if (err) { + xenbus_transaction_end(trans, 1); + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend"); + goto out; + } else { + err = xenbus_transaction_end(trans, 0); + if (err == -EAGAIN) + goto do_publish; + else if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error completing transaction for backend"); + goto out; + } + } + + out: + return err; +} + +/* The backend is now connected so complete the connection process on our side */ +static int +pcifront_connect(struct pcifront_device *pdev) +{ + device_t nexus; + devclass_t nexus_devclass; + + /* We will add our device as a child of the nexus0 device */ + if (!(nexus_devclass = devclass_find("nexus")) || + !(nexus = devclass_get_device(nexus_devclass, 0))) { + WPRINTF("could not find nexus0!\n"); + return -1; + } + + /* Create a newbus device representing this frontend instance */ + pdev->ndev = BUS_ADD_CHILD(nexus, 0, "xpcife", pdev->unit); + if (!pdev->ndev) { + WPRINTF("could not create xpcife%d!\n", pdev->unit); + return -EFAULT; + } + get_pdev(pdev); + device_set_ivars(pdev->ndev, pdev); + + /* Good to go connected now */ + xenbus_switch_state(pdev->xdev, NULL, XenbusStateConnected); + + printf("pcifront: connected to %s\n", pdev->xdev->nodename); + + mtx_lock(&Giant); + device_probe_and_attach(pdev->ndev); + mtx_unlock(&Giant); + + return 0; +} + +/* The backend is closing so process a disconnect */ +static int +pcifront_disconnect(struct pcifront_device *pdev) +{ + int err = 0; + XenbusState prev_state; + + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + + if (prev_state < XenbusStateClosing) { + err = xenbus_switch_state(pdev->xdev, NULL, XenbusStateClosing); + if (!err && prev_state == XenbusStateConnected) { + /* TODO - need to detach the newbus devices */ + } + } + + return err; +} + +/* Process a probe from the xenbus */ +static int +pcifront_probe(struct xenbus_device *xdev, + const 
struct xenbus_device_id *id) +{ + int err = 0; + struct pcifront_device *pdev; + + DPRINTF("xenbus probing\n"); + + if ((pdev = alloc_pdev(xdev)) == NULL) + goto out; + + err = pcifront_publish_info(pdev); + + out: + if (err) + put_pdev(pdev); + return err; +} + +/* Remove the xenbus PCI device */ +static int +pcifront_remove(struct xenbus_device *xdev) +{ + DPRINTF("removing xenbus device node (%s)\n", xdev->nodename); + if (xdev->data) + put_pdev(xdev->data); + return 0; +} + +/* Called by xenbus when our backend node changes state */ +static void +pcifront_backend_changed(struct xenbus_device *xdev, + XenbusState be_state) +{ + struct pcifront_device *pdev = xdev->data; + + switch (be_state) { + case XenbusStateClosing: + DPRINTF("backend closing (%s)\n", xdev->nodename); + pcifront_disconnect(pdev); + break; + + case XenbusStateClosed: + DPRINTF("backend closed (%s)\n", xdev->nodename); + pcifront_disconnect(pdev); + break; + + case XenbusStateConnected: + DPRINTF("backend connected (%s)\n", xdev->nodename); + pcifront_connect(pdev); + break; + + default: + break; + } +} + +/* Process PCI operation */ +static int +do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) +{ + int err = 0; + struct xen_pci_op *active_op = &pdev->sh_info->op; + evtchn_port_t port = pdev->evtchn; + time_t timeout; + + mtx_lock(&pdev->sh_info_lock); + + memcpy(active_op, op, sizeof(struct xen_pci_op)); + + /* Go */ + wmb(); + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(port); + + timeout = time_uptime + 2; + + clear_evtchn(port); + + /* Spin while waiting for the answer */ + while (test_bit + (_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)) { + int err = HYPERVISOR_poll(&port, 1, 3 * hz); + if (err) + panic("Failed HYPERVISOR_poll: err=%d", err); + clear_evtchn(port); + if (time_uptime > timeout) { + WPRINTF("pciback not responding!!!\n"); + clear_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags); + err = XEN_PCI_ERR_dev_not_found; + goto out; + } + } + + memcpy(op, active_op, sizeof(struct xen_pci_op)); + + err = op->err; + out: + mtx_unlock(&pdev->sh_info_lock); + return err; +} + +/* ** XenBus Driver registration ** */ + +static struct xenbus_device_id pcifront_ids[] = { + { "pci" }, + { "" } +}; + +static struct xenbus_driver pcifront = { + .name = "pcifront", + .ids = pcifront_ids, + .probe = pcifront_probe, + .remove = pcifront_remove, + .otherend_changed = pcifront_backend_changed, +}; + +/* Register the driver with xenbus during sys init */ +static void +pcifront_init(void *unused) +{ + if ((xen_start_info->flags & SIF_INITDOMAIN)) + return; + + DPRINTF("xenbus registering\n"); + + xenbus_register_frontend(&pcifront); +} + +SYSINIT(pciif, SI_SUB_PSEUDO, SI_ORDER_ANY, pcifront_init, NULL) + + +/* Newbus xpcife device driver probe */ +static int +xpcife_probe(device_t dev) +{ +#ifdef XEN_PCIDEV_FE_DEBUG + struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev); + DPRINTF("xpcife probe (unit=%d)\n", pdev->unit); +#endif + return 0; +} + +/* Newbus xpcife device driver attach */ +static int +xpcife_attach(device_t dev) +{ + struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev); + int i, num_roots, len, err; + char str[64]; + unsigned int domain, bus; + + DPRINTF("xpcife attach (unit=%d)\n", pdev->unit); + + err = xenbus_scanf(NULL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + 
"Error reading number of PCI roots"); + goto out; + } + + /* Add a pcib device for each root */ + for (i = 0; i < num_roots; i++) { + device_t child; + + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(NULL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + err = 0; + if (domain != pdev->xdev->otherend_id) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Domain mismatch %d != %d", domain, pdev->xdev->otherend_id); + goto out; + } + + child = device_add_child(dev, "pcib", bus); + if (!child) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "Unable to create pcib%d", bus); + goto out; + } + } + + out: + return bus_generic_attach(dev); +} + +static devclass_t xpcife_devclass; + +static device_method_t xpcife_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xpcife_probe), + DEVMETHOD(device_attach, xpcife_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + {0, 0} +}; + +static driver_t xpcife_driver = { + "xpcife", + xpcife_methods, + 0, +}; + +DRIVER_MODULE(xpcife, nexus, xpcife_driver, xpcife_devclass, 0, 0); + + +/* Newbus xen pcib device driver probe */ +static int +xpcib_probe(device_t dev) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(device_get_parent(dev)); + + DPRINTF("xpcib probe (bus=%d)\n", device_get_unit(dev)); + + sc->domain = pdev->xdev->otherend_id; + sc->bus = device_get_unit(dev); + sc->pdev = pdev; + + return 0; +} + +/* Newbus xen pcib device driver attach */ +static int +xpcib_attach(device_t dev) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + + DPRINTF("xpcib attach (bus=%d)\n", sc->bus); + + device_add_child(dev, "pci", sc->bus); + return bus_generic_attach(dev); +} + +static int +xpcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + switch (which) { + case PCIB_IVAR_BUS: + *result = sc->bus; + return 0; + } + return ENOENT; +} + +/* Return the number of slots supported */ +static int +xpcib_maxslots(device_t dev) +{ + return 31; +} + +#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) + +/* Read configuration space register */ +static u_int32_t +xpcib_read_config(device_t dev, int bus, int slot, int func, + int reg, int bytes) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_read, + .domain = sc->domain, + .bus = sc->bus, + .devfn = PCI_DEVFN(slot, func), + .offset = reg, + .size = bytes, + }; + int err; + + err = do_pci_op(sc->pdev, &op); + + DPRINTF("read config (b=%d, 
s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n", + bus, slot, func, reg, bytes, op.value, err); + + if (err) + op.value = ~0; + + return op.value; +} + +/* Write configuration space register */ +static void +xpcib_write_config(device_t dev, int bus, int slot, int func, + int reg, u_int32_t data, int bytes) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_write, + .domain = sc->domain, + .bus = sc->bus, + .devfn = PCI_DEVFN(slot, func), + .offset = reg, + .size = bytes, + .value = data, + }; + int err; + + err = do_pci_op(sc->pdev, &op); + + DPRINTF("write config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n", + bus, slot, func, reg, bytes, data, err); +} + +static int +xpcib_route_interrupt(device_t pcib, device_t dev, int pin) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + + DPRINTF("route intr (pin=%d, line=%d)\n", pin, cfg->intline); + + return cfg->intline; +} + +static device_method_t xpcib_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xpcib_probe), + DEVMETHOD(device_attach, xpcib_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_read_ivar, xpcib_read_ivar), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + + /* pcib interface */ + DEVMETHOD(pcib_maxslots, xpcib_maxslots), + DEVMETHOD(pcib_read_config, xpcib_read_config), + DEVMETHOD(pcib_write_config, xpcib_write_config), + DEVMETHOD(pcib_route_interrupt, xpcib_route_interrupt), + { 0, 0 } +}; + +static devclass_t xpcib_devclass; + +DEFINE_CLASS_0(pcib, xpcib_driver, xpcib_methods, sizeof(struct xpcib_softc)); +DRIVER_MODULE(pcib, xpcife, xpcib_driver, xpcib_devclass, 0, 0); + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ |
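
The config-space accessors above (xpcib_read_config() and xpcib_write_config()) funnel every access through do_pci_op(): the slot/function pair is packed into a devfn byte with PCI_DEVFN(), the request is copied into the shared xen_pci_sharedinfo page, the backend is notified over the event channel, and the frontend then spins until the backend clears _XEN_PCIF_active. The stand-alone sketch below only illustrates the devfn packing and the request fields a read uses; struct fake_pci_op, PCI_SLOT() and PCI_FUNC() are local stand-ins introduced here for illustration, and nothing in it talks to Xen, the shared page, or the event channel.

/*
 * Minimal user-space sketch of how xpcib_read_config() packs a request.
 * "struct fake_pci_op" mirrors only the xen_pci_op fields used by a
 * config-space read; compile with: cc -o devfn devfn.c
 */
#include <stdio.h>
#include <stdint.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

struct fake_pci_op {
	uint32_t domain;	/* PCI domain/segment of the root */
	uint32_t bus;		/* bus number exposed by the backend */
	uint32_t devfn;		/* packed slot/function */
	int32_t  offset;	/* config-space register offset */
	int32_t  size;		/* access width: 1, 2 or 4 bytes */
};

int
main(void)
{
	/* Read the 16-bit vendor ID (offset 0) of slot 3, function 0, bus 0. */
	struct fake_pci_op op = {
		.domain = 0,
		.bus    = 0,
		.devfn  = PCI_DEVFN(3, 0),
		.offset = 0,
		.size   = 2,
	};

	printf("devfn=0x%02x -> slot=%u func=%u, offset=%d, size=%d\n",
	    op.devfn, PCI_SLOT(op.devfn), PCI_FUNC(op.devfn),
	    op.offset, op.size);
	return (0);
}

In the driver itself the filled-in op is handed to do_pci_op(), which serializes on sh_info_lock, sets _XEN_PCIF_active, and polls with a timeout before copying the backend's answer (including op.value and op.err) back out, as shown in the pcifront.c hunk above.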