-rw-r--r--  sys/dev/xen/balloon/balloon.c        446
-rw-r--r--  sys/dev/xen/blkback/blkback.c       1349
-rw-r--r--  sys/dev/xen/blkfront/blkfront.c     1021
-rw-r--r--  sys/dev/xen/blkfront/block.h          97
-rw-r--r--  sys/dev/xen/console/console.c        564
-rw-r--r--  sys/dev/xen/console/xencons_ring.c   154
-rw-r--r--  sys/dev/xen/console/xencons_ring.h    20
-rw-r--r--  sys/dev/xen/evtchn/evtchn_dev.c      394
-rw-r--r--  sys/dev/xen/netback/netback.c       1585
-rw-r--r--  sys/dev/xen/netfront/mbufq.h         123
-rw-r--r--  sys/dev/xen/netfront/netfront.c     1829
-rw-r--r--  sys/dev/xen/pcifront/pcifront.c      688
12 files changed, 8270 insertions, 0 deletions
diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
new file mode 100644
index 0000000..fa49196
--- /dev/null
+++ b/sys/dev/xen/balloon/balloon.c
@@ -0,0 +1,446 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h>
+#include <machine/xenbus.h>
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+struct mtx balloon_lock;
+#ifdef notyet
+
+/* We aim for 'current allocation' == 'target allocation'. */
+static unsigned long current_pages;
+static unsigned long target_pages;
+
+/* VM /proc information for memory */
+extern unsigned long totalram_pages;
+
+/* We may hit the hard limit in Xen. If we do then we remember it. */
+static unsigned long hard_limit;
+
+/*
+ * Drivers may alter the memory reservation independently, but they must
+ * inform the balloon driver so that we can avoid hitting the hard limit.
+ */
+static unsigned long driver_pages;
+
+struct balloon_entry {
+ vm_page_t page;
+ STAILQ_ENTRY(balloon_entry) list;
+};
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static STAILQ_HEAD(,balloon_entry) ballooned_pages;
+
+static unsigned long balloon_low, balloon_high;
+
+
+/* Main work function, always executed in process context. */
+static void balloon_process(void *unused);
+
+#define IPRINTK(fmt, args...) \
+ printk(KERN_INFO "xen_mem: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+ printk(KERN_WARNING "xen_mem: " fmt, ##args)
+
+/* balloon_append: add the given page to the balloon. */
+static void
+balloon_append(vm_page_t page)
+{
+ struct balloon_entry *entry;
+
+ entry = malloc(sizeof(struct balloon_entry), M_DEVBUF, M_WAITOK);
+ entry->page = page;
+
+ STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
+ balloon_low++;
+}
+
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+static vm_page_t
+balloon_retrieve(void)
+{
+ vm_page_t page;
+ struct balloon_entry *entry;
+
+ if (STAILQ_EMPTY(&ballooned_pages))
+ return NULL;
+
+ entry = STAILQ_FIRST(&ballooned_pages);
+ STAILQ_REMOVE_HEAD(&ballooned_pages, list);
+
+ page = entry->page;
+ free(entry, M_DEVBUF);
+
+ balloon_low--;
+
+ return page;
+}
+
+static void
+balloon_alarm(void *unused)
+{
+ wakeup(balloon_process);
+}
+
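+/*
+ * Clamp the target to the Xen hard limit and to what we could actually
+ * populate (the current allocation plus both balloon lists).
+ */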
+static unsigned long
+current_target(void)
+{
+ unsigned long target = min(target_pages, hard_limit);
+ if (target > (current_pages + balloon_low + balloon_high))
+ target = current_pages + balloon_low + balloon_high;
+ return target;
+}
+
+static int
+increase_reservation(unsigned long nr_pages)
+{
+ unsigned long *mfn_list, pfn, i, flags;
+ vm_page_t page;
+ long rc;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
+ nr_pages = PAGE_SIZE / sizeof(unsigned long);
+
+ mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+ if (mfn_list == NULL)
+ return ENOMEM;
+
+ balloon_lock(flags);
+
+ reservation.extent_start = mfn_list;
+ reservation.nr_extents = nr_pages;
+ rc = HYPERVISOR_memory_op(
+ XENMEM_increase_reservation, &reservation);
+ if (rc < nr_pages) {
+ int ret;
+ /* We hit the Xen hard limit: reprobe. */
+ reservation.extent_start = mfn_list;
+ reservation.nr_extents = rc;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ PANIC_IF(ret != rc);
+ hard_limit = current_pages + rc - driver_pages;
+ goto out;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ page = balloon_retrieve();
+ PANIC_IF(page == NULL);
+
+ pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+ PANIC_IF(phys_to_machine_mapping_valid(pfn));
+
+ /* Update P->M and M->P tables. */
+ PFNTOMFN(pfn) = mfn_list[i];
+ xen_machphys_update(mfn_list[i], pfn);
+
+ /* Relinquish the page back to the allocator. */
+ ClearPageReserved(page);
+ set_page_count(page, 1);
+ vm_page_free(page);
+ }
+
+ current_pages += nr_pages;
+ totalram_pages = current_pages;
+
+ out:
+ balloon_unlock(flags);
+
+ free(mfn_list, M_DEVBUF);
+
+ return 0;
+}
+
+static int
+decrease_reservation(unsigned long nr_pages)
+{
+ unsigned long *mfn_list, pfn, i, flags;
+ vm_page_t page;
+ void *v;
+ int need_sleep = 0;
+ int ret;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
+ nr_pages = PAGE_SIZE / sizeof(unsigned long);
+
+ mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+ if (mfn_list == NULL)
+ return ENOMEM;
+
+ for (i = 0; i < nr_pages; i++) {
+ int color = 0;
+ if ((page = vm_page_alloc(NULL, color++,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+ nr_pages = i;
+ need_sleep = 1;
+ break;
+ }
+ pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+ mfn_list[i] = PFNTOMFN(pfn);
+ }
+
+ balloon_lock(flags);
+
+ /* No more mappings: invalidate P2M and add to balloon. */
+ for (i = 0; i < nr_pages; i++) {
+ pfn = MFNTOPFN(mfn_list[i]);
+ PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
+ balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+ }
+
+ reservation.extent_start = mfn_list;
+ reservation.nr_extents = nr_pages;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ PANIC_IF(ret != nr_pages);
+
+ current_pages -= nr_pages;
+ totalram_pages = current_pages;
+
+ balloon_unlock(flags);
+
+ free(mfn_list, M_DEVBUF);
+
+ return need_sleep;
+}
+
+/*
+ * We avoid multiple worker processes conflicting via the balloon mutex.
+ * We may of course race updates of the target counts (which are protected
+ * by the balloon lock), or with changes to the Xen hard limit, but we will
+ * recover from these in time.
+ */
+static void
+balloon_process(void *unused)
+{
+ int need_sleep = 0;
+ long credit;
+
+ for (;;) {
+ do {
+ credit = current_target() - current_pages;
+ if (credit > 0)
+ need_sleep = (increase_reservation(credit) != 0);
+ if (credit < 0)
+ need_sleep = (decrease_reservation(-credit) != 0);
+
+#ifndef CONFIG_PREEMPT
+ if (need_resched())
+ schedule();
+#endif
+ } while ((credit != 0) && !need_sleep);
+
+ /* Schedule more work if there is some still to be done. */
+ if (current_target() != current_pages)
+ timeout(balloon_alarm, NULL, hz);
+
+ msleep(balloon_process, &balloon_lock, 0, "balloon", 0);
+ }
+
+}
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+static void
+set_new_target(unsigned long target)
+{
+ /* No need for lock. Not read-modify-write updates. */
+ hard_limit = ~0UL;
+ target_pages = target;
+ wakeup(balloon_process);
+}
+
+static struct xenbus_watch target_watch =
+{
+ .node = "memory/target"
+};
+
+/* React to a change in the target key */
+static void
+watch_target(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ unsigned long long new_target;
+ int err;
+
+ err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
+ if (err != 1) {
+ /* This is ok (for domain0 at least) - so just return */
+ return;
+ }
+
+ /* The given memory/target value is in KiB, so it needs converting to
+ pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+ */
+ set_new_target(new_target >> (PAGE_SHIFT - 10));
+
+}
+
+static void
+balloon_init_watcher(void *arg)
+{
+ int err;
+
+ err = register_xenbus_watch(&target_watch);
+ if (err)
+ printf("Failed to set balloon watcher\n");
+
+}
+
+static void
+balloon_init(void *arg)
+{
+ unsigned long pfn;
+ vm_page_t page;
+
+ IPRINTK("Initialising balloon driver.\n");
+
+ if (xen_init() < 0)
+ return;
+
+ current_pages = min(xen_start_info->nr_pages, max_pfn);
+ target_pages = current_pages;
+ balloon_low = 0;
+ balloon_high = 0;
+ driver_pages = 0UL;
+ hard_limit = ~0UL;
+
+ init_timer(&balloon_timer);
+ balloon_timer.data = 0;
+ balloon_timer.function = balloon_alarm;
+
+ /* Initialise the balloon with excess memory space. */
+ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
+ balloon_append(page);
+ }
+
+ target_watch.callback = watch_target;
+
+}
+
+void
+balloon_update_driver_allowance(long delta)
+{
+ unsigned long flags;
+
+ balloon_lock(flags);
+ driver_pages += delta;
+ balloon_unlock(flags);
+}
+
+#if 0
+static int dealloc_pte_fn(
+ pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+ unsigned long mfn = pte_mfn(*pte);
+ int ret;
+ struct xen_memory_reservation reservation = {
+ .extent_start = &mfn,
+ .nr_extents = 1,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ set_pte_at(&init_mm, addr, pte, __pte_ma(0));
+ set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ PANIC_IF(ret != 1);
+ return 0;
+}
+
+#endif
+vm_page_t
+balloon_alloc_empty_page_range(unsigned long nr_pages)
+{
+ unsigned long flags;
+ vm_page_t pages;
+ int i;
+ unsigned long *mfn_list;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4);
+ if (pages == NULL)
+ return NULL;
+
+ mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
+
+ for (i = 0; i < nr_pages; i++) {
+ unsigned long pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
+
+ mfn_list[i] = PFNTOMFN(pfn);
+ PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
+ }
+
+ reservation.extent_start = mfn_list;
+ reservation.nr_extents = nr_pages;
+ PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != nr_pages);
+
+ current_pages -= nr_pages;
+
+ wakeup(balloon_process);
+
+ return pages;
+}
+
+void
+balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
+{
+ unsigned long i, flags;
+
+ for (i = 0; i < nr_pages; i++)
+ balloon_append(page + i);
+
+ wakeup(balloon_process);
+}
+
+#endif
diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c
new file mode 100644
index 0000000..630a0bd
--- /dev/null
+++ b/sys/dev/xen/blkback/blkback.c
@@ -0,0 +1,1349 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/disk.h>
+#include <sys/bio.h>
+
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/sysctl.h>
+
+#include <geom/geom.h>
+
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <dev/xen/xenbus/xenbus_comms.h>
+
+
+#if XEN_BLKBACK_DEBUG
+#define DPRINTF(fmt, args...) \
+ printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+
+#define WPRINTF(fmt, args...) \
+ printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+struct ring_ref {
+ vm_offset_t va;
+ grant_handle_t handle;
+ uint64_t bus_addr;
+};
+
+typedef struct blkback_info {
+
+ /* Schedule lists */
+ STAILQ_ENTRY(blkback_info) next_req;
+ int on_req_sched_list;
+
+ struct xenbus_device *xdev;
+ XenbusState frontend_state;
+
+ domid_t domid;
+
+ int state;
+ int ring_connected;
+ struct ring_ref rr;
+ blkif_back_ring_t ring;
+ evtchn_port_t evtchn;
+ int irq;
+ void *irq_cookie;
+
+ int ref_cnt;
+
+ int handle;
+ char *mode;
+ char *type;
+ char *dev_name;
+
+ struct vnode *vn;
+ struct cdev *cdev;
+ struct cdevsw *csw;
+ u_int sector_size;
+ int sector_size_shift;
+ off_t media_size;
+ u_int media_num_sectors;
+ int major;
+ int minor;
+ int read_only;
+
+ struct mtx blk_ring_lock;
+
+ device_t ndev;
+
+ /* Stats */
+ int st_rd_req;
+ int st_wr_req;
+ int st_oo_req;
+ int st_err_req;
+} blkif_t;
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ *
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ *
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+static int blkif_reqs = 64;
+TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs);
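+/* Tunable at boot time, e.g. xen.vbd.blkif_reqs="128" in loader.conf. */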
+
+static int mmap_pages;
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct pending_req {
+ blkif_t *blkif;
+ uint64_t id;
+ int nr_pages;
+ int pendcnt;
+ unsigned short operation;
+ int status;
+ STAILQ_ENTRY(pending_req) free_list;
+} pending_req_t;
+
+static pending_req_t *pending_reqs;
+static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free =
+ STAILQ_HEAD_INITIALIZER(pending_free);
+static struct mtx pending_free_lock;
+
+static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list =
+ STAILQ_HEAD_INITIALIZER(req_sched_list);
+static struct mtx req_sched_list_lock;
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static grant_handle_t *pending_grant_handles;
+
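+/* All ring processing is deferred to this task, run from the swi taskqueue. */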
+static struct task blk_req_task;
+
+/* Protos */
+static void disconnect_ring(blkif_t *blkif);
+static int vbd_add_dev(struct xenbus_device *xdev);
+
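+/*
+ * Each pending request owns BLKIF_MAX_SEGMENTS_PER_REQUEST consecutive pages
+ * of the mmap area; these helpers map (request, segment) to that page's
+ * index, kernel VA and grant handle.
+ */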
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+ return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
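+/*
+ * Allocate a kernel VA range and hand its backing machine pages back to Xen:
+ * each PTE is cleared with update_va_mapping and the freed MFNs are released
+ * via XENMEM_decrease_reservation, batched 16 pages per multicall.
+ */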
+static unsigned long
+alloc_empty_page_range(unsigned long nr_pages)
+{
+ void *pages;
+ int i = 0, j = 0;
+ multicall_entry_t mcl[17];
+ unsigned long mfn_list[16];
+ struct xen_memory_reservation reservation = {
+ .extent_start = mfn_list,
+ .nr_extents = 0,
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+ if (pages == NULL)
+ return 0;
+
+ memset(mcl, 0, sizeof(mcl));
+
+ while (i < nr_pages) {
+ unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+
+ mcl[j].op = __HYPERVISOR_update_va_mapping;
+ mcl[j].args[0] = va;
+
+ mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+
+ xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+
+ if (j == 16 || i == nr_pages) {
+ mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+
+ reservation.nr_extents = j;
+
+ mcl[j].op = __HYPERVISOR_memory_op;
+ mcl[j].args[0] = XENMEM_decrease_reservation;
+ mcl[j].args[1] = (unsigned long)&reservation;
+
+ (void)HYPERVISOR_multicall(mcl, j+1);
+
+ mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
+ j = 0;
+ }
+ }
+
+ return (unsigned long)pages;
+}
+
+static pending_req_t *
+alloc_req(void)
+{
+ pending_req_t *req;
+ mtx_lock(&pending_free_lock);
+ if ((req = STAILQ_FIRST(&pending_free))) {
+ STAILQ_REMOVE(&pending_free, req, pending_req, free_list);
+ STAILQ_NEXT(req, free_list) = NULL;
+ }
+ mtx_unlock(&pending_free_lock);
+ return req;
+}
+
+static void
+free_req(pending_req_t *req)
+{
+ int was_empty;
+
+ mtx_lock(&pending_free_lock);
+ was_empty = STAILQ_EMPTY(&pending_free);
+ STAILQ_INSERT_TAIL(&pending_free, req, free_list);
+ mtx_unlock(&pending_free_lock);
+ if (was_empty)
+ taskqueue_enqueue(taskqueue_swi, &blk_req_task);
+}
+
+static void
+fast_flush_area(pending_req_t *req)
+{
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int i, invcount = 0;
+ grant_handle_t handle;
+ int ret;
+
+ for (i = 0; i < req->nr_pages; i++) {
+ handle = pending_handle(req, i);
+ if (handle == BLKBACK_INVALID_HANDLE)
+ continue;
+ unmap[invcount].host_addr = vaddr(req, i);
+ unmap[invcount].dev_bus_addr = 0;
+ unmap[invcount].handle = handle;
+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+ invcount++;
+ }
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, invcount);
+ PANIC_IF(ret);
+}
+
+static void
+blkif_get(blkif_t *blkif)
+{
+ atomic_add_int(&blkif->ref_cnt, 1);
+}
+
+static void
+blkif_put(blkif_t *blkif)
+{
+ if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) {
+ DPRINTF("Removing %x\n", (unsigned int)blkif);
+ disconnect_ring(blkif);
+ if (blkif->mode)
+ free(blkif->mode, M_DEVBUF);
+ if (blkif->type)
+ free(blkif->type, M_DEVBUF);
+ if (blkif->dev_name)
+ free(blkif->dev_name, M_DEVBUF);
+ free(blkif, M_DEVBUF);
+ }
+}
+
+static int
+blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params)
+{
+ blkif_t *blkif;
+
+ blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!blkif)
+ return ENOMEM;
+
+ DPRINTF("Created %x\n", (unsigned int)blkif);
+
+ blkif->ref_cnt = 1;
+ blkif->domid = xdev->otherend_id;
+ blkif->handle = handle;
+ blkif->mode = mode;
+ blkif->type = type;
+ blkif->dev_name = params;
+ blkif->xdev = xdev;
+ xdev->data = blkif;
+
+ mtx_init(&blkif->blk_ring_lock, "blk_ring_lock", "blkback ring lock", MTX_DEF);
+
+ if (strcmp(mode, "w"))
+ blkif->read_only = 1;
+
+ return 0;
+}
+
+static void
+add_to_req_schedule_list_tail(blkif_t *blkif)
+{
+ if (!blkif->on_req_sched_list) {
+ mtx_lock(&req_sched_list_lock);
+ if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) {
+ blkif_get(blkif);
+ STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
+ blkif->on_req_sched_list = 1;
+ taskqueue_enqueue(taskqueue_swi, &blk_req_task);
+ }
+ mtx_unlock(&req_sched_list_lock);
+ }
+}
+
+/* This routine does not call blkif_get(), does not schedule the blk_req_task to run,
+ and assumes that the state is connected */
+static void
+add_to_req_schedule_list_tail2(blkif_t *blkif)
+{
+ mtx_lock(&req_sched_list_lock);
+ if (!blkif->on_req_sched_list) {
+ STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
+ blkif->on_req_sched_list = 1;
+ }
+ mtx_unlock(&req_sched_list_lock);
+}
+
+/* Removes blkif from front of list and does not call blkif_put() (caller must) */
+static blkif_t *
+remove_from_req_schedule_list(void)
+{
+ blkif_t *blkif;
+
+ mtx_lock(&req_sched_list_lock);
+
+ if ((blkif = STAILQ_FIRST(&req_sched_list))) {
+ STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req);
+ STAILQ_NEXT(blkif, next_req) = NULL;
+ blkif->on_req_sched_list = 0;
+ }
+
+ mtx_unlock(&req_sched_list_lock);
+
+ return blkif;
+}
+
+static void
+make_response(blkif_t *blkif, uint64_t id,
+ unsigned short op, int st)
+{
+ blkif_response_t *resp;
+ blkif_back_ring_t *blk_ring = &blkif->ring;
+ int more_to_do = 0;
+ int notify;
+
+ mtx_lock(&blkif->blk_ring_lock);
+
+
+ /* Place on the response ring for the relevant domain. */
+ resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+ blk_ring->rsp_prod_pvt++;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
+
+ if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
+ /*
+ * Tail check for pending requests. Allows frontend to avoid
+ * notifications if requests are already in flight (lower
+ * overheads and promotes batching).
+ */
+ RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
+
+ } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring))
+ more_to_do = 1;
+
+ mtx_unlock(&blkif->blk_ring_lock);
+
+ if (more_to_do)
+ add_to_req_schedule_list_tail(blkif);
+
+ if (notify)
+ notify_remote_via_irq(blkif->irq);
+}
+
+static void
+end_block_io_op(struct bio *bio)
+{
+ pending_req_t *pending_req = bio->bio_caller2;
+
+ if (bio->bio_error) {
+ DPRINTF("BIO returned error %d for operation on device %s\n",
+ bio->bio_error, pending_req->blkif->dev_name);
+ pending_req->status = BLKIF_RSP_ERROR;
+ pending_req->blkif->st_err_req++;
+ }
+
+#if 0
+ printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n",
+ (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags);
+#endif
+
+ if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) {
+ fast_flush_area(pending_req);
+ make_response(pending_req->blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ blkif_put(pending_req->blkif);
+ free_req(pending_req);
+ }
+
+ g_destroy_bio(bio);
+}
+
+static void
+dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req)
+{
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct {
+ unsigned long buf; unsigned int nsec;
+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int nseg = req->nr_segments, nr_sects = 0;
+ struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int operation, ret, i, nbio = 0;
+
+ /* Check that number of segments is sane. */
+ if (unlikely(nseg == 0) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ DPRINTF("Bad number of segments in request (%d)\n", nseg);
+ goto fail_response;
+ }
+
+ if (req->operation == BLKIF_OP_WRITE) {
+ if (blkif->read_only) {
+ DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name);
+ goto fail_response;
+ }
+ operation = BIO_WRITE;
+ } else
+ operation = BIO_READ;
+
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = req->operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+
+ for (i = 0; i < nseg; i++) {
+ seg[i].nsec = req->seg[i].last_sect -
+ req->seg[i].first_sect + 1;
+
+ if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+ (seg[i].nsec <= 0))
+ goto fail_response;
+ nr_sects += seg[i].nsec;
+
+ map[i].host_addr = vaddr(pending_req, i);
+ map[i].dom = blkif->domid;
+ map[i].ref = req->seg[i].gref;
+ map[i].flags = GNTMAP_host_map;
+ if (operation == BIO_WRITE)
+ map[i].flags |= GNTMAP_readonly;
+ }
+
+ /* Convert to the disk's sector size */
+ nr_sects = (nr_sects << 9) >> blkif->sector_size_shift;
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+ PANIC_IF(ret);
+
+ for (i = 0; i < nseg; i++) {
+ if (unlikely(map[i].status != 0)) {
+ DPRINTF("invalid buffer -- could not remap it\n");
+ goto fail_flush;
+ }
+
+ pending_handle(pending_req, i) = map[i].handle;
+#if 0
+ /* Can't do this in FreeBSD since vtophys() returns the pfn */
+ /* of the remote domain who loaned us the machine page - DPT */
+ xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] =
+ map[i].dev_bus_addr >> PAGE_SHIFT;
+#endif
+ seg[i].buf = map[i].dev_bus_addr |
+ (req->seg[i].first_sect << 9);
+ }
+
+ if (req->sector_number + nr_sects > blkif->media_num_sectors) {
+ DPRINTF("%s of [%llu,%llu] extends past end of device %s\n",
+ operation == BIO_READ ? "read" : "write",
+ req->sector_number,
+ req->sector_number + nr_sects, blkif->dev_name);
+ goto fail_flush;
+ }
+
+ for (i = 0; i < nseg; i++) {
+ struct bio *bio;
+
+ if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) {
+ DPRINTF("Misaligned I/O request from domain %d", blkif->domid);
+ goto fail_put_bio;
+ }
+
+ bio = biolist[nbio++] = g_new_bio();
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ bio->bio_cmd = operation;
+ bio->bio_offset = req->sector_number << blkif->sector_size_shift;
+ bio->bio_length = seg[i].nsec << 9;
+ bio->bio_bcount = bio->bio_length;
+ bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK));
+ bio->bio_done = end_block_io_op;
+ bio->bio_caller2 = pending_req;
+ bio->bio_dev = blkif->cdev;
+
+ req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift;
+#if 0
+ printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n",
+ (unsigned int)bio, req->operation, req->sector_number, seg[i].nsec,
+ blkif->cdev->si_iosize_max, seg[i].buf);
+#endif
+ }
+
+ pending_req->pendcnt = nbio;
+ blkif_get(blkif);
+
+ for (i = 0; i < nbio; i++)
+ (*blkif->csw->d_strategy)(biolist[i]);
+
+ return;
+
+ fail_put_bio:
+ for (i = 0; i < (nbio-1); i++)
+ g_destroy_bio(biolist[i]);
+ fail_flush:
+ fast_flush_area(pending_req);
+ fail_response:
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ free_req(pending_req);
+}
+
+static void
+blk_req_action(void *context, int pending)
+{
+ blkif_t *blkif;
+
+ DPRINTF("\n");
+
+ while (!STAILQ_EMPTY(&req_sched_list)) {
+ blkif_back_ring_t *blk_ring;
+ RING_IDX rc, rp;
+
+ blkif = remove_from_req_schedule_list();
+
+ blk_ring = &blkif->ring;
+ rc = blk_ring->req_cons;
+ rp = blk_ring->sring->req_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
+ blkif_request_t *req;
+ pending_req_t *pending_req;
+
+ pending_req = alloc_req();
+ if (pending_req == NULL)
+ goto out_of_preqs;
+
+ req = RING_GET_REQUEST(blk_ring, rc);
+ blk_ring->req_cons = ++rc; /* before make_response() */
+
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ blkif->st_rd_req++;
+ dispatch_rw_block_io(blkif, req, pending_req);
+ break;
+ case BLKIF_OP_WRITE:
+ blkif->st_wr_req++;
+ dispatch_rw_block_io(blkif, req, pending_req);
+ break;
+ default:
+ blkif->st_err_req++;
+ DPRINTF("error: unknown block io operation [%d]\n",
+ req->operation);
+ make_response(blkif, req->id, req->operation,
+ BLKIF_RSP_ERROR);
+ free_req(pending_req);
+ break;
+ }
+ }
+
+ blkif_put(blkif);
+ }
+
+ return;
+
+ out_of_preqs:
+ /* We ran out of pending req structs */
+ /* Just requeue interface and wait to be rescheduled to run when one is freed */
+ add_to_req_schedule_list_tail2(blkif);
+ blkif->st_oo_req++;
+}
+
+/* Handle interrupt from a frontend */
+static void
+blkback_intr(void *arg)
+{
+ blkif_t *blkif = arg;
+ DPRINTF("%x\n", (unsigned int)blkif);
+ add_to_req_schedule_list_tail(blkif);
+}
+
+/* Map grant ref for ring */
+static int
+map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+{
+ struct gnttab_map_grant_ref op;
+
+ ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+ if (ring->va == 0)
+ return ENOMEM;
+
+ op.host_addr = ring->va;
+ op.flags = GNTMAP_host_map;
+ op.ref = ref;
+ op.dom = dom;
+ HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+ if (op.status) {
+ WPRINTF("grant table op err=%d\n", op.status);
+ kmem_free(kernel_map, ring->va, PAGE_SIZE);
+ ring->va = 0;
+ return EACCES;
+ }
+
+ ring->handle = op.handle;
+ ring->bus_addr = op.dev_bus_addr;
+
+ return 0;
+}
+
+/* Unmap grant ref for ring */
+static void
+unmap_ring(struct ring_ref *ring)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ op.host_addr = ring->va;
+ op.dev_bus_addr = ring->bus_addr;
+ op.handle = ring->handle;
+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+ if (op.status)
+ WPRINTF("grant table op err=%d\n", op.status);
+
+ kmem_free(kernel_map, ring->va, PAGE_SIZE);
+ ring->va = 0;
+}
+
+static int
+connect_ring(blkif_t *blkif)
+{
+ struct xenbus_device *xdev = blkif->xdev;
+ blkif_sring_t *ring;
+ unsigned long ring_ref;
+ evtchn_port_t evtchn;
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
+
+ if (blkif->ring_connected)
+ return 0;
+
+ /* Grab the frontend's data and map its ring memory */
+ err = xenbus_gather(NULL, xdev->otherend,
+ "ring-ref", "%lu", &ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(xdev, err,
+ "reading %s/ring-ref and event-channel",
+ xdev->otherend);
+ return err;
+ }
+
+ err = map_ring(ring_ref, blkif->domid, &blkif->rr);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "mapping ring");
+ return err;
+ }
+ ring = (blkif_sring_t *)blkif->rr.va;
+ BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE);
+
+ op.u.bind_interdomain.remote_dom = blkif->domid;
+ op.u.bind_interdomain.remote_port = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_ring(&blkif->rr);
+ xenbus_dev_fatal(xdev, err, "binding event channel");
+ return err;
+ }
+ blkif->evtchn = op.u.bind_interdomain.local_port;
+
+ /* bind evtchn to irq handler */
+ blkif->irq =
+ bind_evtchn_to_irqhandler(blkif->evtchn, "blkback",
+ blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie);
+
+ blkif->ring_connected = 1;
+
+ DPRINTF("%x rings connected! evtchn=%d irq=%d\n",
+ (unsigned int)blkif, blkif->evtchn, blkif->irq);
+
+ return 0;
+}
+
+static void
+disconnect_ring(blkif_t *blkif)
+{
+ DPRINTF("\n");
+
+ if (blkif->ring_connected) {
+ unbind_from_irqhandler(blkif->irq, blkif->irq_cookie);
+ blkif->irq = 0;
+ unmap_ring(&blkif->rr);
+ blkif->ring_connected = 0;
+ }
+}
+
+static void
+connect(blkif_t *blkif)
+{
+ struct xenbus_transaction *xbt;
+ struct xenbus_device *xdev = blkif->xdev;
+ int err;
+
+ if (!blkif->ring_connected ||
+ blkif->vn == NULL ||
+ blkif->state == XenbusStateConnected)
+ return;
+
+ DPRINTF("%s\n", xdev->otherend);
+
+ /* Supply the information about the device the frontend needs */
+again:
+ xbt = xenbus_transaction_start();
+ if (IS_ERR(xbt)) {
+ xenbus_dev_fatal(xdev, PTR_ERR(xbt),
+ "Error writing configuration for backend "
+ "(start transaction)");
+ return;
+ }
+
+ err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u",
+ blkif->media_num_sectors);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "writing %s/sectors",
+ xdev->nodename);
+ goto abort;
+ }
+
+ err = xenbus_printf(xbt, xdev->nodename, "info", "%u",
+ blkif->read_only ? VDISK_READONLY : 0);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "writing %s/info",
+ xdev->nodename);
+ goto abort;
+ }
+ err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u",
+ blkif->sector_size);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "writing %s/sector-size",
+ xdev->nodename);
+ goto abort;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+ if (err)
+ xenbus_dev_fatal(xdev, err, "ending transaction");
+
+ err = xenbus_switch_state(xdev, NULL, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(xdev, err, "switching to Connected state",
+ xdev->nodename);
+
+ blkif->state = XenbusStateConnected;
+
+ return;
+
+ abort:
+ xenbus_transaction_end(xbt, 1);
+}
+
+static int
+blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+{
+ int err;
+ char *p, *mode = NULL, *type = NULL, *params = NULL;
+ long handle;
+
+ DPRINTF("node=%s\n", xdev->nodename);
+
+ p = strrchr(xdev->otherend, '/') + 1;
+ handle = strtoul(p, NULL, 0);
+
+ mode = xenbus_read(NULL, xdev->nodename, "mode", NULL);
+ if (IS_ERR(mode)) {
+ xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode");
+ err = PTR_ERR(mode);
+ goto error;
+ }
+
+ type = xenbus_read(NULL, xdev->nodename, "type", NULL);
+ if (IS_ERR(type)) {
+ xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type");
+ err = PTR_ERR(type);
+ goto error;
+ }
+
+ params = xenbus_read(NULL, xdev->nodename, "params", NULL);
+ if (IS_ERR(params)) {
+ xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params");
+ err = PTR_ERR(params);
+ goto error;
+ }
+
+ err = blkif_create(xdev, handle, mode, type, params);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "creating blkif");
+ goto error;
+ }
+
+ err = vbd_add_dev(xdev);
+ if (err) {
+ blkif_put((blkif_t *)xdev->data);
+ xenbus_dev_fatal(xdev, err, "adding vbd device");
+ }
+
+ return err;
+
+ error:
+ if (mode)
+ free(mode, M_DEVBUF);
+ if (type)
+ free(type, M_DEVBUF);
+ if (params)
+ free(params, M_DEVBUF);
+ return err;
+}
+
+static int
+blkback_remove(struct xenbus_device *xdev)
+{
+ blkif_t *blkif = xdev->data;
+ device_t ndev;
+
+ DPRINTF("node=%s\n", xdev->nodename);
+
+ blkif->state = XenbusStateClosing;
+
+ if ((ndev = blkif->ndev)) {
+ blkif->ndev = NULL;
+ mtx_lock(&Giant);
+ device_detach(ndev);
+ mtx_unlock(&Giant);
+ }
+
+ xdev->data = NULL;
+ blkif->xdev = NULL;
+ blkif_put(blkif);
+
+ return 0;
+}
+
+static int
+blkback_resume(struct xenbus_device *xdev)
+{
+ DPRINTF("node=%s\n", xdev->nodename);
+ return 0;
+}
+
+static void
+frontend_changed(struct xenbus_device *xdev,
+ XenbusState frontend_state)
+{
+ blkif_t *blkif = xdev->data;
+
+ DPRINTF("state=%d\n", frontend_state);
+
+ blkif->frontend_state = frontend_state;
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ break;
+ case XenbusStateInitialised:
+ case XenbusStateConnected:
+ connect_ring(blkif);
+ connect(blkif);
+ break;
+ case XenbusStateClosing:
+ xenbus_switch_state(xdev, NULL, XenbusStateClosing);
+ break;
+ case XenbusStateClosed:
+ xenbus_remove_device(xdev);
+ break;
+ case XenbusStateUnknown:
+ case XenbusStateInitWait:
+ xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+/* ** Driver registration ** */
+
+static struct xenbus_device_id blkback_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+static struct xenbus_driver blkback = {
+ .name = "blkback",
+ .ids = blkback_ids,
+ .probe = blkback_probe,
+ .remove = blkback_remove,
+ .resume = blkback_resume,
+ .otherend_changed = frontend_changed,
+};
+
+static void
+blkback_init(void *unused)
+{
+ int i;
+
+ TASK_INIT(&blk_req_task, 0, blk_req_action, NULL);
+ mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF);
+
+ mtx_init(&pending_free_lock, "blk_pending_req_lock", "blkback pending request lock", MTX_DEF);
+
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ pending_reqs = malloc(sizeof(pending_reqs[0]) *
+ blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT);
+ pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) *
+ mmap_pages, M_DEVBUF, M_NOWAIT);
+ pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) *
+ mmap_pages, M_DEVBUF, M_NOWAIT);
+ mmap_vstart = alloc_empty_page_range(mmap_pages);
+ if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) {
+ if (pending_reqs)
+ free(pending_reqs, M_DEVBUF);
+ if (pending_grant_handles)
+ free(pending_grant_handles, M_DEVBUF);
+ if (pending_vaddrs)
+ free(pending_vaddrs, M_DEVBUF);
+ WPRINTF("out of memory\n");
+ return;
+ }
+
+ for (i = 0; i < mmap_pages; i++) {
+ pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+ pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
+ }
+
+ for (i = 0; i < blkif_reqs; i++) {
+ STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list);
+ }
+
+ DPRINTF("registering %s\n", blkback.name);
+ xenbus_register_backend(&blkback);
+}
+
+SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL)
+
+static void
+close_device(blkif_t *blkif)
+{
+ DPRINTF("closing dev=%s\n", blkif->dev_name);
+ if (blkif->vn) {
+ int flags = FREAD;
+
+ if (!blkif->read_only)
+ flags |= FWRITE;
+
+ if (blkif->csw) {
+ dev_relthread(blkif->cdev);
+ blkif->csw = NULL;
+ }
+
+ (void)vn_close(blkif->vn, flags, NOCRED, curthread);
+ blkif->vn = NULL;
+ }
+}
+
+static int
+open_device(blkif_t *blkif)
+{
+ struct nameidata nd;
+ struct vattr vattr;
+ struct cdev *dev;
+ struct cdevsw *devsw;
+ int flags = FREAD, err = 0;
+
+ DPRINTF("opening dev=%s\n", blkif->dev_name);
+
+ if (!blkif->read_only)
+ flags |= FWRITE;
+
+ if (!curthread->td_proc->p_fd->fd_cdir) {
+ curthread->td_proc->p_fd->fd_cdir = rootvnode;
+ VREF(rootvnode);
+ }
+ if (!curthread->td_proc->p_fd->fd_rdir) {
+ curthread->td_proc->p_fd->fd_rdir = rootvnode;
+ VREF(rootvnode);
+ }
+ if (!curthread->td_proc->p_fd->fd_jdir) {
+ curthread->td_proc->p_fd->fd_jdir = rootvnode;
+ VREF(rootvnode);
+ }
+
+ again:
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread);
+ err = vn_open(&nd, &flags, 0, -1);
+ if (err) {
+ if (blkif->dev_name[0] != '/') {
+ char *dev_path = "/dev/";
+ char *dev_name;
+
+ /* Try adding device path at beginning of name */
+ dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT);
+ if (dev_name) {
+ sprintf(dev_name, "%s%s", dev_path, blkif->dev_name);
+ free(blkif->dev_name, M_DEVBUF);
+ blkif->dev_name = dev_name;
+ goto again;
+ }
+ }
+ xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name);
+ return err;
+ }
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+
+ blkif->vn = nd.ni_vp;
+
+ /* We only support disks for now */
+ if (!vn_isdisk(blkif->vn, &err)) {
+ xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name);
+ VOP_UNLOCK(blkif->vn, 0, curthread);
+ goto error;
+ }
+
+ blkif->cdev = blkif->vn->v_rdev;
+ blkif->csw = dev_refthread(blkif->cdev);
+ PANIC_IF(blkif->csw == NULL);
+
+ err = VOP_GETATTR(blkif->vn, &vattr, NOCRED, curthread);
+ if (err) {
+ xenbus_dev_fatal(blkif->xdev, err,
+ "error getting vnode attributes for device %s", blkif->dev_name);
+ VOP_UNLOCK(blkif->vn, 0, curthread);
+ goto error;
+ }
+
+ VOP_UNLOCK(blkif->vn, 0, curthread);
+
+ dev = blkif->vn->v_rdev;
+ devsw = dev->si_devsw;
+ if (!devsw->d_ioctl) {
+ err = ENODEV;
+ xenbus_dev_fatal(blkif->xdev, err,
+ "no d_ioctl for device %s!", blkif->dev_name);
+ goto error;
+ }
+
+ err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread);
+ if (err) {
+ xenbus_dev_fatal(blkif->xdev, err,
+ "error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name);
+ goto error;
+ }
+ blkif->sector_size_shift = fls(blkif->sector_size) - 1;
+
+ err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread);
+ if (err) {
+ xenbus_dev_fatal(blkif->xdev, err,
+ "error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name);
+ goto error;
+ }
+ blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift;
+
+ blkif->major = umajor(vattr.va_rdev);
+ blkif->minor = uminor(vattr.va_rdev);
+
+ DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n",
+ blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size);
+
+ return 0;
+
+ error:
+ close_device(blkif);
+ return err;
+}
+
+static int
+vbd_add_dev(struct xenbus_device *xdev)
+{
+ blkif_t *blkif = xdev->data;
+ device_t nexus, ndev;
+ devclass_t dc;
+ int err = 0;
+
+ mtx_lock(&Giant);
+
+ /* We will add a vbd device as a child of nexus0 (for now) */
+ if (!(dc = devclass_find("nexus")) ||
+ !(nexus = devclass_get_device(dc, 0))) {
+ WPRINTF("could not find nexus0!\n");
+ err = ENOENT;
+ goto done;
+ }
+
+
+ /* Create a newbus device representing the vbd */
+ ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle);
+ if (!ndev) {
+ WPRINTF("could not create newbus device vbd%d!\n", blkif->handle);
+ err = EFAULT;
+ goto done;
+ }
+
+ blkif_get(blkif);
+ device_set_ivars(ndev, blkif);
+ blkif->ndev = ndev;
+
+ device_probe_and_attach(ndev);
+
+ done:
+
+ mtx_unlock(&Giant);
+
+ return err;
+}
+
+enum {
+ VBD_SYSCTL_DOMID,
+ VBD_SYSCTL_ST_RD_REQ,
+ VBD_SYSCTL_ST_WR_REQ,
+ VBD_SYSCTL_ST_OO_REQ,
+ VBD_SYSCTL_ST_ERR_REQ,
+ VBD_SYSCTL_RING,
+};
+
+static char *
+vbd_sysctl_ring_info(blkif_t *blkif, int cmd)
+{
+ char *buf = malloc(256, M_DEVBUF, M_WAITOK);
+ if (buf) {
+ if (!blkif->ring_connected)
+ sprintf(buf, "ring not connected\n");
+ else {
+ blkif_back_ring_t *ring = &blkif->ring;
+ sprintf(buf, "nr_ents=%x req_cons=%x"
+ " req_prod=%x req_event=%x"
+ " rsp_prod=%x rsp_event=%x",
+ ring->nr_ents, ring->req_cons,
+ ring->sring->req_prod, ring->sring->req_event,
+ ring->sring->rsp_prod, ring->sring->rsp_event);
+ }
+ }
+ return buf;
+}
+
+static int
+vbd_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ device_t dev = (device_t)arg1;
+ blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
+ const char *value;
+ char *buf = NULL;
+ int err;
+
+ switch (arg2) {
+ case VBD_SYSCTL_DOMID:
+ return sysctl_handle_int(oidp, NULL, blkif->domid, req);
+ case VBD_SYSCTL_ST_RD_REQ:
+ return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req);
+ case VBD_SYSCTL_ST_WR_REQ:
+ return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req);
+ case VBD_SYSCTL_ST_OO_REQ:
+ return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req);
+ case VBD_SYSCTL_ST_ERR_REQ:
+ return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req);
+ case VBD_SYSCTL_RING:
+ value = buf = vbd_sysctl_ring_info(blkif, arg2);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ err = SYSCTL_OUT(req, value, strlen(value));
+ if (buf != NULL)
+ free(buf, M_DEVBUF);
+
+ return err;
+}
+
+/* Newbus vbd device driver probe */
+static int
+vbd_probe(device_t dev)
+{
+ DPRINTF("vbd%d\n", device_get_unit(dev));
+ return 0;
+}
+
+/* Newbus vbd device driver attach */
+static int
+vbd_attach(device_t dev)
+{
+ blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
+
+ DPRINTF("%s\n", blkif->dev_name);
+
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I",
+ "domid of frontend");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I",
+ "number of read reqs");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I",
+ "number of write reqs");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I",
+ "number of deferred reqs");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I",
+ "number of reqs that returned error");
+#if XEN_BLKBACK_DEBUG
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "ring", CTLFLAG_RD,
+ dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A",
+ "req ring info");
+#endif
+
+ if (!open_device(blkif))
+ connect(blkif);
+
+ return bus_generic_attach(dev);
+}
+
+/* Newbus vbd device driver detach */
+static int
+vbd_detach(device_t dev)
+{
+ blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
+
+ DPRINTF("%s\n", blkif->dev_name);
+
+ close_device(blkif);
+
+ bus_generic_detach(dev);
+
+ blkif_put(blkif);
+
+ return 0;
+}
+
+static device_method_t vbd_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vbd_probe),
+ DEVMETHOD(device_attach, vbd_attach),
+ DEVMETHOD(device_detach, vbd_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+ {0, 0}
+};
+
+static devclass_t vbd_devclass;
+
+static driver_t vbd_driver = {
+ "vbd",
+ vbd_methods,
+ 0,
+};
+
+DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c
new file mode 100644
index 0000000..c448b81
--- /dev/null
+++ b/sys/dev/xen/blkfront/blkfront.c
@@ -0,0 +1,1021 @@
+/*-
+ * All rights reserved.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * XenoBSD block device driver
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/intr_machdep.h>
+#include <machine/vmparam.h>
+
+#include <machine/xen/hypervisor.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xen_intr.h>
+#include <machine/xen/xenbus.h>
+#include <machine/xen/evtchn.h>
+#include <xen/interface/grant_table.h>
+
+#include <geom/geom_disk.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/gnttab.h>
+
+#include <dev/xen/blkfront/block.h>
+
+#define ASSERT(S) KASSERT(S, (#S))
+/* prototypes */
+struct xb_softc;
+static void xb_startio(struct xb_softc *sc);
+static void connect(struct blkfront_info *);
+static void blkfront_closing(struct xenbus_device *);
+static int blkfront_remove(struct xenbus_device *);
+static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
+static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
+static void blkif_int(void *);
+#if 0
+static void blkif_restart_queue(void *arg);
+#endif
+static void blkif_recover(struct blkfront_info *);
+static void blkif_completion(struct blk_shadow *);
+static void blkif_free(struct blkfront_info *, int);
+
+#define GRANT_INVALID_REF 0
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
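+/* __RING_SIZE yields the number of ring entries that fit in one shared page. */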
+
+LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;
+
+/* Control whether runtime update of vbds is enabled. */
+#define ENABLE_VBD_UPDATE 0
+
+#if ENABLE_VBD_UPDATE
+static void vbd_update(void);
+#endif
+
+
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED 1
+#define BLKIF_STATE_SUSPENDED 2
+
+#ifdef notyet
+static char *blkif_state_name[] = {
+ [BLKIF_STATE_DISCONNECTED] = "disconnected",
+ [BLKIF_STATE_CONNECTED] = "connected",
+ [BLKIF_STATE_SUSPENDED] = "closed",
+};
+
+static char * blkif_status_name[] = {
+ [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+#endif
+#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
+#if 0
+#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+static grant_ref_t gref_head;
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
+
+static void kick_pending_request_queues(struct blkfront_info *);
+static int blkif_open(struct disk *dp);
+static int blkif_close(struct disk *dp);
+static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
+static int blkif_queue_request(struct bio *bp);
+static void xb_strategy(struct bio *bp);
+
+
+
+/* XXX move to xb_vbd.c when VBD update support is added */
+#define MAX_VBDS 64
+
+#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */
+#define XBD_SECTOR_SHFT 9
+
+static struct mtx blkif_io_lock;
+
+static unsigned long
+pfn_to_mfn(unsigned long pfn)
+{
+ return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+}
+
+
+int
+xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size,
+ struct blkfront_info *info)
+{
+ struct xb_softc *sc;
+ int error = 0;
+
+ sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ sc->xb_unit = unit;
+ sc->xb_info = info;
+ info->sc = sc;
+
+ memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
+ sc->xb_disk = disk_alloc();
+ sc->xb_disk->d_unit = unit;
+ sc->xb_disk->d_open = blkif_open;
+ sc->xb_disk->d_close = blkif_close;
+ sc->xb_disk->d_ioctl = blkif_ioctl;
+ sc->xb_disk->d_strategy = xb_strategy;
+ sc->xb_disk->d_name = "xbd";
+ sc->xb_disk->d_drv1 = sc;
+ sc->xb_disk->d_sectorsize = sector_size;
+
+ /* XXX */
+ sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
+#if 0
+ sc->xb_disk->d_maxsize = DFLTPHYS;
+#else /* XXX: xen can't handle large single i/o requests */
+ sc->xb_disk->d_maxsize = 4096;
+#endif
+#ifdef notyet
+ XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
+ xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
+ sc->xb_disk->d_mediasize);
+#endif
+ sc->xb_disk->d_flags = 0;
+ disk_create(sc->xb_disk, DISK_VERSION_00);
+ bioq_init(&sc->xb_bioq);
+
+ return error;
+}
+
+void
+xlvbd_del(struct blkfront_info *info)
+{
+ struct xb_softc *sc;
+
+ sc = info->sc;
+ disk_destroy(sc->xb_disk);
+}
+/************************ end VBD support *****************/
+
+/*
+ * Read/write routine for a buffer: find the proper unit, place the bio on
+ * its sort queue, and kick the controller.
+ */
+static void
+xb_strategy(struct bio *bp)
+{
+ struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+
+ /* bogus disk? */
+ if (sc == NULL) {
+ bp->bio_error = EINVAL;
+ bp->bio_flags |= BIO_ERROR;
+ goto bad;
+ }
+
+ DPRINTK("");
+
+ /*
+ * Place it in the queue of disk activities for this disk
+ */
+ mtx_lock(&blkif_io_lock);
+ bioq_disksort(&sc->xb_bioq, bp);
+
+ xb_startio(sc);
+ mtx_unlock(&blkif_io_lock);
+ return;
+
+ bad:
+ /*
+ * Correctly set the bio to indicate a failed transfer.
+ */
+ bp->bio_resid = bp->bio_bcount;
+ biodone(bp);
+ return;
+}
+
+
+/*
+ * Setup supplies the backend dir and virtual device.
+ * We allocate an event channel and shared frame entries,
+ * then watch the backend to learn when it is ready.
+ */
+static int blkfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err, vdevice, i;
+ struct blkfront_info *info;
+
+ /* FIXME: Use dynamic device id if this is not set. */
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
+ "virtual-device", "%i", &vdevice);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading virtual-device");
+ return err;
+ }
+
+ info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (info == NULL) {
+ xenbus_dev_fatal(dev, ENOMEM, "allocating info structure");
+ return ENOMEM;
+ }
+
+ /*
+ * XXX debug only
+ */
+ for (i = 0; i < sizeof(*info); i++)
+ if (((uint8_t *)info)[i] != 0)
+ panic("non-null memory");
+
+ info->shadow_free = 0;
+ info->xbdev = dev;
+ info->vdevice = vdevice;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+
+ /* work queue needed ? */
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Front end dir is a number, which is used as the id. */
+ info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+ dev->dev_driver_data = info;
+
+ err = talk_to_backend(dev, info);
+ if (err) {
+ free(info, M_DEVBUF);
+ dev->dev_driver_data = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+
+static int blkfront_resume(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev_driver_data;
+ int err;
+
+ DPRINTK("blkfront_resume: %s\n", dev->nodename);
+
+ blkif_free(info, 1);
+
+ err = talk_to_backend(dev, info);
+ if (!err)
+ blkif_recover(info);
+
+ return err;
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ const char *message = NULL;
+ struct xenbus_transaction xbt;
+ int err;
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, info);
+ if (err)
+ goto out;
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto destroy_blkring;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref","%u", info->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "event-channel", "%u", irq_to_evtchn_port(info->irq));
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto destroy_blkring;
+ }
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ if (message)
+ xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_blkring:
+ blkif_free(info, 0);
+ out:
+ return err;
+}
+
+static int
+setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
+{
+ blkif_sring_t *sring;
+ int err;
+
+ info->ring_ref = GRANT_INVALID_REF;
+
+ sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (sring == NULL) {
+ xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
+ return ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT));
+ if (err < 0) {
+ free(sring, M_DEVBUF);
+ info->ring.sring = NULL;
+ goto fail;
+ }
+ info->ring_ref = err;
+
+ err = bind_listening_port_to_irqhandler(dev->otherend_id,
+ "xbd", (driver_intr_t *)blkif_int, info,
+ INTR_TYPE_BIO | INTR_MPSAFE, NULL);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+ goto fail;
+ }
+ info->irq = err;
+
+ return 0;
+ fail:
+ blkif_free(info, 0);
+ return err;
+}
+
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void backend_changed(struct xenbus_device *dev,
+ XenbusState backend_state)
+{
+ struct blkfront_info *info = dev->dev_driver_data;
+
+ DPRINTK("blkfront:backend_changed.\n");
+
+ switch (backend_state) {
+ case XenbusStateUnknown:
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateConnected:
+ connect(info);
+ break;
+
+ case XenbusStateClosing:
+ if (info->users > 0)
+ xenbus_dev_error(dev, -EBUSY,
+ "Device in use; refusing to close");
+ else
+ blkfront_closing(dev);
+#ifdef notyet
+ bd = bdget(info->dev);
+ if (bd == NULL)
+ xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
+
+ down(&bd->bd_sem);
+ if (info->users > 0)
+ xenbus_dev_error(dev, -EBUSY,
+ "Device in use; refusing to close");
+ else
+ blkfront_closing(dev);
+ up(&bd->bd_sem);
+ bdput(bd);
+#endif
+ }
+}
+
+/*
+** Invoked when the backend is finally 'ready' (and has provided the
+** details about the physical device - #sectors, sector size, etc).
+*/
+static void
+connect(struct blkfront_info *info)
+{
+ unsigned long sectors, sector_size;
+ unsigned int binfo;
+ int err;
+
+ if( (info->connected == BLKIF_STATE_CONNECTED) ||
+ (info->connected == BLKIF_STATE_SUSPENDED) )
+ return;
+
+ DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "sectors", "%lu", &sectors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", &sector_size,
+ NULL);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err,
+ "reading backend fields at %s",
+ info->xbdev->otherend);
+ return;
+ }
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-barrier", "%lu", &info->feature_barrier,
+ NULL);
+ if (err)
+ info->feature_barrier = 0;
+
+ xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+
+ (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ /* Kick pending requests. */
+ mtx_lock(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
+ kick_pending_request_queues(info);
+ mtx_unlock(&blkif_io_lock);
+
+#if 0
+ add_disk(info->gd);
+#endif
+}
+
+/**
+ * Handle the change of state of the backend to Closing. We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend. Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void blkfront_closing(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev_driver_data;
+
+ DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
+
+ if (info->mi) {
+ DPRINTK("Calling xlvbd_del\n");
+ xlvbd_del(info);
+ info->mi = NULL;
+ }
+
+ xenbus_switch_state(dev, XenbusStateClosed);
+}
+
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev_driver_data;
+
+ DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
+
+ blkif_free(info, 0);
+
+ free(info, M_DEVBUF);
+
+ return 0;
+}
+
+
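+/*
+ * The shadow free list is threaded through the shadow[] array itself:
+ * each free entry's req.id holds the index of the next free entry,
+ * 0x0fffffff marks the end of the list, and 0x0fffffee is stamped into
+ * entries that have just been handed out (a debugging aid only).
+ */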
+static inline int
+GET_ID_FROM_FREELIST(struct blkfront_info *info)
+{
+ unsigned long nfree = info->shadow_free;
+
+ KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
+ info->shadow_free = info->shadow[nfree].req.id;
+ info->shadow[nfree].req.id = 0x0fffffee; /* debug */
+ return nfree;
+}
+
+static inline void
+ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
+{
+ info->shadow[id].req.id = info->shadow_free;
+ info->shadow[id].request = 0;
+ info->shadow_free = id;
+}
+
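+/*
+ * Publish any privately queued requests on the shared ring and, if the
+ * ring macro reports that the backend is waiting for an event, kick it
+ * through the event channel.
+ */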
+static inline void
+flush_requests(struct blkfront_info *info)
+{
+ int notify;
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+
+ if (notify)
+ notify_remote_via_irq(info->irq);
+}
+
+static void
+kick_pending_request_queues(struct blkfront_info *info)
+{
+ /* XXX check if we can't simplify */
+#if 0
+ if (!RING_FULL(&info->ring)) {
+ /* Re-enable calldowns. */
+ blk_start_queue(info->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(info->rq);
+ }
+#endif
+ if (!RING_FULL(&info->ring)) {
+#if 0
+ sc = LIST_FIRST(&xbsl_head);
+ LIST_REMOVE(sc, entry);
+ /* Re-enable calldowns. */
+ blk_start_queue(di->rq);
+#endif
+ /* Kick things off immediately. */
+ xb_startio(info->sc);
+ }
+}
+
+#if 0
+/* XXX */
+static void blkif_restart_queue(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+
+ mtx_lock(&blkif_io_lock);
+ kick_pending_request_queues(info);
+ mtx_unlock(&blkif_io_lock);
+}
+#endif
+
+static void blkif_restart_queue_callback(void *arg)
+{
+#if 0
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ /* XXX BSD equiv ? */
+
+ schedule_work(&info->work);
+#endif
+}
+
+static int
+blkif_open(struct disk *dp)
+{
+ struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+
+	if (sc == NULL) {
+		printk("xb: open: disk not found\n");
+		return (ENXIO);
+	}
+
+ sc->xb_flags |= XB_OPEN;
+ sc->xb_info->users++;
+ return (0);
+}
+
+static int
+blkif_close(struct disk *dp)
+{
+ struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+
+ if (sc == NULL)
+ return (ENXIO);
+ sc->xb_flags &= ~XB_OPEN;
+ if (--(sc->xb_info->users) == 0) {
+ /* Check whether we have been instructed to close. We will
+ have ignored this request initially, as the device was
+ still mounted. */
+ struct xenbus_device * dev = sc->xb_info->xbdev;
+ XenbusState state = xenbus_read_driver_state(dev->otherend);
+
+ if (state == XenbusStateClosing)
+ blkfront_closing(dev);
+ }
+ return (0);
+}
+
+static int
+blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+{
+ struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
+
+ if (sc == NULL)
+ return (ENXIO);
+
+ return (ENOTTY);
+}
+
+
+/*
+ * blkif_queue_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
+ * buffer: buffer to read/write into. this should be a
+ * virtual address in the guest os.
+ */
+static int blkif_queue_request(struct bio *bp)
+{
+ caddr_t alignbuf;
+ unsigned long buffer_ma;
+ blkif_request_t *ring_req;
+ unsigned long id;
+ unsigned int fsect, lsect;
+ struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+ struct blkfront_info *info = sc->xb_info;
+ int ref;
+
+ if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
+ return 1;
+
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+ gnttab_request_free_callback(
+ &info->callback,
+ blkif_restart_queue_callback,
+ info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+
+ /* Check if the buffer is properly aligned */
+ if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
+ int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
+ PAGE_SIZE;
+ caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
+ M_NOWAIT);
+
+ alignbuf = (char *)roundup2((u_long)newbuf, align);
+
+ /* save a copy of the current buffer */
+ bp->bio_driver1 = newbuf;
+ bp->bio_driver2 = alignbuf;
+
+ /* Copy the data for a write */
+ if (bp->bio_cmd == BIO_WRITE)
+ bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
+ } else
+ alignbuf = bp->bio_data;
+
+ /* Fill out a communications ring structure. */
+ ring_req = RING_GET_REQUEST(&info->ring,
+ info->ring.req_prod_pvt);
+ id = GET_ID_FROM_FREELIST(info);
+ info->shadow[id].request = (unsigned long)bp;
+
+ ring_req->id = id;
+ ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
+ BLKIF_OP_WRITE;
+
+ ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
+ ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
+
+ ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer
+ * chaining is not supported.
+ */
+
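+	/*
+	 * Describe the buffer to the backend as a machine frame plus first
+	 * and last sector offsets within that frame (XBD_SECTOR_SHFT is
+	 * presumably 9, i.e. 512-byte sectors), so a single segment must
+	 * not cross a page boundary; this is asserted below.
+	 */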
+ buffer_ma = vtomach(alignbuf);
+ fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
+ lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head);
+ KASSERT( ref != -ENOSPC, ("grant_reference failed") );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ info->xbdev->otherend_id,
+ buffer_ma >> PAGE_SHIFT,
+		ring_req->operation & 1 ); /* read-only for a write: the backend
+					    * only reads the granted page */
+ info->shadow[id].frame[ring_req->nr_segments] =
+ buffer_ma >> PAGE_SHIFT;
+
+ ring_req->seg[ring_req->nr_segments] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
+
+ ring_req->nr_segments++;
+ KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
+ ("XEN buffer must be sector aligned"));
+ KASSERT(lsect <= 7,
+ ("XEN disk driver data cannot cross a page boundary"));
+
+ buffer_ma &= ~PAGE_MASK;
+
+ info->ring.req_prod_pvt++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ info->shadow[id].req = *ring_req;
+
+ gnttab_free_grant_references(gref_head);
+
+ return 0;
+}
+
+
+
+/*
+ * Dequeue buffers and place them in the shared communication ring.
+ * Return when no more requests can be accepted or all buffers have
+ * been queued.
+ *
+ * Signal XEN once the ring has been filled out.
+ */
+static void
+xb_startio(struct xb_softc *sc)
+{
+ struct bio *bp;
+ int queued = 0;
+ struct blkfront_info *info = sc->xb_info;
+ DPRINTK("");
+
+ mtx_assert(&blkif_io_lock, MA_OWNED);
+
+ while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {
+
+ if (RING_FULL(&info->ring))
+ goto wait;
+
+ if (blkif_queue_request(bp)) {
+ wait:
+ bioq_insert_head(&sc->xb_bioq, bp);
+ break;
+ }
+ queued++;
+ }
+
+ if (queued != 0)
+ flush_requests(sc->xb_info);
+}
+
+static void
+blkif_int(void *xsc)
+{
+ struct xb_softc *sc = NULL;
+ struct bio *bp;
+ blkif_response_t *bret;
+ RING_IDX i, rp;
+ struct blkfront_info *info = xsc;
+ DPRINTK("");
+
+ TRACE_ENTER;
+
+ mtx_lock(&blkif_io_lock);
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ mtx_unlock(&blkif_io_lock);
+ return;
+ }
+
+ again:
+ rp = info->ring.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = info->ring.rsp_cons; i != rp; i++) {
+ unsigned long id;
+
+ bret = RING_GET_RESPONSE(&info->ring, i);
+ id = bret->id;
+ bp = (struct bio *)info->shadow[id].request;
+
+ blkif_completion(&info->shadow[id]);
+
+ ADD_ID_TO_FREELIST(info, id);
+
+ switch (bret->operation) {
+ case BLKIF_OP_READ:
+ /* had an unaligned buffer that needs to be copied */
+ if (bp->bio_driver1)
+ bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
+ /* FALLTHROUGH */
+ case BLKIF_OP_WRITE:
+
+ /* free the copy buffer */
+ if (bp->bio_driver1) {
+ free(bp->bio_driver1, M_DEVBUF);
+ bp->bio_driver1 = NULL;
+ }
+
+ if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) {
+ XENPRINTF("Bad return from blkdev data request: %x\n",
+ bret->status);
+ bp->bio_flags |= BIO_ERROR;
+ }
+
+ sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+
+ if (bp->bio_flags & BIO_ERROR)
+ bp->bio_error = EIO;
+ else
+ bp->bio_resid = 0;
+
+ biodone(bp);
+ break;
+ default:
+ panic("received invalid operation");
+ break;
+ }
+ }
+
+ info->ring.rsp_cons = i;
+
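+	/*
+	 * If requests are still outstanding, re-check the ring in case more
+	 * responses arrived while we were processing; otherwise arm
+	 * rsp_event so the backend raises an interrupt for the next response.
+	 */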
+ if (i != info->ring.req_prod_pvt) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ if (more_to_do)
+ goto again;
+ } else {
+ info->ring.sring->rsp_event = i + 1;
+ }
+
+ kick_pending_request_queues(info);
+
+ mtx_unlock(&blkif_io_lock);
+}
+
+static void
+blkif_free(struct blkfront_info *info, int suspend)
+{
+	/* Prevent new requests being issued until we fix things up. */
+ mtx_lock(&blkif_io_lock);
+ info->connected = suspend ?
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ mtx_unlock(&blkif_io_lock);
+
+ /* Free resources associated with old device channel. */
+ if (info->ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ info->ring.sring);
+ info->ring_ref = GRANT_INVALID_REF;
+ info->ring.sring = NULL;
+ }
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = 0;
+
+}
+
+static void
+blkif_completion(struct blk_shadow *s)
+{
+ int i;
+
+ for (i = 0; i < s->req.nr_segments; i++)
+ gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+}
+
+static void
+blkif_recover(struct blkfront_info *info)
+{
+ int i, j;
+ blkif_request_t *req;
+ struct blk_shadow *copy;
+
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
+ PANIC_IF(copy == NULL);
+ memcpy(copy, info->shadow, sizeof(info->shadow));
+
+ /* Stage 2: Set up free list. */
+ memset(&info->shadow, 0, sizeof(info->shadow));
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow_free = info->ring.req_prod_pvt;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ /* Not in use? */
+ if (copy[i].request == 0)
+ continue;
+
+ /* Grab a request slot and copy shadow state into it. */
+ req = RING_GET_REQUEST(
+ &info->ring, info->ring.req_prod_pvt);
+ *req = copy[i].req;
+
+ /* We get a new request id, and must reset the shadow state. */
+ req->id = GET_ID_FROM_FREELIST(info);
+ memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+ /* Rewrite any grant references invalidated by suspend/resume. */
+ for (j = 0; j < req->nr_segments; j++)
+ gnttab_grant_foreign_access_ref(
+ req->seg[j].gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(info->shadow[req->id].frame[j]),
+ 0 /* assume not readonly */);
+
+ info->shadow[req->id].req = *req;
+
+ info->ring.req_prod_pvt++;
+ }
+
+ free(copy, M_DEVBUF);
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ /* Now safe for us to use the shared ring */
+ mtx_lock(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
+ mtx_unlock(&blkif_io_lock);
+
+ /* Send off requeued requests */
+ mtx_lock(&blkif_io_lock);
+ flush_requests(info);
+
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
+ mtx_unlock(&blkif_io_lock);
+}
+
+static int
+blkfront_is_ready(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev_driver_data;
+
+ return info->is_ready;
+}
+
+static struct xenbus_device_id blkfront_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+
+static struct xenbus_driver blkfront = {
+ .name = "vbd",
+ .ids = blkfront_ids,
+ .probe = blkfront_probe,
+ .remove = blkfront_remove,
+ .resume = blkfront_resume,
+ .otherend_changed = backend_changed,
+ .is_ready = blkfront_is_ready,
+};
+
+
+
+static void
+xenbus_init(void)
+{
+ xenbus_register_frontend(&blkfront);
+}
+
+MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
+SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL);
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h
new file mode 100644
index 0000000..0d14459
--- /dev/null
+++ b/sys/dev/xen/blkfront/block.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+#include <xen/interface/io/blkif.h>
+
+struct xlbd_type_info
+{
+ int partn_shift;
+ int disks_per_major;
+ char *devname;
+ char *diskname;
+};
+
+struct xlbd_major_info
+{
+ int major;
+ int index;
+ int usage;
+ struct xlbd_type_info *type;
+};
+
+struct blk_shadow {
+ blkif_request_t req;
+ unsigned long request;
+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
+
+struct xb_softc {
+ device_t xb_dev;
+ struct disk *xb_disk; /* disk params */
+ struct bio_queue_head xb_bioq; /* sort queue */
+ int xb_unit;
+ int xb_flags;
+ struct blkfront_info *xb_info;
+ LIST_ENTRY(xb_softc) entry;
+#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
+};
+
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'. They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct blkfront_info
+{
+ struct xenbus_device *xbdev;
+ dev_t dev;
+ struct gendisk *gd;
+ int vdevice;
+ blkif_vdev_t handle;
+ int connected;
+ int ring_ref;
+ blkif_front_ring_t ring;
+ unsigned int irq;
+ struct xlbd_major_info *mi;
+#if 0
+ request_queue_t *rq;
+ struct work_struct work;
+#endif
+ struct gnttab_free_callback callback;
+ struct blk_shadow shadow[BLK_RING_SIZE];
+ unsigned long shadow_free;
+ struct xb_softc *sc;
+ int feature_barrier;
+ int is_ready;
+ /**
+ * The number of people holding this device open. We won't allow a
+ * hot-unplug unless this is 0.
+ */
+ int users;
+};
+/* Note that xlvbd_add doesn't call add_disk for you: you're expected
+ to call add_disk on info->gd once the disk is properly connected
+ up. */
+int xlvbd_add(blkif_sector_t capacity, int device,
+ uint16_t vdisk_info, uint16_t sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
+
diff --git a/sys/dev/xen/console/console.c b/sys/dev/xen/console/console.c
new file mode 100644
index 0000000..dc9fe6f
--- /dev/null
+++ b/sys/dev/xen/console/console.c
@@ -0,0 +1,564 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/tty.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <machine/stdarg.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/hypervisor.h>
+#include <machine/xen/xen_intr.h>
+#include <sys/cons.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+
+#include <dev/xen/console/xencons_ring.h>
+#include <xen/interface/io/console.h>
+
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+static char driver_name[] = "xc";
+devclass_t xc_devclass; /* do not make static */
+static void xcstart (struct tty *);
+static int xcparam (struct tty *, struct termios *);
+static void xcstop (struct tty *, int);
+static void xc_timeout(void *);
+static void __xencons_tx_flush(void);
+static boolean_t xcons_putc(int c);
+
+/* switch console so that shutdown can occur gracefully */
+static void xc_shutdown(void *arg, int howto);
+static int xc_mute;
+
+static void xcons_force_flush(void);
+static void xencons_priv_interrupt(void *);
+
+static cn_probe_t xccnprobe;
+static cn_init_t xccninit;
+static cn_getc_t xccngetc;
+static cn_putc_t xccnputc;
+static cn_putc_t xccnputc_dom0;
+static cn_checkc_t xccncheckc;
+
+#define XC_POLLTIME (hz/10)
+
+CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc,
+ xccncheckc, xccnputc, NULL);
+
+static int xen_console_up;
+static boolean_t xc_start_needed;
+static struct callout xc_callout;
+struct mtx cn_mtx;
+
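+/*
+ * Receive (rbuf) and transmit (wbuf) staging buffers.  The cons/prod
+ * indices (rc/rp and wc/wp below) increase monotonically and are reduced
+ * modulo the power-of-two buffer sizes by the *_MASK macros, so for
+ * example (wp - wc) is the number of bytes still waiting to be sent.
+ */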
+#define RBUF_SIZE 1024
+#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1))
+#define WBUF_SIZE 4096
+#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1))
+static char wbuf[WBUF_SIZE];
+static char rbuf[RBUF_SIZE];
+static int rc, rp;
+static unsigned int cnsl_evt_reg;
+static unsigned int wc, wp; /* write_cons, write_prod */
+
+#define CDEV_MAJOR 12
+#define XCUNIT(x) (minor(x))
+#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN))
+#define CN_LOCK_INIT(x, _name) \
+ mtx_init(&x, _name, NULL, MTX_SPIN|MTX_RECURSE)
+
+#define CN_LOCK(l) \
+ do { \
+ if (panicstr == NULL) \
+ mtx_lock_spin(&(l)); \
+ } while (0)
+#define CN_UNLOCK(l) \
+ do { \
+ if (panicstr == NULL) \
+ mtx_unlock_spin(&(l)); \
+ } while (0)
+#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED)
+#define CN_LOCK_DESTROY(x) mtx_destroy(&x)
+
+
+static struct tty *xccons;
+
+struct xc_softc {
+ int xc_unit;
+ struct cdev *xc_dev;
+};
+
+
+static d_open_t xcopen;
+static d_close_t xcclose;
+static d_ioctl_t xcioctl;
+
+static struct cdevsw xc_cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_TTY | D_NEEDGIANT,
+ .d_name = driver_name,
+ .d_open = xcopen,
+ .d_close = xcclose,
+ .d_read = ttyread,
+ .d_write = ttywrite,
+ .d_ioctl = xcioctl,
+ .d_poll = ttypoll,
+ .d_kqfilter = ttykqfilter,
+};
+
+static void
+xccnprobe(struct consdev *cp)
+{
+ cp->cn_pri = CN_REMOTE;
+ cp->cn_tp = xccons;
+ sprintf(cp->cn_name, "%s0", driver_name);
+}
+
+
+static void
+xccninit(struct consdev *cp)
+{
+ CN_LOCK_INIT(cn_mtx,"XCONS LOCK");
+
+}
+int
+xccngetc(struct consdev *dev)
+{
+ int c;
+ if (xc_mute)
+ return 0;
+ do {
+ if ((c = xccncheckc(dev)) == -1) {
+			/*
+			 * Polling without sleeping in Xen doesn't work well;
+			 * sleeping gives other things, such as the clock, a
+			 * chance to run.
+			 */
+ tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep",
+ XC_POLLTIME);
+ }
+ } while(c == -1);
+ return c;
+}
+
+int
+xccncheckc(struct consdev *dev)
+{
+ int ret = (xc_mute ? 0 : -1);
+ if (xencons_has_input())
+ xencons_handle_input(NULL);
+
+ CN_LOCK(cn_mtx);
+ if ((rp - rc)) {
+ /* we need to return only one char */
+ ret = (int)rbuf[RBUF_MASK(rc)];
+ rc++;
+ }
+ CN_UNLOCK(cn_mtx);
+ return(ret);
+}
+
+static void
+xccnputc(struct consdev *dev, int c)
+{
+ xcons_putc(c);
+}
+
+static void
+xccnputc_dom0(struct consdev *dev, int c)
+{
+ HYPERVISOR_console_io(CONSOLEIO_write, 1, (char *)&c);
+}
+
+extern int db_active;
+static boolean_t
+xcons_putc(int c)
+{
+ int force_flush = xc_mute ||
+#ifdef DDB
+ db_active ||
+#endif
+ panicstr; /* we're not gonna recover, so force
+ * flush
+ */
+
+ if ((wp-wc) < (WBUF_SIZE-1)) {
+ if ((wbuf[WBUF_MASK(wp++)] = c) == '\n') {
+ wbuf[WBUF_MASK(wp++)] = '\r';
+#ifdef notyet
+ if (force_flush)
+ xcons_force_flush();
+#endif
+ }
+ } else if (force_flush) {
+#ifdef notyet
+ xcons_force_flush();
+#endif
+ }
+ if (cnsl_evt_reg)
+ __xencons_tx_flush();
+
+ /* inform start path that we're pretty full */
+ return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE;
+}
+
+static void
+xc_identify(driver_t *driver, device_t parent)
+{
+ device_t child;
+ child = BUS_ADD_CHILD(parent, 0, driver_name, 0);
+ device_set_driver(child, driver);
+ device_set_desc(child, "Xen Console");
+}
+
+static int
+xc_probe(device_t dev)
+{
+ struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
+
+ sc->xc_unit = device_get_unit(dev);
+ return (0);
+}
+
+static int
+xc_attach(device_t dev)
+{
+ struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
+
+
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ xc_consdev.cn_putc = xccnputc_dom0;
+ }
+
+ sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0);
+ xccons = ttyalloc();
+
+ sc->xc_dev->si_drv1 = (void *)sc;
+ sc->xc_dev->si_tty = xccons;
+
+ xccons->t_oproc = xcstart;
+ xccons->t_param = xcparam;
+ xccons->t_stop = xcstop;
+ xccons->t_dev = sc->xc_dev;
+
+ callout_init(&xc_callout, 0);
+
+ xencons_ring_init();
+
+ cnsl_evt_reg = 1;
+ callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons);
+
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ PANIC_IF(bind_virq_to_irqhandler(
+ VIRQ_CONSOLE,
+ 0,
+ "console",
+ NULL,
+ xencons_priv_interrupt,
+ INTR_TYPE_TTY) < 0);
+
+ }
+
+
+ /* register handler to flush console on shutdown */
+ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown,
+ NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
+ printf("xencons: shutdown event registration failed!\n");
+
+ TRACE_EXIT;
+ return (0);
+}
+
+/*
+ * return 0 for all console input, force flush all output.
+ */
+static void
+xc_shutdown(void *arg, int howto)
+{
+ xc_mute = 1;
+ xcons_force_flush();
+}
+
+void
+xencons_rx(char *buf, unsigned len)
+{
+ int i;
+ struct tty *tp = xccons;
+
+ for (i = 0; i < len; i++) {
+ if (xen_console_up)
+ (*linesw[tp->t_line]->l_rint)(buf[i], tp);
+ else
+ rbuf[RBUF_MASK(rp++)] = buf[i];
+ }
+}
+
+static void
+__xencons_tx_flush(void)
+{
+ int sz, work_done = 0;
+
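+	/*
+	 * Drain the write buffer in contiguous chunks, never past the wrap
+	 * point.  Dom0 writes straight to the hypervisor console; a domU
+	 * pushes through the shared ring and stops once the ring is full.
+	 */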
+ CN_LOCK(cn_mtx);
+ while (wc != wp) {
+ int sent;
+ sz = wp - wc;
+ if (sz > (WBUF_SIZE - WBUF_MASK(wc)))
+ sz = WBUF_SIZE - WBUF_MASK(wc);
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ HYPERVISOR_console_io(CONSOLEIO_write, sz, &wbuf[WBUF_MASK(wc)]);
+ wc += sz;
+ } else {
+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+ if (sent == 0)
+ break;
+ wc += sent;
+ }
+ work_done = 1;
+ }
+ CN_UNLOCK(cn_mtx);
+
+	/*
+	 * ttwakeup() calls routines that take blocking locks, so only call
+	 * it when it is safe to do so.
+	 */
+ if (work_done && xen_console_up && curthread->td_critnest == 0)
+ ttwakeup(xccons);
+}
+
+void
+xencons_tx(void)
+{
+ __xencons_tx_flush();
+}
+
+static void
+xencons_priv_interrupt(void *arg)
+{
+
+ static char rbuf[16];
+ int l;
+
+ while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
+ xencons_rx(rbuf, l);
+
+ xencons_tx();
+}
+
+int
+xcopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct xc_softc *sc;
+ int unit = XCUNIT(dev);
+ struct tty *tp;
+ int s, error;
+
+ sc = (struct xc_softc *)device_get_softc(
+ devclass_get_device(xc_devclass, unit));
+ if (sc == NULL)
+ return (ENXIO);
+
+ TRACE_ENTER;
+ tp = dev->si_tty;
+ s = spltty();
+ if (!ISTTYOPEN(tp)) {
+ tp->t_state |= TS_CARR_ON;
+ ttychars(tp);
+ tp->t_iflag = TTYDEF_IFLAG;
+ tp->t_oflag = TTYDEF_OFLAG;
+ tp->t_cflag = TTYDEF_CFLAG|CLOCAL;
+ tp->t_lflag = TTYDEF_LFLAG;
+ tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
+ xcparam(tp, &tp->t_termios);
+ ttsetwater(tp);
+ } else if (tp->t_state & TS_XCLUDE && priv_check(td, PRIV_ROOT)) {
+ splx(s);
+ return (EBUSY);
+ }
+ splx(s);
+
+ xen_console_up = 1;
+
+ error = (*linesw[tp->t_line]->l_open)(dev, tp);
+ TRACE_EXIT;
+ return error;
+}
+
+int
+xcclose(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct tty *tp = dev->si_tty;
+
+ if (tp == NULL)
+ return (0);
+ xen_console_up = 0;
+
+ spltty();
+ (*linesw[tp->t_line]->l_close)(tp, flag);
+ tty_close(tp);
+ spl0();
+ return (0);
+}
+
+
+int
+xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+{
+ struct tty *tp = dev->si_tty;
+ int error;
+
+ error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td);
+ if (error != ENOIOCTL)
+ return (error);
+
+ error = ttioctl(tp, cmd, data, flag);
+
+ if (error != ENOIOCTL)
+ return (error);
+
+ return (ENOTTY);
+}
+
+static inline int
+__xencons_put_char(int ch)
+{
+ char _ch = (char)ch;
+ if ((wp - wc) == WBUF_SIZE)
+ return 0;
+ wbuf[WBUF_MASK(wp++)] = _ch;
+ return 1;
+}
+
+
+static void
+xcstart(struct tty *tp)
+{
+ boolean_t cons_full = FALSE;
+
+ CN_LOCK(cn_mtx);
+ if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
+ CN_UNLOCK(cn_mtx);
+
+ ttwwakeup(tp);
+ return;
+ }
+
+ tp->t_state |= TS_BUSY;
+ CN_UNLOCK(cn_mtx);
+
+ while (tp->t_outq.c_cc != 0 && !cons_full)
+ cons_full = xcons_putc(getc(&tp->t_outq));
+
+ /* if the console is close to full leave our state as busy */
+ if (!cons_full) {
+ CN_LOCK(cn_mtx);
+ tp->t_state &= ~TS_BUSY;
+ CN_UNLOCK(cn_mtx);
+ ttwwakeup(tp);
+ } else {
+ /* let the timeout kick us in a bit */
+ xc_start_needed = TRUE;
+ }
+
+}
+
+static void
+xcstop(struct tty *tp, int flag)
+{
+
+ if (tp->t_state & TS_BUSY) {
+ if ((tp->t_state & TS_TTSTOP) == 0) {
+ tp->t_state |= TS_FLUSH;
+ }
+ }
+}
+
+static void
+xc_timeout(void *v)
+{
+ struct tty *tp;
+ int c;
+
+ tp = (struct tty *)v;
+
+ while ((c = xccncheckc(NULL)) != -1) {
+ if (tp->t_state & TS_ISOPEN) {
+ (*linesw[tp->t_line]->l_rint)(c, tp);
+ }
+ }
+
+ if (xc_start_needed) {
+ xc_start_needed = FALSE;
+ xcstart(tp);
+ }
+
+ callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp);
+}
+
+/*
+ * Set line parameters.
+ */
+int
+xcparam(struct tty *tp, struct termios *t)
+{
+ tp->t_ispeed = t->c_ispeed;
+ tp->t_ospeed = t->c_ospeed;
+ tp->t_cflag = t->c_cflag;
+ return (0);
+}
+
+
+static device_method_t xc_methods[] = {
+ DEVMETHOD(device_identify, xc_identify),
+ DEVMETHOD(device_probe, xc_probe),
+ DEVMETHOD(device_attach, xc_attach),
+ {0, 0}
+};
+
+static driver_t xc_driver = {
+ driver_name,
+ xc_methods,
+ sizeof(struct xc_softc),
+};
+
+/*** Forcibly flush console data before dying. ***/
+void
+xcons_force_flush(void)
+{
+ int sz;
+
+ if (xen_start_info->flags & SIF_INITDOMAIN)
+ return;
+
+ /* Spin until console data is flushed through to the domain controller. */
+ while (wc != wp) {
+ int sent = 0;
+ if ((sz = wp - wc) == 0)
+ continue;
+
+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+ if (sent > 0)
+ wc += sent;
+ }
+}
+
+DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0);
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/sys/dev/xen/console/xencons_ring.c b/sys/dev/xen/console/xencons_ring.c
new file mode 100644
index 0000000..c9b60ac
--- /dev/null
+++ b/sys/dev/xen/console/xencons_ring.c
@@ -0,0 +1,154 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/tty.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <machine/stdarg.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/hypervisor.h>
+#include <machine/xen/xen_intr.h>
+#include <sys/cons.h>
+
+
+#include <dev/xen/console/xencons_ring.h>
+#include <machine/xen/evtchn.h>
+#include <xen/interface/io/console.h>
+
+
+#define console_evtchn console.domU.evtchn
+extern char *console_page;
+
+static inline struct xencons_interface *
+xencons_interface(void)
+{
+ return (struct xencons_interface *)console_page;
+}
+
+
+int
+xencons_has_input(void)
+{
+ struct xencons_interface *intf;
+
+ intf = xencons_interface();
+
+ return (intf->in_cons != intf->in_prod);
+}
+
+
+int
+xencons_ring_send(const char *data, unsigned len)
+{
+ struct xencons_interface *intf;
+ XENCONS_RING_IDX cons, prod;
+ int sent;
+
+ intf = xencons_interface();
+ cons = intf->out_cons;
+ prod = intf->out_prod;
+ sent = 0;
+
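+	/*
+	 * The barrier ensures an up-to-date view of out_cons before sizing
+	 * the copy; the write barrier below makes the copied bytes visible
+	 * before the new out_prod is published.
+	 */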
+ mb();
+ PANIC_IF((prod - cons) > sizeof(intf->out));
+
+ while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
+ intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
+
+ wmb();
+ intf->out_prod = prod;
+
+ notify_remote_via_evtchn(xen_start_info->console_evtchn);
+
+ return sent;
+
+}
+
+
+static xencons_receiver_func *xencons_receiver;
+
+void
+xencons_handle_input(void *unused)
+{
+ struct xencons_interface *intf;
+ XENCONS_RING_IDX cons, prod;
+
+ intf = xencons_interface();
+
+ cons = intf->in_cons;
+ prod = intf->in_prod;
+
+ /* XXX needs locking */
+ while (cons != prod) {
+ xencons_rx(intf->in + MASK_XENCONS_IDX(cons, intf->in), 1);
+ cons++;
+ }
+
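+	/*
+	 * Make sure the bytes have been read out of the ring before the new
+	 * consumer index is published; the notification below tells the
+	 * backend that space is available again.
+	 */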
+ mb();
+ intf->in_cons = cons;
+
+ notify_remote_via_evtchn(xen_start_info->console_evtchn);
+
+ xencons_tx();
+}
+
+void
+xencons_ring_register_receiver(xencons_receiver_func *f)
+{
+ xencons_receiver = f;
+}
+
+int
+xencons_ring_init(void)
+{
+ int err;
+
+ if (!xen_start_info->console_evtchn)
+ return 0;
+
+ err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn,
+ "xencons", xencons_handle_input, NULL,
+ INTR_TYPE_MISC | INTR_MPSAFE, NULL);
+ if (err) {
+ XENPRINTF("XEN console request irq failed %i\n", err);
+ return err;
+ }
+
+ return 0;
+}
+#ifdef notyet
+void
+xencons_suspend(void)
+{
+
+ if (!xen_start_info->console_evtchn)
+ return;
+
+ unbind_evtchn_from_irqhandler(xen_start_info->console_evtchn, NULL);
+}
+
+void
+xencons_resume(void)
+{
+
+ (void)xencons_ring_init();
+}
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/sys/dev/xen/console/xencons_ring.h b/sys/dev/xen/console/xencons_ring.h
new file mode 100644
index 0000000..fc97d95
--- /dev/null
+++ b/sys/dev/xen/console/xencons_ring.h
@@ -0,0 +1,20 @@
+/*
+ * $FreeBSD$
+ *
+ */
+#ifndef _XENCONS_RING_H
+#define _XENCONS_RING_H
+
+int xencons_ring_init(void);
+int xencons_ring_send(const char *data, unsigned len);
+void xencons_rx(char *buf, unsigned len);
+void xencons_tx(void);
+
+
+typedef void (xencons_receiver_func)(char *buf, unsigned len);
+void xencons_ring_register_receiver(xencons_receiver_func *f);
+
+void xencons_handle_input(void *unused);
+int xencons_has_input(void);
+
+#endif /* _XENCONS_RING_H */
diff --git a/sys/dev/xen/evtchn/evtchn_dev.c b/sys/dev/xen/evtchn/evtchn_dev.c
new file mode 100644
index 0000000..a206708
--- /dev/null
+++ b/sys/dev/xen/evtchn/evtchn_dev.c
@@ -0,0 +1,394 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Xenolinux driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+#include <machine/xen-os.h>
+#include <machine/xen_intr.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/synch_bitops.h>
+
+#include <machine/hypervisor.h>
+
+
+typedef struct evtchn_softc {
+
+ struct selinfo ev_rsel;
+} evtchn_softc_t;
+
+
+#ifdef linuxcrap
+/* NB. This must be shared amongst drivers if more things go in /dev/xen */
+static devfs_handle_t xen_dev_dir;
+#endif
+
+/* Only one process may open /dev/xen/evtchn at any time. */
+static unsigned long evtchn_dev_inuse;
+
+/* Notification ring, accessed via /dev/xen/evtchn. */
+
+#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
+
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+static uint16_t *ring;
+static unsigned int ring_cons, ring_prod, ring_overflow;
+
+/* Which ports is user-space bound to? */
+static uint32_t bound_ports[32];
+
+/* Unique address for processes to sleep on */
+static void *evtchn_waddr = &ring;
+
+static struct mtx lock, upcall_lock;
+
+static d_read_t evtchn_read;
+static d_write_t evtchn_write;
+static d_ioctl_t evtchn_ioctl;
+static d_poll_t evtchn_poll;
+static d_open_t evtchn_open;
+static d_close_t evtchn_close;
+
+
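+/*
+ * Called from the event-channel upcall path: mask and clear the port,
+ * queue its number on the notification ring for userland to read(2),
+ * and wake a sleeping reader only on the empty-to-non-empty transition.
+ */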
+void
+evtchn_device_upcall(int port)
+{
+ mtx_lock(&upcall_lock);
+
+ mask_evtchn(port);
+ clear_evtchn(port);
+
+ if ( ring != NULL ) {
+ if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
+ ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
+ if ( ring_cons == ring_prod++ ) {
+ wakeup(evtchn_waddr);
+ }
+ }
+ else {
+ ring_overflow = 1;
+ }
+ }
+
+ mtx_unlock(&upcall_lock);
+}
+
+static void
+__evtchn_reset_buffer_ring(void)
+{
+ /* Initialise the ring to empty. Clear errors. */
+ ring_cons = ring_prod = ring_overflow = 0;
+}
+
+static int
+evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ int rc;
+ unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0;
+ count = uio->uio_resid;
+
+ count &= ~1; /* even number of bytes */
+
+ if ( count == 0 )
+ {
+ rc = 0;
+ goto out;
+ }
+
+ if ( count > PAGE_SIZE )
+ count = PAGE_SIZE;
+
+ for ( ; ; ) {
+ if ( (c = ring_cons) != (p = ring_prod) )
+ break;
+
+ if ( ring_overflow ) {
+ rc = EFBIG;
+ goto out;
+ }
+
+ if (sst != 0) {
+ rc = EINTR;
+ goto out;
+ }
+
+ /* PCATCH == check for signals before and after sleeping
+ * PWAIT == priority of waiting on resource
+ */
+ sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10);
+ }
+
+ /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+ if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
+ bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
+ }
+ else {
+ bytes1 = (p - c) * sizeof(uint16_t);
+ bytes2 = 0;
+ }
+
+ /* Truncate chunks according to caller's maximum byte count. */
+ if ( bytes1 > count ) {
+ bytes1 = count;
+ bytes2 = 0;
+ }
+ else if ( (bytes1 + bytes2) > count ) {
+ bytes2 = count - bytes1;
+ }
+
+ if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) ||
+ ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio)))
+	/*
+	 * Keeping this around because the seemingly equivalent
+	 * copyout(&ring[0], &buf[bytes1], bytes2) is not actually equivalent.
+	 */
+ {
+ rc = EFAULT;
+ goto out;
+ }
+
+ ring_cons += (bytes1 + bytes2) / sizeof(uint16_t);
+
+ rc = bytes1 + bytes2;
+
+ out:
+
+ return rc;
+}
+
+static int
+evtchn_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ int rc, i, count;
+
+ count = uio->uio_resid;
+
+ uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+
+
+ if ( kbuf == NULL )
+ return ENOMEM;
+
+ count &= ~1; /* even number of bytes */
+
+ if ( count == 0 ) {
+ rc = 0;
+ goto out;
+ }
+
+ if ( count > PAGE_SIZE )
+ count = PAGE_SIZE;
+
+ if ( uiomove(kbuf, count, uio) != 0 ) {
+ rc = EFAULT;
+ goto out;
+ }
+
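+	/*
+	 * Each 16-bit value written names an event-channel port; only ports
+	 * the process previously bound with EVTCHN_BIND are unmasked.
+	 */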
+ mtx_lock_spin(&lock);
+ for ( i = 0; i < (count/2); i++ )
+ if ( test_bit(kbuf[i], &bound_ports[0]) )
+ unmask_evtchn(kbuf[i]);
+ mtx_unlock_spin(&lock);
+
+ rc = count;
+
+ out:
+ free(kbuf, M_DEVBUF);
+ return rc;
+}
+
+static int
+evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
+ int mode, struct thread *td __unused)
+{
+ int rc = 0;
+
+ mtx_lock_spin(&lock);
+
+ switch ( cmd )
+ {
+ case EVTCHN_RESET:
+ __evtchn_reset_buffer_ring();
+ break;
+ case EVTCHN_BIND:
+ if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) )
+ unmask_evtchn((int)arg);
+ else
+ rc = EINVAL;
+ break;
+ case EVTCHN_UNBIND:
+ if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) )
+ mask_evtchn((int)arg);
+ else
+ rc = EINVAL;
+ break;
+ default:
+ rc = ENOSYS;
+ break;
+ }
+
+ mtx_unlock_spin(&lock);
+
+ return rc;
+}
+
+static int
+evtchn_poll(struct cdev *dev, int poll_events, struct thread *td)
+{
+
+ evtchn_softc_t *sc;
+ unsigned int mask = POLLOUT | POLLWRNORM;
+
+ sc = dev->si_drv1;
+
+ if ( ring_cons != ring_prod )
+ mask |= POLLIN | POLLRDNORM;
+ else if ( ring_overflow )
+ mask = POLLERR;
+ else
+ selrecord(td, &sc->ev_rsel);
+
+
+ return mask;
+}
+
+
+static int
+evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td)
+{
+ uint16_t *_ring;
+
+ if (flag & O_NONBLOCK)
+ return EBUSY;
+
+ if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) )
+ return EBUSY;
+
+ if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL )
+ return ENOMEM;
+
+ mtx_lock_spin(&lock);
+ ring = _ring;
+ __evtchn_reset_buffer_ring();
+ mtx_unlock_spin(&lock);
+
+
+ return 0;
+}
+
+static int
+evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused)
+{
+ int i;
+
+ mtx_lock_spin(&lock);
+ if (ring != NULL) {
+ free(ring, M_DEVBUF);
+ ring = NULL;
+ }
+ for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
+ if ( synch_test_and_clear_bit(i, &bound_ports[0]) )
+ mask_evtchn(i);
+ mtx_unlock_spin(&lock);
+
+ evtchn_dev_inuse = 0;
+
+ return 0;
+}
+
+static struct cdevsw evtchn_devsw = {
+ d_version: D_VERSION,
+ d_open: evtchn_open,
+ d_close: evtchn_close,
+ d_read: evtchn_read,
+ d_write: evtchn_write,
+ d_ioctl: evtchn_ioctl,
+ d_poll: evtchn_poll,
+ d_name: "evtchn",
+ d_flags: 0,
+};
+
+
+/* XXX - if this device is ever supposed to support use by more than one process
+ * this global static will have to go away
+ */
+static struct cdev *evtchn_dev;
+
+
+
+static int
+evtchn_init(void *dummy __unused)
+{
+	/* XXX I believe we don't need these; leaving them here for now until
+	 * we have some semblance of it working.
+	 */
+ mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF);
+
+ /* (DEVFS) create '/dev/misc/evtchn'. */
+ evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn");
+
+ mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+ evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK);
+ bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t));
+
+ /* XXX I don't think we need any of this rubbish */
+#if 0
+ if ( err != 0 )
+ {
+ printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+ return err;
+ }
+
+ /* (DEVFS) create directory '/dev/xen'. */
+ xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
+
+ /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
+ pos = devfs_generate_path(evtchn_miscdev.devfs_handle,
+ &link_dest[3],
+ sizeof(link_dest) - 3);
+ if ( pos >= 0 )
+ strncpy(&link_dest[pos], "../", 3);
+ /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
+ (void)devfs_mk_symlink(xen_dev_dir,
+ "evtchn",
+ DEVFS_FL_DEFAULT,
+ &link_dest[pos],
+ &symlink_handle,
+ NULL);
+
+ /* (DEVFS) automatically destroy the symlink with its destination. */
+ devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
+#endif
+ printk("Event-channel device installed.\n");
+
+ return 0;
+}
+
+
+SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL);
+
+
diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
new file mode 100644
index 0000000..950a68c
--- /dev/null
+++ b/sys/dev/xen/netback/netback.c
@@ -0,0 +1,1585 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_types.h>
+#include <net/ethernet.h>
+#include <net/if_bridgevar.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#include <machine/in_cksum.h>
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <dev/xen/xenbus/xenbus_comms.h>
+
+
+#ifdef XEN_NETBACK_DEBUG
+#define DPRINTF(fmt, args...) \
+ printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+#define DDPRINTF(fmt, args...) \
+ printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
+#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
+#else
+#define DDPRINTF(fmt, args...) ((void)0)
+#define DPRINTF_MBUF(_m) ((void)0)
+#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+#endif
+
+#define WPRINTF(fmt, args...) \
+ printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+#define BUG_ON PANIC_IF
+
+#define IFNAME(_np) (_np)->ifp->if_xname
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+struct ring_ref {
+ vm_offset_t va;
+ grant_handle_t handle;
+ uint64_t bus_addr;
+};
+
+typedef struct netback_info {
+
+ /* Schedule lists */
+ STAILQ_ENTRY(netback_info) next_tx;
+ STAILQ_ENTRY(netback_info) next_rx;
+ int on_tx_sched_list;
+ int on_rx_sched_list;
+
+ struct xenbus_device *xdev;
+ XenbusState frontend_state;
+
+ domid_t domid;
+ int handle;
+ char *bridge;
+
+ int rings_connected;
+ struct ring_ref tx_ring_ref;
+ struct ring_ref rx_ring_ref;
+ netif_tx_back_ring_t tx;
+ netif_rx_back_ring_t rx;
+ evtchn_port_t evtchn;
+ int irq;
+ void *irq_cookie;
+
+ struct ifnet *ifp;
+ int ref_cnt;
+
+ device_t ndev;
+ int attached;
+} netif_t;
+
+
+#define MAX_PENDING_REQS 256
+#define PKT_PROT_LEN 64
+
+static struct {
+ netif_tx_request_t req;
+ netif_t *netif;
+} pending_tx_info[MAX_PENDING_REQS];
+static uint16_t pending_ring[MAX_PENDING_REQS];
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+static unsigned long mmap_vstart;
+#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
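+/*
+ * Each in-flight transmit request owns one slot: its frontend request and
+ * owning netif are kept in pending_tx_info[], the guest's data page is
+ * grant-mapped at MMAP_VADDR(slot), and free slot numbers are recycled
+ * through pending_ring[].
+ */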
+
+/* Freed TX mbufs get batched on this ring before return to pending_ring. */
+static uint16_t dealloc_ring[MAX_PENDING_REQS];
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
+
+static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
+static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
+
+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+
+static struct task net_tx_task, net_rx_task;
+static struct callout rx_task_callout;
+
+static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
+ STAILQ_HEAD_INITIALIZER(tx_sched_list);
+static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
+ STAILQ_HEAD_INITIALIZER(rx_sched_list);
+static struct mtx tx_sched_list_lock;
+static struct mtx rx_sched_list_lock;
+
+static int vif_unit_maker = 0;
+
+/* Protos */
+static void netback_start(struct ifnet *ifp);
+static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static int vif_add_dev(struct xenbus_device *xdev);
+static void disconnect_rings(netif_t *netif);
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+/* Debug code to display the contents of an mbuf */
+static void
+print_mbuf(struct mbuf *m, int max)
+{
+ int i, j=0;
+ printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
+ for (; m; m = m->m_next) {
+ unsigned char *d = m->m_data;
+ for (i=0; i < m->m_len; i++) {
+ if (max && j == max)
+ break;
+ if ((j++ % 16) == 0)
+ printf("\n%04x:", j);
+ printf(" %02x", d[i]);
+ }
+ }
+ printf("\n");
+}
+#endif
+
+
+#define MAX_MFN_ALLOC 64
+static unsigned long mfn_list[MAX_MFN_ALLOC];
+static unsigned int alloc_index = 0;
+
+static unsigned long
+alloc_mfn(void)
+{
+ unsigned long mfn = 0;
+ struct xen_memory_reservation reservation = {
+ .extent_start = mfn_list,
+ .nr_extents = MAX_MFN_ALLOC,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ if ( unlikely(alloc_index == 0) )
+ alloc_index = HYPERVISOR_memory_op(
+ XENMEM_increase_reservation, &reservation);
+ if ( alloc_index != 0 )
+ mfn = mfn_list[--alloc_index];
+ return mfn;
+}
+
+static unsigned long
+alloc_empty_page_range(unsigned long nr_pages)
+{
+ void *pages;
+ int i = 0, j = 0;
+ multicall_entry_t mcl[17];
+ unsigned long mfn_list[16];
+ struct xen_memory_reservation reservation = {
+ .extent_start = mfn_list,
+ .nr_extents = 0,
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+ if (pages == NULL)
+ return 0;
+
+ memset(mcl, 0, sizeof(mcl));
+
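+	/*
+	 * Unmap the freshly allocated pages and hand their machine frames
+	 * back to Xen in batches of up to 16 pages, flushing the TLB on the
+	 * last update of each batch.  What remains is an empty VA range
+	 * suitable for mapping foreign (granted) pages into.
+	 */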
+ while (i < nr_pages) {
+ unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+
+ mcl[j].op = __HYPERVISOR_update_va_mapping;
+ mcl[j].args[0] = va;
+
+ mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+
+ xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+
+ if (j == 16 || i == nr_pages) {
+ mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+
+ reservation.nr_extents = j;
+
+ mcl[j].op = __HYPERVISOR_memory_op;
+ mcl[j].args[0] = XENMEM_decrease_reservation;
+ mcl[j].args[1] = (unsigned long)&reservation;
+
+ (void)HYPERVISOR_multicall(mcl, j+1);
+
+ mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
+ j = 0;
+ }
+ }
+
+ return (unsigned long)pages;
+}
+
+#ifdef XEN_NETBACK_FIXUP_CSUM
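+/*
+ * A frontend may hand us packets with checksum offload still pending;
+ * since the mbuf is injected directly, finish the TCP or UDP checksum
+ * in software before passing it on.
+ */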
+static void
+fixup_checksum(struct mbuf *m)
+{
+ struct ether_header *eh = mtod(m, struct ether_header *);
+ struct ip *ip = (struct ip *)(eh + 1);
+ int iphlen = ip->ip_hl << 2;
+ int iplen = ntohs(ip->ip_len);
+
+ if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
+ struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(IPPROTO_TCP + (iplen - iphlen)));
+ th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_TCP;
+ } else {
+ u_short csum;
+ struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
+ uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(IPPROTO_UDP + (iplen - iphlen)));
+ if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
+ csum = 0xffff;
+ uh->uh_sum = csum;
+ m->m_pkthdr.csum_flags &= ~CSUM_UDP;
+ }
+}
+#endif
+
+/* Add the interface to the specified bridge */
+static int
+add_to_bridge(struct ifnet *ifp, char *bridge)
+{
+ struct ifdrv ifd;
+ struct ifbreq ifb;
+ struct ifnet *ifp_bridge = ifunit(bridge);
+
+ if (!ifp_bridge)
+ return ENOENT;
+
+ bzero(&ifd, sizeof(ifd));
+ bzero(&ifb, sizeof(ifb));
+
+ strcpy(ifb.ifbr_ifsname, ifp->if_xname);
+ strcpy(ifd.ifd_name, ifp->if_xname);
+ ifd.ifd_cmd = BRDGADD;
+ ifd.ifd_len = sizeof(ifb);
+ ifd.ifd_data = &ifb;
+
+ return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
+
+}
+
+static int
+netif_create(int handle, struct xenbus_device *xdev, char *bridge)
+{
+ netif_t *netif;
+ struct ifnet *ifp;
+
+ netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!netif)
+ return ENOMEM;
+
+ netif->ref_cnt = 1;
+ netif->handle = handle;
+ netif->domid = xdev->otherend_id;
+ netif->xdev = xdev;
+ netif->bridge = bridge;
+ xdev->data = netif;
+
+ /* Set up ifnet structure */
+ ifp = netif->ifp = if_alloc(IFT_ETHER);
+ if (!ifp) {
+ if (bridge)
+ free(bridge, M_DEVBUF);
+ free(netif, M_DEVBUF);
+ return ENOMEM;
+ }
+
+ ifp->if_softc = netif;
+ if_initname(ifp, "vif",
+ atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
+ ifp->if_output = ether_output;
+ ifp->if_start = netback_start;
+ ifp->if_ioctl = netback_ioctl;
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
+
+ DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+
+ return 0;
+}
+
+static void
+netif_get(netif_t *netif)
+{
+ atomic_add_int(&netif->ref_cnt, 1);
+}
+
+static void
+netif_put(netif_t *netif)
+{
+ if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
+ DPRINTF("%s\n", IFNAME(netif));
+ disconnect_rings(netif);
+ if (netif->ifp) {
+ if_free(netif->ifp);
+ netif->ifp = NULL;
+ }
+ if (netif->bridge)
+ free(netif->bridge, M_DEVBUF);
+ free(netif, M_DEVBUF);
+ }
+}
+
+static int
+netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ switch (cmd) {
+ case SIOCSIFFLAGS:
+ DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
+ IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
+ return 0;
+ }
+
+ DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
+
+ return ether_ioctl(ifp, cmd, data);
+}
+
+static inline void
+maybe_schedule_tx_action(void)
+{
+ smp_mb();
+ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
+ taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+}
+
+/* Removes netif from front of list and does not call netif_put() (caller must) */
+static netif_t *
+remove_from_tx_schedule_list(void)
+{
+ netif_t *netif;
+
+ mtx_lock(&tx_sched_list_lock);
+
+ if ((netif = STAILQ_FIRST(&tx_sched_list))) {
+ STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
+ STAILQ_NEXT(netif, next_tx) = NULL;
+ netif->on_tx_sched_list = 0;
+ }
+
+ mtx_unlock(&tx_sched_list_lock);
+
+ return netif;
+}
+
+/* Adds netif to end of list and calls netif_get() */
+static void
+add_to_tx_schedule_list_tail(netif_t *netif)
+{
+ if (netif->on_tx_sched_list)
+ return;
+
+ mtx_lock(&tx_sched_list_lock);
+ if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ netif_get(netif);
+ STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
+ netif->on_tx_sched_list = 1;
+ }
+ mtx_unlock(&tx_sched_list_lock);
+}
+
+/*
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+ * If this driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- frontend only needs to
+ * send a notification if there are no outstanding unreceived responses.
+ * If we may be buffering transmit requests for any reason then we must be
+ * rather more conservative and treat this as the final check for pending work.
+ */
+static void
+netif_schedule_tx_work(netif_t *netif)
+{
+ int more_to_do;
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+ more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+#else
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+#endif
+
+ if (more_to_do) {
+ DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
+ add_to_tx_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+ }
+}
+
+static struct mtx dealloc_lock;
+MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
+
+static void
+netif_idx_release(uint16_t pending_idx)
+{
+ mtx_lock_spin(&dealloc_lock);
+ dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+ mtx_unlock_spin(&dealloc_lock);
+
+ taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+}
+
+static void
+make_tx_response(netif_t *netif,
+ uint16_t id,
+ int8_t st)
+{
+ RING_IDX i = netif->tx.rsp_prod_pvt;
+ netif_tx_response_t *resp;
+ int notify;
+
+ resp = RING_GET_RESPONSE(&netif->tx, i);
+ resp->id = id;
+ resp->status = st;
+
+ netif->tx.rsp_prod_pvt = ++i;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ if (notify)
+ notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+ if (i == netif->tx.req_cons) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+ if (more_to_do)
+ add_to_tx_schedule_list_tail(netif);
+ }
+#endif
+}
+
+inline static void
+net_tx_action_dealloc(void)
+{
+ gnttab_unmap_grant_ref_t *gop;
+ uint16_t pending_idx;
+ PEND_RING_IDX dc, dp;
+ netif_t *netif;
+ int ret;
+
+ dc = dealloc_cons;
+ dp = dealloc_prod;
+
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = tx_unmap_ops;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->host_addr = MMAP_VADDR(pending_idx);
+ gop->dev_bus_addr = 0;
+ gop->handle = grant_tx_handle[pending_idx];
+ gop++;
+ }
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
+ BUG_ON(ret);
+
+ while (dealloc_cons != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+ netif = pending_tx_info[pending_idx].netif;
+
+ make_tx_response(netif, pending_tx_info[pending_idx].req.id,
+ NETIF_RSP_OKAY);
+
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+ netif_put(netif);
+ }
+}
+
+static void
+netif_page_release(void *buf, void *args)
+{
+ uint16_t pending_idx = (unsigned int)args;
+
+ DDPRINTF("pending_idx=%u\n", pending_idx);
+
+ KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
+
+ netif_idx_release(pending_idx);
+}
+
+static void
+net_tx_action(void *context, int pending)
+{
+ struct mbuf *m;
+ netif_t *netif;
+ netif_tx_request_t txreq;
+ uint16_t pending_idx;
+ RING_IDX i;
+ gnttab_map_grant_ref_t *mop;
+ int ret, work_to_do;
+ struct mbuf *txq = NULL, *txq_last = NULL;
+
+ if (dealloc_cons != dealloc_prod)
+ net_tx_action_dealloc();
+
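+	/*
+	 * First pass: pull requests from each scheduled interface, validate
+	 * them, allocate mbufs and build a batch of grant-map operations.
+	 * One hypercall then maps all of the guest pages, after which the
+	 * second pass below copies the data into the mbufs.
+	 */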
+ mop = tx_map_ops;
+ while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
+
+ /* Get a netif from the list with work to do. */
+ netif = remove_from_tx_schedule_list();
+
+ DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
+ IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ if (!work_to_do) {
+ netif_put(netif);
+ continue;
+ }
+
+ i = netif->tx.req_cons;
+ rmb(); /* Ensure that we see the request before we copy it. */
+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
+		/* If we want credit-based scheduling, it could be added here - WORK */
+
+ netif->tx.req_cons++;
+
+ netif_schedule_tx_work(netif);
+
+ if (unlikely(txreq.size < ETHER_HDR_LEN) ||
+ unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
+ WPRINTF("Bad packet size: %d\n", txreq.size);
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ continue;
+ }
+
+ /* No crossing a page as the payload mustn't fragment. */
+ if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+ WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
+ txreq.offset, txreq.size,
+ (txreq.offset & PAGE_MASK) + txreq.size);
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ continue;
+ }
+
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (!m) {
+ WPRINTF("Failed to allocate mbuf\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ break;
+ }
+ m->m_pkthdr.rcvif = netif->ifp;
+
+ if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
+ struct mbuf *n;
+ MGET(n, M_DONTWAIT, MT_DATA);
+ if (!(m->m_next = n)) {
+ m_freem(m);
+ WPRINTF("Failed to allocate second mbuf\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ break;
+ }
+ n->m_len = txreq.size - PKT_PROT_LEN;
+ m->m_len = PKT_PROT_LEN;
+ } else
+ m->m_len = txreq.size;
+
+ mop->host_addr = MMAP_VADDR(pending_idx);
+ mop->dom = netif->domid;
+ mop->ref = txreq.gref;
+ mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+ mop++;
+
+ memcpy(&pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+ pending_tx_info[pending_idx].netif = netif;
+ *((uint16_t *)m->m_data) = pending_idx;
+
+ if (txq_last)
+ txq_last->m_nextpkt = m;
+ else
+ txq = m;
+ txq_last = m;
+
+ pending_cons++;
+
+ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+ break;
+ }
+
+ if (!txq)
+ return;
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+ BUG_ON(ret);
+
+ mop = tx_map_ops;
+ while ((m = txq) != NULL) {
+ caddr_t data;
+
+ txq = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ pending_idx = *((uint16_t *)m->m_data);
+ netif = pending_tx_info[pending_idx].netif;
+ memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
+
+ /* Check the remap error code. */
+ if (unlikely(mop->status)) {
+ WPRINTF("#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ m_freem(m);
+ mop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+
+#if 0
+ /* Can't do this in FreeBSD since vtophys() returns the pfn */
+ /* of the remote domain who loaned us the machine page - DPT */
+ xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
+ mop->dev_bus_addr >> PAGE_SHIFT;
+#endif
+ grant_tx_handle[pending_idx] = mop->handle;
+
+ /* Setup data in mbuf (lengths are already set) */
+ data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
+ bcopy(data, m->m_data, m->m_len);
+ if (m->m_next) {
+ struct mbuf *n = m->m_next;
+ MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
+ (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
+ n->m_data = &data[PKT_PROT_LEN];
+ } else {
+ /* Schedule a response immediately. */
+ netif_idx_release(pending_idx);
+ }
+
+ if ((txreq.flags & NETTXF_data_validated)) {
+ /* Tell the stack the checksums are okay */
+ m->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+
+ /* If necessary, inform stack to compute the checksums if it forwards the packet */
+ if ((txreq.flags & NETTXF_csum_blank)) {
+ struct ether_header *eh = mtod(m, struct ether_header *);
+ if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
+ struct ip *ip = (struct ip *)&m->m_data[14];
+ if (ip->ip_p == IPPROTO_TCP)
+ m->m_pkthdr.csum_flags |= CSUM_TCP;
+ else if (ip->ip_p == IPPROTO_UDP)
+ m->m_pkthdr.csum_flags |= CSUM_UDP;
+ }
+ }
+
+ netif->ifp->if_ibytes += m->m_pkthdr.len;
+ netif->ifp->if_ipackets++;
+
+ DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
+ m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
+ DPRINTF_MBUF_LEN(m, 128);
+
+ (*netif->ifp->if_input)(netif->ifp, m);
+
+ mop++;
+ }
+}
+
+/* Handle interrupt from a frontend */
+static void
+netback_intr(void *arg)
+{
+ netif_t *netif = arg;
+ DDPRINTF("%s\n", IFNAME(netif));
+ add_to_tx_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+}
+
+/* Removes netif from front of list and does not call netif_put() (caller must) */
+static netif_t *
+remove_from_rx_schedule_list(void)
+{
+ netif_t *netif;
+
+ mtx_lock(&rx_sched_list_lock);
+
+ if ((netif = STAILQ_FIRST(&rx_sched_list))) {
+ STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx);
+ STAILQ_NEXT(netif, next_rx) = NULL;
+ netif->on_rx_sched_list = 0;
+ }
+
+ mtx_unlock(&rx_sched_list_lock);
+
+ return netif;
+}
+
+/* Adds netif to end of list and calls netif_get() */
+static void
+add_to_rx_schedule_list_tail(netif_t *netif)
+{
+ if (netif->on_rx_sched_list)
+ return;
+
+ mtx_lock(&rx_sched_list_lock);
+ if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ netif_get(netif);
+ STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
+ netif->on_rx_sched_list = 1;
+ }
+ mtx_unlock(&rx_sched_list_lock);
+}
+
+static int
+make_rx_response(netif_t *netif, uint16_t id, int8_t st,
+ uint16_t offset, uint16_t size, uint16_t flags)
+{
+ RING_IDX i = netif->rx.rsp_prod_pvt;
+ netif_rx_response_t *resp;
+ int notify;
+
+ resp = RING_GET_RESPONSE(&netif->rx, i);
+ resp->offset = offset;
+ resp->flags = flags;
+ resp->id = id;
+ resp->status = (int16_t)size;
+ if (st < 0)
+ resp->status = (int16_t)st;
+
+ DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
+ i, resp->offset, resp->flags, resp->id, resp->status);
+
+ netif->rx.rsp_prod_pvt = ++i;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
+
+ return notify;
+}
+
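+/*
+ * Deliver packets to the frontend: drain this netif's if_snd queue and push
+ * each packet by page flipping.  The mbuf's machine page is handed over with
+ * GNTTABOP_transfer and replaced locally by a freshly allocated MFN; a
+ * multicall fixes up the local VA mapping and the phys-to-machine table.
+ */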
+static int
+netif_rx(netif_t *netif)
+{
+ struct ifnet *ifp = netif->ifp;
+ struct mbuf *m;
+ multicall_entry_t *mcl;
+ mmu_update_t *mmu;
+ gnttab_transfer_t *gop;
+ unsigned long vdata, old_mfn, new_mfn;
+ struct mbuf *rxq = NULL, *rxq_last = NULL;
+ int ret, notify = 0, pkts_dequeued = 0;
+
+ DDPRINTF("%s\n", IFNAME(netif));
+
+ mcl = rx_mcl;
+ mmu = rx_mmu;
+ gop = grant_rx_op;
+
+ while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+
+ /* Quit if the target domain has no receive buffers */
+ if (netif->rx.req_cons == netif->rx.sring->req_prod)
+ break;
+
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+
+ pkts_dequeued++;
+
+ /* Check if we need to copy the data */
+ if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
+ (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
+ struct mbuf *n;
+
+ DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
+ m->m_flags,
+ (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
+ (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
+ (unsigned int)m->m_next);
+
+ /* Make copy */
+ MGETHDR(n, M_DONTWAIT, MT_DATA);
+ if (!n)
+ goto drop;
+
+ MCLGET(n, M_DONTWAIT);
+ if (!(n->m_flags & M_EXT)) {
+ m_freem(n);
+ goto drop;
+ }
+
+ /* Leave space at front and keep current alignment */
+ n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
+
+ if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
+				WPRINTF("pkt too big %d\n", m->m_pkthdr.len);
+ m_freem(n);
+ goto drop;
+ }
+ m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
+ n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
+ n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
+ m_freem(m);
+ m = n;
+ }
+
+ vdata = (unsigned long)m->m_data;
+ old_mfn = vtomach(vdata) >> PAGE_SHIFT;
+
+ if ((new_mfn = alloc_mfn()) == 0)
+ goto drop;
+
+#ifdef XEN_NETBACK_FIXUP_CSUM
+ /* Check if we need to compute a checksum. This happens */
+ /* when bridging from one domain to another. */
+ if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+ fixup_checksum(m);
+#endif
+
+ xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
+
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = vdata;
+ mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
+ mcl->args[2] = 0;
+ mcl->args[3] = 0;
+ mcl++;
+
+ gop->mfn = old_mfn;
+ gop->domid = netif->domid;
+ gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
+ netif->rx.req_cons++;
+ gop++;
+
+ mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ mmu->val = vtophys(vdata) >> PAGE_SHIFT;
+ mmu++;
+
+ if (rxq_last)
+ rxq_last->m_nextpkt = m;
+ else
+ rxq = m;
+ rxq_last = m;
+
+ DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
+ DPRINTF_MBUF_LEN(m, 128);
+
+ /* Filled the batch queue? */
+ if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
+ break;
+
+ continue;
+ drop:
+ DDPRINTF("dropping pkt\n");
+ ifp->if_oerrors++;
+ m_freem(m);
+ }
+
+ if (mcl == rx_mcl)
+ return pkts_dequeued;
+
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = mmu - rx_mmu;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ mcl++;
+
+ mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+ ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+ BUG_ON(ret != 0);
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
+ BUG_ON(ret != 0);
+
+ mcl = rx_mcl;
+ gop = grant_rx_op;
+
+ while ((m = rxq) != NULL) {
+ int8_t status;
+ uint16_t id, flags = 0;
+
+ rxq = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ /* Rederive the machine addresses. */
+ new_mfn = mcl->args[1] >> PAGE_SHIFT;
+ old_mfn = gop->mfn;
+
+ ifp->if_obytes += m->m_pkthdr.len;
+ ifp->if_opackets++;
+
+ /* The update_va_mapping() must not fail. */
+ BUG_ON(mcl->result != 0);
+
+ /* Setup flags */
+ if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+ flags |= NETRXF_data_validated;
+
+ /* Check the reassignment error code. */
+ status = NETIF_RSP_OKAY;
+ if (gop->status != 0) {
+ DPRINTF("Bad status %d from grant transfer to DOM%u\n",
+ gop->status, netif->domid);
+ /*
+ * Page no longer belongs to us unless GNTST_bad_page,
+ * but that should be a fatal error anyway.
+ */
+ BUG_ON(gop->status == GNTST_bad_page);
+ status = NETIF_RSP_ERROR;
+ }
+ id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
+ notify |= make_rx_response(netif, id, status,
+ (unsigned long)m->m_data & PAGE_MASK,
+ m->m_pkthdr.len, flags);
+
+ m_freem(m);
+ mcl++;
+ gop++;
+ }
+
+ if (notify)
+ notify_remote_via_irq(netif->irq);
+
+ return pkts_dequeued;
+}
+
+static void
+rx_task_timer(void *arg)
+{
+ DDPRINTF("\n");
+ taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+}
+
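+/*
+ * Round-robin over the rx schedule list.  'last_zero_work' remembers the
+ * first netif that made no progress; if a full pass comes back around to it
+ * the loop defers further work to the callout instead of spinning.
+ */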
+static void
+net_rx_action(void *context, int pending)
+{
+ netif_t *netif, *last_zero_work = NULL;
+
+ DDPRINTF("\n");
+
+ while ((netif = remove_from_rx_schedule_list())) {
+ struct ifnet *ifp = netif->ifp;
+
+ if (netif == last_zero_work) {
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ add_to_rx_schedule_list_tail(netif);
+ netif_put(netif);
+ if (!STAILQ_EMPTY(&rx_sched_list))
+ callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
+ break;
+ }
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (netif_rx(netif))
+ last_zero_work = NULL;
+ else if (!last_zero_work)
+ last_zero_work = netif;
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ add_to_rx_schedule_list_tail(netif);
+ }
+
+ netif_put(netif);
+ }
+}
+
+static void
+netback_start(struct ifnet *ifp)
+{
+ netif_t *netif = (netif_t *)ifp->if_softc;
+
+ DDPRINTF("%s\n", IFNAME(netif));
+
+ add_to_rx_schedule_list_tail(netif);
+ taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+}
+
+/* Map a grant ref to a ring */
+static int
+map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+{
+ struct gnttab_map_grant_ref op;
+
+ ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+ if (ring->va == 0)
+ return ENOMEM;
+
+ op.host_addr = ring->va;
+ op.flags = GNTMAP_host_map;
+ op.ref = ref;
+ op.dom = dom;
+ HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+ if (op.status) {
+ WPRINTF("grant table op err=%d\n", op.status);
+ kmem_free(kernel_map, ring->va, PAGE_SIZE);
+ ring->va = 0;
+ return EACCES;
+ }
+
+ ring->handle = op.handle;
+ ring->bus_addr = op.dev_bus_addr;
+
+ return 0;
+}
+
+/* Unmap grant ref for a ring */
+static void
+unmap_ring(struct ring_ref *ring)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ op.host_addr = ring->va;
+ op.dev_bus_addr = ring->bus_addr;
+ op.handle = ring->handle;
+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+ if (op.status)
+ WPRINTF("grant table op err=%d\n", op.status);
+
+ kmem_free(kernel_map, ring->va, PAGE_SIZE);
+ ring->va = 0;
+}
+
+static int
+connect_rings(netif_t *netif)
+{
+ struct xenbus_device *xdev = netif->xdev;
+ netif_tx_sring_t *txs;
+ netif_rx_sring_t *rxs;
+ unsigned long tx_ring_ref, rx_ring_ref;
+ evtchn_port_t evtchn;
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
+
+	/* Grab frontend data and map its memory */
+ err = xenbus_gather(NULL, xdev->otherend,
+ "tx-ring-ref", "%lu", &tx_ring_ref,
+ "rx-ring-ref", "%lu", &rx_ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(xdev, err,
+ "reading %s/ring-ref and event-channel",
+ xdev->otherend);
+ return err;
+ }
+
+ err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "mapping tx ring");
+ return err;
+ }
+ txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+ err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
+ if (err) {
+ unmap_ring(&netif->tx_ring_ref);
+ xenbus_dev_fatal(xdev, err, "mapping rx ring");
+ return err;
+ }
+ rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+ op.u.bind_interdomain.remote_dom = netif->domid;
+ op.u.bind_interdomain.remote_port = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_ring(&netif->tx_ring_ref);
+ unmap_ring(&netif->rx_ring_ref);
+ xenbus_dev_fatal(xdev, err, "binding event channel");
+ return err;
+ }
+ netif->evtchn = op.u.bind_interdomain.local_port;
+
+ /* bind evtchn to irq handler */
+ netif->irq =
+ bind_evtchn_to_irqhandler(netif->evtchn, "netback",
+ netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
+
+ netif->rings_connected = 1;
+
+ DPRINTF("%s connected! evtchn=%d irq=%d\n",
+ IFNAME(netif), netif->evtchn, netif->irq);
+
+ return 0;
+}
+
+static void
+disconnect_rings(netif_t *netif)
+{
+ DPRINTF("\n");
+
+ if (netif->rings_connected) {
+ unbind_from_irqhandler(netif->irq, netif->irq_cookie);
+ netif->irq = 0;
+ unmap_ring(&netif->tx_ring_ref);
+ unmap_ring(&netif->rx_ring_ref);
+ netif->rings_connected = 0;
+ }
+}
+
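+/*
+ * Bring the interface up once the xenbus device is attached and the frontend
+ * has reached XenbusStateConnected; called from both vif_attach() and
+ * frontend_changed().
+ */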
+static void
+connect(netif_t *netif)
+{
+ if (!netif->xdev ||
+ !netif->attached ||
+ netif->frontend_state != XenbusStateConnected) {
+ return;
+ }
+
+ if (!connect_rings(netif)) {
+ xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
+
+ /* Turn on interface */
+ netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ netif->ifp->if_flags |= IFF_UP;
+ }
+}
+
+static int
+netback_remove(struct xenbus_device *xdev)
+{
+ netif_t *netif = xdev->data;
+ device_t ndev;
+
+ DPRINTF("remove %s\n", xdev->nodename);
+
+ if ((ndev = netif->ndev)) {
+ netif->ndev = NULL;
+ mtx_lock(&Giant);
+ device_detach(ndev);
+ mtx_unlock(&Giant);
+ }
+
+ xdev->data = NULL;
+ netif->xdev = NULL;
+ netif_put(netif);
+
+ return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffers for communication with the frontend.
+ * Switch to Connected state.
+ */
+static int
+netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+{
+ int err;
+ long handle;
+ char *bridge;
+
+ DPRINTF("node=%s\n", xdev->nodename);
+
+ /* Grab the handle */
+ err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
+ if (err != 1) {
+ xenbus_dev_fatal(xdev, err, "reading handle");
+ return err;
+ }
+
+ /* Check for bridge */
+ bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
+ if (IS_ERR(bridge))
+ bridge = NULL;
+
+ err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "writing switch state");
+ return err;
+ }
+
+ err = netif_create(handle, xdev, bridge);
+ if (err) {
+ xenbus_dev_fatal(xdev, err, "creating netif");
+ return err;
+ }
+
+ err = vif_add_dev(xdev);
+ if (err) {
+ netif_put((netif_t *)xdev->data);
+ xenbus_dev_fatal(xdev, err, "adding vif device");
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * We are reconnecting to the frontend, due to a suspend/resume, or a frontend
+ * driver restart. We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int netback_resume(struct xenbus_device *xdev)
+{
+ DPRINTF("node=%s\n", xdev->nodename);
+ return 0;
+}
+
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *xdev,
+ XenbusState frontend_state)
+{
+ netif_t *netif = xdev->data;
+
+ DPRINTF("state=%d\n", frontend_state);
+
+ netif->frontend_state = frontend_state;
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ break;
+ case XenbusStateConnected:
+ connect(netif);
+ break;
+ case XenbusStateClosing:
+ xenbus_switch_state(xdev, NULL, XenbusStateClosing);
+ break;
+ case XenbusStateClosed:
+ xenbus_remove_device(xdev);
+ break;
+ case XenbusStateUnknown:
+ case XenbusStateInitWait:
+ xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+/* ** Driver registration ** */
+
+static struct xenbus_device_id netback_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+static struct xenbus_driver netback = {
+ .name = "netback",
+ .ids = netback_ids,
+ .probe = netback_probe,
+ .remove = netback_remove,
+ .resume= netback_resume,
+ .otherend_changed = frontend_changed,
+};
+
+static void
+netback_init(void *unused)
+{
+ callout_init(&rx_task_callout, CALLOUT_MPSAFE);
+
+ mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
+ BUG_ON(!mmap_vstart);
+
+ pending_cons = 0;
+ for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
+ pending_ring[pending_prod] = pending_prod;
+
+ TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
+ TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
+ mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
+ mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
+
+ DPRINTF("registering %s\n", netback.name);
+
+ xenbus_register_backend(&netback);
+}
+
+SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)
+
+static int
+vif_add_dev(struct xenbus_device *xdev)
+{
+ netif_t *netif = xdev->data;
+ device_t nexus, ndev;
+ devclass_t dc;
+ int err = 0;
+
+ mtx_lock(&Giant);
+
+ /* We will add a vif device as a child of nexus0 (for now) */
+ if (!(dc = devclass_find("nexus")) ||
+ !(nexus = devclass_get_device(dc, 0))) {
+ WPRINTF("could not find nexus0!\n");
+ err = ENOENT;
+ goto done;
+ }
+
+
+ /* Create a newbus device representing the vif */
+ ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
+ if (!ndev) {
+ WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
+ err = EFAULT;
+ goto done;
+ }
+
+ netif_get(netif);
+ device_set_ivars(ndev, netif);
+ netif->ndev = ndev;
+
+ device_probe_and_attach(ndev);
+
+ done:
+
+ mtx_unlock(&Giant);
+
+ return err;
+}
+
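+/* Per-vif sysctl handlers: frontend domid, handle and (debug) ring state. */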
+enum {
+ VIF_SYSCTL_DOMID,
+ VIF_SYSCTL_HANDLE,
+ VIF_SYSCTL_TXRING,
+ VIF_SYSCTL_RXRING,
+};
+
+static char *
+vif_sysctl_ring_info(netif_t *netif, int cmd)
+{
+ char *buf = malloc(256, M_DEVBUF, M_WAITOK);
+ if (buf) {
+ if (!netif->rings_connected)
+ sprintf(buf, "rings not connected\n");
+ else if (cmd == VIF_SYSCTL_TXRING) {
+ netif_tx_back_ring_t *tx = &netif->tx;
+ sprintf(buf, "nr_ents=%x req_cons=%x"
+ " req_prod=%x req_event=%x"
+ " rsp_prod=%x rsp_event=%x",
+ tx->nr_ents, tx->req_cons,
+ tx->sring->req_prod, tx->sring->req_event,
+ tx->sring->rsp_prod, tx->sring->rsp_event);
+ } else {
+ netif_rx_back_ring_t *rx = &netif->rx;
+ sprintf(buf, "nr_ents=%x req_cons=%x"
+ " req_prod=%x req_event=%x"
+ " rsp_prod=%x rsp_event=%x",
+ rx->nr_ents, rx->req_cons,
+ rx->sring->req_prod, rx->sring->req_event,
+ rx->sring->rsp_prod, rx->sring->rsp_event);
+ }
+ }
+ return buf;
+}
+
+static int
+vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ device_t dev = (device_t)arg1;
+ netif_t *netif = (netif_t *)device_get_ivars(dev);
+ const char *value;
+ char *buf = NULL;
+ int err;
+
+ switch (arg2) {
+ case VIF_SYSCTL_DOMID:
+ return sysctl_handle_int(oidp, NULL, netif->domid, req);
+ case VIF_SYSCTL_HANDLE:
+ return sysctl_handle_int(oidp, NULL, netif->handle, req);
+ case VIF_SYSCTL_TXRING:
+ case VIF_SYSCTL_RXRING:
+ value = buf = vif_sysctl_ring_info(netif, arg2);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ err = SYSCTL_OUT(req, value, strlen(value));
+ if (buf != NULL)
+ free(buf, M_DEVBUF);
+
+ return err;
+}
+
+/* Newbus vif device driver probe */
+static int
+vif_probe(device_t dev)
+{
+ DDPRINTF("vif%d\n", device_get_unit(dev));
+ return 0;
+}
+
+/* Newbus vif device driver attach */
+static int
+vif_attach(device_t dev)
+{
+ netif_t *netif = (netif_t *)device_get_ivars(dev);
+ uint8_t mac[ETHER_ADDR_LEN];
+
+ DDPRINTF("%s\n", IFNAME(netif));
+
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
+ "domid of frontend");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
+ dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
+ "handle of frontend");
+#ifdef XEN_NETBACK_DEBUG
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "txring", CTLFLAG_RD,
+ dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
+ "tx ring info");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "rxring", CTLFLAG_RD,
+ dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
+ "rx ring info");
+#endif
+
+ memset(mac, 0xff, sizeof(mac));
+ mac[0] &= ~0x01;
+
+ ether_ifattach(netif->ifp, mac);
+ netif->attached = 1;
+
+ connect(netif);
+
+ if (netif->bridge) {
+ DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
+ int err = add_to_bridge(netif->ifp, netif->bridge);
+ if (err) {
+ WPRINTF("Error adding %s to %s; err=%d\n",
+ IFNAME(netif), netif->bridge, err);
+ }
+ }
+
+ return bus_generic_attach(dev);
+}
+
+/* Newbus vif device driver detach */
+static int
+vif_detach(device_t dev)
+{
+ netif_t *netif = (netif_t *)device_get_ivars(dev);
+ struct ifnet *ifp = netif->ifp;
+
+ DDPRINTF("%s\n", IFNAME(netif));
+
+ /* Tell the stack that the interface is no longer active */
+ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+ ether_ifdetach(ifp);
+
+ bus_generic_detach(dev);
+
+ netif->attached = 0;
+
+ netif_put(netif);
+
+ return 0;
+}
+
+static device_method_t vif_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vif_probe),
+ DEVMETHOD(device_attach, vif_attach),
+ DEVMETHOD(device_detach, vif_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+ {0, 0}
+};
+
+static devclass_t vif_devclass;
+
+static driver_t vif_driver = {
+ "vif",
+ vif_methods,
+ 0,
+};
+
+DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/sys/dev/xen/netfront/mbufq.h b/sys/dev/xen/netfront/mbufq.h
new file mode 100644
index 0000000..0d6c604
--- /dev/null
+++ b/sys/dev/xen/netfront/mbufq.h
@@ -0,0 +1,123 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef CXGB_MBUFQ_H_
+#define CXGB_MBUFQ_H_
+
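+/*
+ * Minimal queue of mbuf packet chains, linked through m_nextpkt.  qlen counts
+ * packets and qsize counts payload bytes; the lock member is not used by
+ * these helpers and is left to the caller if serialization is needed.
+ */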
+struct mbuf_head {
+ struct mbuf *head;
+ struct mbuf *tail;
+ uint32_t qlen;
+ uint32_t qsize;
+ struct mtx lock;
+};
+
+static __inline void
+mbufq_init(struct mbuf_head *l)
+{
+ l->head = l->tail = NULL;
+ l->qlen = l->qsize = 0;
+}
+
+static __inline int
+mbufq_empty(struct mbuf_head *l)
+{
+ return (l->head == NULL);
+}
+
+static __inline int
+mbufq_len(struct mbuf_head *l)
+{
+ return (l->qlen);
+}
+
+static __inline int
+mbufq_size(struct mbuf_head *l)
+{
+ return (l->qsize);
+}
+
+static __inline int
+mbufq_head_size(struct mbuf_head *l)
+{
+ return (l->head ? l->head->m_pkthdr.len : 0);
+}
+
+static __inline void
+mbufq_tail(struct mbuf_head *l, struct mbuf *m)
+{
+ l->qlen++;
+ if (l->head == NULL)
+ l->head = m;
+ else
+ l->tail->m_nextpkt = m;
+ l->tail = m;
+ l->qsize += m->m_pkthdr.len;
+}
+
+static __inline struct mbuf *
+mbufq_dequeue(struct mbuf_head *l)
+{
+ struct mbuf *m;
+
+ m = l->head;
+ if (m) {
+ if (m == l->tail)
+ l->head = l->tail = NULL;
+ else
+ l->head = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ l->qlen--;
+ l->qsize -= m->m_pkthdr.len;
+ }
+
+ return (m);
+}
+
+static __inline struct mbuf *
+mbufq_peek(struct mbuf_head *l)
+{
+ return (l->head);
+}
+
+static __inline void
+mbufq_append(struct mbuf_head *a, struct mbuf_head *b)
+{
+	if (a->tail)
+		a->tail->m_nextpkt = b->head;
+	else
+		a->head = b->head;	/* 'a' was empty; take over 'b''s chain */
+	if (b->tail)
+		a->tail = b->tail;
+ a->qlen += b->qlen;
+ a->qsize += b->qsize;
+}
+#endif /* CXGB_MBUFQ_H_ */
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
new file mode 100644
index 0000000..fd174b6
--- /dev/null
+++ b/sys/dev/xen/netfront/netfront.c
@@ -0,0 +1,1829 @@
+/*
+ *
+ * Copyright (c) 2004-2006 Kip Macy
+ * All rights reserved.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/sx.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/bpf.h>
+
+#include <net/if_types.h>
+#include <net/if.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/clock.h> /* for DELAY */
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/frame.h>
+
+
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <machine/intr_machdep.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/xen/hypervisor.h>
+#include <machine/xen/xen_intr.h>
+#include <machine/xen/evtchn.h>
+#include <machine/xen/xenbus.h>
+#include <xen/gnttab.h>
+#include <xen/interface/memory.h>
+#include <dev/xen/netfront/mbufq.h>
+#include <machine/xen/features.h>
+#include <xen/interface/io/netif.h>
+
+
+#define GRANT_INVALID_REF 0
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
+#define RX_COPY_THRESHOLD 256
+
+#define net_ratelimit() 0
+
+struct netfront_info;
+struct netfront_rx_info;
+
+static void xn_txeof(struct netfront_info *);
+static void xn_rxeof(struct netfront_info *);
+static void network_alloc_rx_buffers(struct netfront_info *);
+
+static void xn_tick_locked(struct netfront_info *);
+static void xn_tick(void *);
+
+static void xn_intr(void *);
+static void xn_start_locked(struct ifnet *);
+static void xn_start(struct ifnet *);
+static int xn_ioctl(struct ifnet *, u_long, caddr_t);
+static void xn_ifinit_locked(struct netfront_info *);
+static void xn_ifinit(void *);
+static void xn_stop(struct netfront_info *);
+#ifdef notyet
+static void xn_watchdog(struct ifnet *);
+#endif
+
+static void show_device(struct netfront_info *sc);
+#ifdef notyet
+static void netfront_closing(struct xenbus_device *dev);
+#endif
+static void netif_free(struct netfront_info *info);
+static int netfront_remove(struct xenbus_device *dev);
+
+static int talk_to_backend(struct xenbus_device *dev, struct netfront_info *info);
+static int create_netdev(struct xenbus_device *dev, struct ifnet **ifp);
+static void netif_disconnect_backend(struct netfront_info *info);
+static int setup_device(struct xenbus_device *dev, struct netfront_info *info);
+static void end_access(int ref, void *page);
+
+/* Xenolinux helper functions */
+static int network_connect(struct ifnet *ifp);
+
+static void xn_free_rx_ring(struct netfront_info *);
+
+static void xn_free_tx_ring(struct netfront_info *);
+
+static int xennet_get_responses(struct netfront_info *np,
+ struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf_head *list,
+ int *pages_flipped_p);
+
+#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+
+#define INVALID_P2M_ENTRY (~0UL)
+
+/*
+ * Mbuf pointers. We need these to keep track of the virtual addresses
+ * of our mbuf chains since we can only convert from virtual to physical,
+ * not the other way around. The size must track the free index arrays.
+ */
+struct xn_chain_data {
+ struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1];
+ struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1];
+};
+
+
+struct net_device_stats
+{
+ u_long rx_packets; /* total packets received */
+ u_long tx_packets; /* total packets transmitted */
+ u_long rx_bytes; /* total bytes received */
+ u_long tx_bytes; /* total bytes transmitted */
+ u_long rx_errors; /* bad packets received */
+ u_long tx_errors; /* packet transmit problems */
+ u_long rx_dropped; /* no space in linux buffers */
+ u_long tx_dropped; /* no space available in linux */
+ u_long multicast; /* multicast packets received */
+ u_long collisions;
+
+ /* detailed rx_errors: */
+ u_long rx_length_errors;
+ u_long rx_over_errors; /* receiver ring buff overflow */
+ u_long rx_crc_errors; /* recved pkt with crc error */
+ u_long rx_frame_errors; /* recv'd frame alignment error */
+ u_long rx_fifo_errors; /* recv'r fifo overrun */
+ u_long rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ u_long tx_aborted_errors;
+ u_long tx_carrier_errors;
+ u_long tx_fifo_errors;
+ u_long tx_heartbeat_errors;
+ u_long tx_window_errors;
+
+ /* for cslip etc */
+ u_long rx_compressed;
+ u_long tx_compressed;
+};
+
+struct netfront_info {
+
+ struct ifnet *xn_ifp;
+
+ struct net_device_stats stats;
+ u_int tx_full;
+
+ netif_tx_front_ring_t tx;
+ netif_rx_front_ring_t rx;
+
+ struct mtx tx_lock;
+ struct mtx rx_lock;
+ struct sx sc_lock;
+
+ u_int handle;
+ u_int irq;
+ u_int copying_receiver;
+ u_int carrier;
+
+ /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 32
+#define RX_MAX_TARGET NET_RX_RING_SIZE
+ int rx_min_target, rx_max_target, rx_target;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+ * array is an index into a chain of free entries.
+ */
+
+ grant_ref_t gref_tx_head;
+ grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
+ grant_ref_t gref_rx_head;
+	grant_ref_t	grant_rx_ref[NET_RX_RING_SIZE + 1];
+
+#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
+ struct xenbus_device *xbdev;
+ int tx_ring_ref;
+ int rx_ring_ref;
+ uint8_t mac[ETHER_ADDR_LEN];
+ struct xn_chain_data xn_cdata; /* mbufs */
+ struct mbuf_head xn_rx_batch; /* head of the batch queue */
+
+ int xn_if_flags;
+ struct callout xn_stat_ch;
+
+ u_long rx_pfn_array[NET_RX_RING_SIZE];
+ multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
+ mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+};
+
+#define rx_mbufs xn_cdata.xn_rx_chain
+#define tx_mbufs xn_cdata.xn_tx_chain
+
+#define XN_LOCK_INIT(_sc, _name) \
+ mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
+ mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \
+ sx_init(&(_sc)->sc_lock, #_name"_rx")
+
+#define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock)
+#define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock)
+
+#define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock)
+#define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock)
+
+#define XN_LOCK(_sc) sx_xlock(&(_sc)->sc_lock);
+#define XN_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_lock);
+
+#define XN_LOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_lock, SX_LOCKED);
+#define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED);
+#define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED);
+#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \
+ mtx_destroy(&(_sc)->tx_lock); \
+ sx_destroy(&(_sc)->sc_lock);
+
+struct netfront_rx_info {
+ struct netif_rx_response rx;
+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+};
+
+#define netfront_carrier_on(netif) ((netif)->carrier = 1)
+#define netfront_carrier_off(netif) ((netif)->carrier = 0)
+#define netfront_carrier_ok(netif) ((netif)->carrier)
+
+/*
+ * Access helpers for acquiring/freeing slots in the tx/rx mbuf chain arrays.
+ * Entry 0 of each array is the freelist head; a free slot stores the index
+ * of the next free slot, cast to a pointer.
+ */
+
+static inline void
+add_id_to_freelist(struct mbuf **list, unsigned short id)
+{
+ list[id] = list[0];
+ list[0] = (void *)(u_long)id;
+}
+
+static inline unsigned short
+get_id_from_freelist(struct mbuf **list)
+{
+ u_int id = (u_int)(u_long)list[0];
+ list[0] = list[id];
+ return (id);
+}
+
+static inline int
+xennet_rxidx(RING_IDX idx)
+{
+ return idx & (NET_RX_RING_SIZE - 1);
+}
+
+static inline struct mbuf *
+xennet_get_rx_mbuf(struct netfront_info *np,
+ RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ struct mbuf *m;
+
+ m = np->rx_mbufs[i];
+ np->rx_mbufs[i] = NULL;
+ return (m);
+}
+
+static inline grant_ref_t
+xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ grant_ref_t ref = np->grant_rx_ref[i];
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ return ref;
+}
+
+#ifdef DEBUG
+
+#endif
+#define IPRINTK(fmt, args...) \
+ printf("[XEN] " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+ printf("[XEN] " fmt, ##args)
+#define DPRINTK(fmt, args...) \
+ printf("[XEN] " fmt, ##args)
+
+
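+/*
+ * Copy a packet into a single, freshly allocated page-sized cluster so the
+ * payload is physically contiguous; ext_arg1 caches the physical frame
+ * number of the cluster.
+ */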
+static __inline struct mbuf*
+makembuf (struct mbuf *buf)
+{
+ struct mbuf *m = NULL;
+
+ MGETHDR (m, M_DONTWAIT, MT_DATA);
+
+ if (! m)
+ return 0;
+
+ M_MOVE_PKTHDR(m, buf);
+
+ m_cljget(m, M_DONTWAIT, MJUMPAGESIZE);
+ m->m_pkthdr.len = buf->m_pkthdr.len;
+ m->m_len = buf->m_len;
+ m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) );
+
+ m->m_ext.ext_arg1 = (caddr_t *)(uintptr_t)(vtophys(mtod(m,caddr_t)) >> PAGE_SHIFT);
+
+ return m;
+}
+
+/**
+ * Read the 'mac' node at the given device's node in the store, and parse that
+ * as colon-separated octets, placing the result in the given mac array.  mac
+ * must be a preallocated array of length ETHER_ADDR_LEN.
+ * Return 0 on success, or errno on error.
+ */
+static int
+xen_net_read_mac(struct xenbus_device *dev, uint8_t mac[])
+{
+ char *s;
+ int i;
+ char *e;
+ char *macstr = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr)) {
+ return PTR_ERR(macstr);
+ }
+ s = macstr;
+ for (i = 0; i < ETHER_ADDR_LEN; i++) {
+ mac[i] = strtoul(s, &e, 16);
+ if (s == e || (e[0] != ':' && e[0] != 0)) {
+ free(macstr, M_DEVBUF);
+ return ENOENT;
+ }
+ s = &e[1];
+ }
+ free(macstr, M_DEVBUF);
+ return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffers for communication with the backend, and
+ * inform the backend of the appropriate details for those. Switch to
+ * Connected state.
+ */
+static int
+netfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id)
+{
+ int err;
+ struct ifnet *ifp;
+ struct netfront_info *info;
+
+ printf("netfront_probe() \n");
+
+ err = create_netdev(dev, &ifp);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "creating netdev");
+ return err;
+ }
+
+ info = ifp->if_softc;
+ dev->dev_driver_data = info;
+
+
+ return 0;
+}
+
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart. We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int
+netfront_resume(struct xenbus_device *dev)
+{
+ struct netfront_info *info = dev->dev_driver_data;
+
+ DPRINTK("%s\n", dev->nodename);
+
+ netif_disconnect_backend(info);
+ return (0);
+}
+
+
+/* Common code used when first setting up, and when resuming. */
+static int
+talk_to_backend(struct xenbus_device *dev, struct netfront_info *info)
+{
+ const char *message;
+ struct xenbus_transaction xbt;
+ int err;
+
+ err = xen_net_read_mac(dev, info->mac);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+ goto out;
+ }
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_device(dev, info);
+ if (err)
+ goto out;
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto destroy_ring;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
+ info->tx_ring_ref);
+ if (err) {
+ message = "writing tx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
+ info->rx_ring_ref);
+ if (err) {
+ message = "writing rx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "event-channel", "%u", irq_to_evtchn_port(info->irq));
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
+ info->copying_receiver);
+ if (err) {
+ message = "writing request-rx-copy";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-notify";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1);
+ if (err) {
+ message = "writing feature-no-csum-offload";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+#ifdef HAVE_TSO
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+#endif
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto destroy_ring;
+ }
+
+ return 0;
+
+ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_ring:
+ netif_free(info);
+ out:
+ return err;
+}
+
+
+static int
+setup_device(struct xenbus_device *dev, struct netfront_info *info)
+{
+ netif_tx_sring_t *txs;
+ netif_rx_sring_t *rxs;
+ int err;
+ struct ifnet *ifp;
+
+ ifp = info->xn_ifp;
+
+ info->tx_ring_ref = GRANT_INVALID_REF;
+ info->rx_ring_ref = GRANT_INVALID_REF;
+ info->rx.sring = NULL;
+ info->tx.sring = NULL;
+ info->irq = 0;
+
+ txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!txs) {
+ err = ENOMEM;
+ xenbus_dev_fatal(dev, err, "allocating tx ring page");
+ goto fail;
+ }
+ SHARED_RING_INIT(txs);
+ FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+ err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+ if (err < 0)
+ goto fail;
+ info->tx_ring_ref = err;
+
+ rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!rxs) {
+ err = ENOMEM;
+ xenbus_dev_fatal(dev, err, "allocating rx ring page");
+ goto fail;
+ }
+ SHARED_RING_INIT(rxs);
+ FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+ if (err < 0)
+ goto fail;
+ info->rx_ring_ref = err;
+
+#if 0
+ network_connect(ifp);
+#endif
+ err = bind_listening_port_to_irqhandler(dev->otherend_id,
+ "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, NULL);
+
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+ goto fail;
+ }
+ info->irq = err;
+
+ show_device(info);
+
+ return 0;
+
+ fail:
+ netif_free(info);
+ return err;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void
+backend_changed(struct xenbus_device *dev,
+ XenbusState backend_state)
+{
+ struct netfront_info *sc = dev->dev_driver_data;
+
+ DPRINTK("\n");
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateConnected:
+ case XenbusStateUnknown:
+ case XenbusStateClosed:
+ break;
+ case XenbusStateInitWait:
+ if (dev->state != XenbusStateInitialising)
+ break;
+ if (network_connect(sc->xn_ifp) != 0)
+ break;
+ xenbus_switch_state(dev, XenbusStateConnected);
+#ifdef notyet
+ (void)send_fake_arp(netdev);
+#endif
+		break;
+ case XenbusStateClosing:
+ xenbus_frontend_closed(dev);
+ break;
+ }
+}
+
+static void
+xn_free_rx_ring(struct netfront_info *sc)
+{
+#if 0
+ int i;
+
+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
+ if (sc->xn_cdata.xn_rx_chain[i] != NULL) {
+ m_freem(sc->xn_cdata.xn_rx_chain[i]);
+ sc->xn_cdata.xn_rx_chain[i] = NULL;
+ }
+ }
+
+ sc->rx.rsp_cons = 0;
+ sc->xn_rx_if->req_prod = 0;
+ sc->xn_rx_if->event = sc->rx.rsp_cons ;
+#endif
+}
+
+static void
+xn_free_tx_ring(struct netfront_info *sc)
+{
+#if 0
+ int i;
+
+ for (i = 0; i < NET_TX_RING_SIZE; i++) {
+ if (sc->xn_cdata.xn_tx_chain[i] != NULL) {
+ m_freem(sc->xn_cdata.xn_tx_chain[i]);
+ sc->xn_cdata.xn_tx_chain[i] = NULL;
+ }
+ }
+
+ return;
+#endif
+}
+
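+/*
+ * True if the tx ring still has room for a worst-case fragmented packet
+ * (MAX_SKB_FRAGS worth of slots plus slack).
+ */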
+static inline int
+netfront_tx_slot_available(struct netfront_info *np)
+{
+ return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
+ (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2));
+}
+static void
+netif_release_tx_bufs(struct netfront_info *np)
+{
+ struct mbuf *m;
+ int i;
+
+ for (i = 1; i <= NET_TX_RING_SIZE; i++) {
+ m = np->xn_cdata.xn_tx_chain[i];
+
+ if (((u_long)m) < KERNBASE)
+ continue;
+ gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
+ np->xbdev->otherend_id, virt_to_mfn(mtod(m, vm_offset_t)),
+ GNTMAP_readonly);
+ gnttab_release_grant_reference(&np->gref_tx_head,
+ np->grant_tx_ref[i]);
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ add_id_to_freelist(np->tx_mbufs, i);
+ m_freem(m);
+ }
+}
+
+static void
+network_alloc_rx_buffers(struct netfront_info *sc)
+{
+ unsigned short id;
+ struct mbuf *m_new;
+ int i, batch_target, notify;
+ RING_IDX req_prod;
+ struct xen_memory_reservation reservation;
+ grant_ref_t ref;
+ int nr_flips;
+ netif_rx_request_t *req;
+ vm_offset_t vaddr;
+ u_long pfn;
+
+ req_prod = sc->rx.req_prod_pvt;
+
+ if (unlikely(sc->carrier == 0))
+ return;
+
+ /*
+ * Allocate skbuffs greedily, even though we batch updates to the
+ * receive ring. This creates a less bursty demand on the memory
+ * allocator, so should reduce the chance of failed allocation
+ * requests both for ourself and for other kernel subsystems.
+ */
+ batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
+ for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
+ MGETHDR(m_new, M_DONTWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto no_mbuf;
+
+ m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
+ if ((m_new->m_flags & M_EXT) == 0) {
+ m_freem(m_new);
+
+no_mbuf:
+ if (i != 0)
+ goto refill;
+ /*
+ * XXX set timer
+ */
+ break;
+ }
+ m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;
+
+ /* queue the mbufs allocated */
+ mbufq_tail(&sc->xn_rx_batch, m_new);
+ }
+
+ /* Is the batch large enough to be worthwhile? */
+ if (i < (sc->rx_target/2)) {
+		if (req_prod > sc->rx.sring->req_prod)
+ goto push;
+ return;
+ }
+ /* Adjust floating fill target if we risked running out of buffers. */
+ if ( ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) &&
+ ((sc->rx_target *= 2) > sc->rx_max_target) )
+ sc->rx_target = sc->rx_max_target;
+
+refill:
+ for (nr_flips = i = 0; ; i++) {
+ if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
+ break;
+
+ m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
+ vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);
+
+ id = xennet_rxidx(req_prod + i);
+
+ KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL,
+ ("non-NULL xm_rx_chain"));
+ sc->xn_cdata.xn_rx_chain[id] = m_new;
+
+ ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
+ KASSERT((short)ref >= 0, ("negative ref"));
+ sc->grant_rx_ref[id] = ref;
+
+ vaddr = mtod(m_new, vm_offset_t);
+ pfn = vtophys(vaddr) >> PAGE_SHIFT;
+ req = RING_GET_REQUEST(&sc->rx, req_prod + i);
+
+ if (sc->copying_receiver == 0) {
+ gnttab_grant_foreign_transfer_ref(ref,
+ sc->xbdev->otherend_id, pfn);
+ sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remove this page before passing
+ * back to Xen.
+ */
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ MULTI_update_va_mapping(&sc->rx_mcl[i],
+ vaddr, 0, 0);
+ }
+ nr_flips++;
+ } else {
+ gnttab_grant_foreign_access_ref(ref,
+ sc->xbdev->otherend_id,
+ PFNTOMFN(pfn), 0);
+ }
+ req->id = id;
+ req->gref = ref;
+
+ sc->rx_pfn_array[i] =
+ vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
+ }
+
+ KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
+ KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
+ /*
+ * We may have allocated buffers which have entries outstanding
+	 * in the page update queue -- make sure we flush those first!
+ */
+ PT_UPDATES_FLUSH();
+ if (nr_flips != 0) {
+#ifdef notyet
+		/* Tell the balloon driver what is going on. */
+ balloon_update_driver_allowance(i);
+#endif
+ set_xen_guest_handle(reservation.extent_start,sc->rx_pfn_array);
+ reservation.nr_extents = i;
+ reservation.extent_order = 0;
+ reservation.address_bits = 0;
+ reservation.domid = DOMID_SELF;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+ /* After all PTEs have been zapped, flush the TLB. */
+ sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+ UVMF_TLB_FLUSH|UVMF_ALL;
+
+ /* Give away a batch of pages. */
+ sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
+ sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+ sc->rx_mcl[i].args[1] = (u_long)&reservation;
+ /* Zap PTEs and give away pages in one big multicall. */
+ (void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_dom_mem_op(). */
+ if (unlikely(sc->rx_mcl[i].result != i))
+ panic("Unable to reduce memory reservation\n");
+ } else {
+ if (HYPERVISOR_memory_op(
+ XENMEM_decrease_reservation, &reservation)
+ != i)
+ panic("Unable to reduce memory "
+ "reservation\n");
+ }
+ } else {
+ wmb();
+ }
+
+ /* Above is a suitable barrier to ensure backend will see requests. */
+ sc->rx.req_prod_pvt = req_prod + i;
+push:
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
+ if (notify)
+ notify_remote_via_irq(sc->irq);
+}
+
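+/*
+ * Receive completion: gather responses from the rx ring into a local queue,
+ * remap any flipped pages with one multicall, then pass the finished mbufs
+ * to the stack and refill the ring.
+ */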
+static void
+xn_rxeof(struct netfront_info *np)
+{
+ struct ifnet *ifp;
+ struct netfront_rx_info rinfo;
+ struct netif_rx_response *rx = &rinfo.rx;
+ struct netif_extra_info *extras = rinfo.extras;
+ RING_IDX i, rp;
+ multicall_entry_t *mcl;
+ struct mbuf *m;
+ struct mbuf_head rxq, errq, tmpq;
+ int err, pages_flipped = 0;
+
+ XN_RX_LOCK_ASSERT(np);
+ if (!netfront_carrier_ok(np))
+ return;
+
+ mbufq_init(&tmpq);
+ mbufq_init(&errq);
+ mbufq_init(&rxq);
+
+ ifp = np->xn_ifp;
+
+ rp = np->rx.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ i = np->rx.rsp_cons;
+ while ((i != rp)) {
+ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+ memset(extras, 0, sizeof(rinfo.extras));
+
+ err = xennet_get_responses(np, &rinfo, rp, &tmpq,
+ &pages_flipped);
+
+ if (unlikely(err)) {
+ while ((m = mbufq_dequeue(&tmpq)))
+ mbufq_tail(&errq, m);
+ np->stats.rx_errors++;
+ i = np->rx.rsp_cons;
+ continue;
+ }
+
+ m = mbufq_dequeue(&tmpq);
+
+ m->m_data += rx->offset;/* (rx->addr & PAGE_MASK); */
+ m->m_pkthdr.len = m->m_len = rx->status;
+ m->m_pkthdr.rcvif = ifp;
+
+ if ( rx->flags & NETRXF_data_validated ) {
+ /* Tell the stack the checksums are okay */
+ /*
+ * XXX this isn't necessarily the case - need to add
+ * check
+ */
+
+ m->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
+ | CSUM_PSEUDO_HDR);
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += rx->status;
+
+ mbufq_tail(&rxq, m);
+ np->rx.rsp_cons = ++i;
+ }
+
+ if (pages_flipped) {
+ /* Some pages are no longer absent... */
+#ifdef notyet
+ balloon_update_driver_allowance(-pages_flipped);
+#endif
+ /* Do all the remapping work, and M->P updates, in one big
+ * hypercall.
+ */
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = np->rx_mcl + pages_flipped;
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (u_long)np->rx_mmu;
+ mcl->args[1] = pages_flipped;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ (void)HYPERVISOR_multicall(np->rx_mcl,
+ pages_flipped + 1);
+ }
+ }
+
+ while ((m = mbufq_dequeue(&errq)))
+ m_freem(m);
+
+ /*
+ * Process all the mbufs after the remapping is complete.
+ * Break the mbuf chain first though.
+ */
+ while ((m = mbufq_dequeue(&rxq)) != NULL) {
+ ifp->if_ipackets++;
+
+ /*
+ * Do we really need to drop the rx lock?
+ */
+ XN_RX_UNLOCK(np);
+ /* Pass it up. */
+ (*ifp->if_input)(ifp, m);
+ XN_RX_LOCK(np);
+ }
+
+ np->rx.rsp_cons = i;
+
+#if 0
+ /* If we get a callback with very few responses, reduce fill target. */
+ /* NB. Note exponential increase, linear decrease. */
+ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
+ ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
+ np->rx_target = np->rx_min_target;
+#endif
+
+ network_alloc_rx_buffers(np);
+
+ np->rx.sring->rsp_event = i + 1;
+}
+
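+/*
+ * Transmit completion: for every response the backend has produced, revoke
+ * the grant, return the slot to the freelist and free the mbuf, then re-arm
+ * rsp_event so we are notified of further completions.
+ */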
+static void
+xn_txeof(struct netfront_info *np)
+{
+ RING_IDX i, prod;
+ unsigned short id;
+ struct ifnet *ifp;
+ struct mbuf *m;
+
+ XN_TX_LOCK_ASSERT(np);
+
+ if (!netfront_carrier_ok(np))
+ return;
+
+ ifp = np->xn_ifp;
+ ifp->if_timer = 0;
+
+ do {
+ prod = np->tx.sring->rsp_prod;
+ rmb(); /* Ensure we see responses up to 'rp'. */
+
+ for (i = np->tx.rsp_cons; i != prod; i++) {
+ id = RING_GET_RESPONSE(&np->tx, i)->id;
+ m = np->xn_cdata.xn_tx_chain[id];
+
+ ifp->if_opackets++;
+ KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
+ M_ASSERTVALID(m);
+ if (unlikely(gnttab_query_foreign_access(
+ np->grant_tx_ref[id]) != 0)) {
+ printf("network_tx_buf_gc: warning "
+ "-- grant still in use by backend "
+ "domain.\n");
+ goto out;
+ }
+ gnttab_end_foreign_access_ref(
+ np->grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, np->grant_tx_ref[id]);
+ np->grant_tx_ref[id] = GRANT_INVALID_REF;
+
+ np->xn_cdata.xn_tx_chain[id] = NULL;
+ add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
+ m_freem(m);
+ }
+ np->tx.rsp_cons = prod;
+
+ /*
+ * Set a new event, then check for race with update of
+ * tx_cons. Note that it is essential to schedule a
+ * callback, no matter how few buffers are pending. Even if
+ * there is space in the transmit ring, higher layers may
+ * be blocked because too much data is outstanding: in such
+ * cases notification from Xen is likely to be the only kick
+ * that we'll get.
+ */
+ np->tx.sring->rsp_event =
+ prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
+
+ mb();
+
+ } while (prod != np->tx.sring->rsp_prod);
+
+ out:
+ if (np->tx_full &&
+ ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
+ np->tx_full = 0;
+#if 0
+ if (np->user_state == UST_OPEN)
+ netif_wake_queue(dev);
+#endif
+ }
+
+}
+
+static void
+xn_intr(void *xsc)
+{
+ struct netfront_info *np = xsc;
+ struct ifnet *ifp = np->xn_ifp;
+
+#if 0
+ if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
+ likely(netfront_carrier_ok(np)) &&
+ ifp->if_drv_flags & IFF_DRV_RUNNING))
+ return;
+#endif
+ if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
+ XN_TX_LOCK(np);
+ xn_txeof(np);
+ XN_TX_UNLOCK(np);
+ }
+
+ XN_RX_LOCK(np);
+ xn_rxeof(np);
+ XN_RX_UNLOCK(np);
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+ !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ xn_start(ifp);
+}
+
+
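+/*
+ * Return an unconsumed rx slot (mbuf plus grant reference) to the ring so
+ * the backend can use it again.
+ */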
+static void
+xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
+ grant_ref_t ref)
+{
+ int new = xennet_rxidx(np->rx.req_prod_pvt);
+
+ KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
+ np->rx_mbufs[new] = m;
+ np->grant_rx_ref[new] = ref;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
+ np->rx.req_prod_pvt++;
+}
+
+static int
+xennet_get_extras(struct netfront_info *np,
+ struct netif_extra_info *extras, RING_IDX rp)
+{
+ struct netif_extra_info *extra;
+ RING_IDX cons = np->rx.rsp_cons;
+
+ int err = 0;
+
+ do {
+ struct mbuf *m;
+ grant_ref_t ref;
+
+ if (unlikely(cons + 1 == rp)) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("Missing extra info\n");
+#endif
+ err = -EINVAL;
+ break;
+ }
+
+ extra = (struct netif_extra_info *)
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ if (unlikely(!extra->type ||
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("Invalid extra type: %d\n",
+ extra->type);
+#endif
+ err = -EINVAL;
+ } else {
+ memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+ }
+
+ m = xennet_get_rx_mbuf(np, cons);
+ ref = xennet_get_rx_ref(np, cons);
+ xennet_move_rx_slot(np, m, ref);
+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ np->rx.rsp_cons = cons;
+ return err;
+}
+
+static int
+xennet_get_responses(struct netfront_info *np,
+ struct netfront_rx_info *rinfo, RING_IDX rp,
+ struct mbuf_head *list,
+ int *pages_flipped_p)
+{
+ int pages_flipped = *pages_flipped_p;
+ struct mmu_update *mmu;
+ struct multicall_entry *mcl;
+ struct netif_rx_response *rx = &rinfo->rx;
+ struct netif_extra_info *extras = rinfo->extras;
+ RING_IDX cons = np->rx.rsp_cons;
+ struct mbuf *m = xennet_get_rx_mbuf(np, cons);
+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
+ int max = 24 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
+ int frags = 1;
+ int err = 0;
+ u_long ret;
+
+ if (rx->flags & NETRXF_extra_info) {
+ err = xennet_get_extras(np, extras, rp);
+ cons = np->rx.rsp_cons;
+ }
+
+ for (;;) {
+ u_long mfn;
+
+ if (unlikely(rx->status < 0 ||
+ rx->offset + rx->status > PAGE_SIZE)) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("rx->offset: %x, size: %u\n",
+ rx->offset, rx->status);
+#endif
+ xennet_move_rx_slot(np, m, ref);
+ err = -EINVAL;
+ goto next;
+ }
+
+		/*
+		 * This definitely indicates a bug, either in this driver or
+		 * in the backend driver.  In the future this should flag the
+		 * bad situation to the system controller so that the backend
+		 * can be rebooted.
+		 */
+ if (ref == GRANT_INVALID_REF) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("Bad rx response id %d.\n", rx->id);
+#endif
+ err = -EINVAL;
+ goto next;
+ }
+
+ if (!np->copying_receiver) {
+			/*
+			 * The transfer may be unfulfilled under memory
+			 * pressure, insufficient buffer headroom, ...
+			 */
+ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
+ if (net_ratelimit())
+ WPRINTK("Unfulfilled rx req "
+ "(id=%d, st=%d).\n",
+ rx->id, rx->status);
+ xennet_move_rx_slot(np, m, ref);
+ err = -ENOMEM;
+ goto next;
+ }
+
+			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ void *vaddr = mtod(m, void *);
+ uint32_t pfn;
+
+ mcl = np->rx_mcl + pages_flipped;
+ mmu = np->rx_mmu + pages_flipped;
+
+ MULTI_update_va_mapping(mcl, (u_long)vaddr,
+ (mfn << PAGE_SHIFT) | PG_RW |
+ PG_V | PG_M | PG_A, 0);
+ pfn = (uint32_t)m->m_ext.ext_arg1;
+ mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE;
+ mmu->val = pfn;
+
+ set_phys_to_machine(pfn, mfn);
+ }
+ pages_flipped++;
+ } else {
+ ret = gnttab_end_foreign_access_ref(ref, 0);
+ KASSERT(ret, ("ret != 0"));
+ }
+
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ mbufq_tail(list, m);
+
+next:
+ if (!(rx->flags & NETRXF_more_data))
+ break;
+
+ if (cons + frags == rp) {
+ if (net_ratelimit())
+ WPRINTK("Need more frags\n");
+ err = -ENOENT;
+ break;
+ }
+
+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+ m = xennet_get_rx_mbuf(np, cons + frags);
+ ref = xennet_get_rx_ref(np, cons + frags);
+ frags++;
+ }
+
+ if (unlikely(frags > max)) {
+ if (net_ratelimit())
+ WPRINTK("Too many frags\n");
+ err = -E2BIG;
+ }
+
+ if (unlikely(err))
+ np->rx.rsp_cons = cons + frags;
+
+ *pages_flipped_p = pages_flipped;
+
+ return err;
+}
+
+static void
+xn_tick_locked(struct netfront_info *sc)
+{
+ XN_RX_LOCK_ASSERT(sc);
+ callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
+
+ /* XXX placeholder for printing debug information */
+
+}
+
+
+static void
+xn_tick(void *xsc)
+{
+ struct netfront_info *sc;
+
+ sc = xsc;
+ XN_RX_LOCK(sc);
+ xn_tick_locked(sc);
+ XN_RX_UNLOCK(sc);
+
+}
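+
+/*
+ * Drain the interface send queue: each packet is consolidated into a
+ * fresh mbuf, the backend is granted read-only access to the page backing
+ * it, and a transmit request is posted on the ring.
+ */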
+static void
+xn_start_locked(struct ifnet *ifp)
+{
+ unsigned short id;
+ struct mbuf *m_head, *new_m;
+ struct netfront_info *sc;
+ netif_tx_request_t *tx;
+ RING_IDX i;
+ grant_ref_t ref;
+ u_long mfn, tx_bytes;
+ int notify;
+
+ sc = ifp->if_softc;
+ tx_bytes = 0;
+
+ if (!netfront_carrier_ok(sc))
+ return;
+
+ for (i = sc->tx.req_prod_pvt; TRUE; i++) {
+ IF_DEQUEUE(&ifp->if_snd, m_head);
+ if (m_head == NULL)
+ break;
+
+ if (!netfront_tx_slot_available(sc)) {
+ IF_PREPEND(&ifp->if_snd, m_head);
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ break;
+ }
+
+ id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
+
+		/*
+		 * Consolidate this mbuf chain into a single mbuf and post
+		 * it to the backend as one transmit request.
+		 */
+ new_m = makembuf(m_head);
+ tx = RING_GET_REQUEST(&sc->tx, i);
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
+ KASSERT((short)ref >= 0, ("Negative ref"));
+ mfn = virt_to_mfn(mtod(new_m, vm_offset_t));
+ gnttab_grant_foreign_access_ref(ref, sc->xbdev->otherend_id,
+ mfn, GNTMAP_readonly);
+ tx->gref = sc->grant_tx_ref[id] = ref;
+ tx->size = new_m->m_pkthdr.len;
+#if 0
+ tx->flags = (skb->ip_summed == CHECKSUM_HW) ? NETTXF_csum_blank : 0;
+#endif
+ tx->flags = 0;
+ new_m->m_next = NULL;
+ new_m->m_nextpkt = NULL;
+
+ m_freem(m_head);
+
+ sc->xn_cdata.xn_tx_chain[id] = new_m;
+ BPF_MTAP(ifp, new_m);
+
+ sc->stats.tx_bytes += new_m->m_pkthdr.len;
+ sc->stats.tx_packets++;
+ }
+
+ sc->tx.req_prod_pvt = i;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
+ if (notify)
+ notify_remote_via_irq(sc->irq);
+
+ xn_txeof(sc);
+
+ if (RING_FULL(&sc->tx)) {
+ sc->tx_full = 1;
+#if 0
+ netif_stop_queue(dev);
+#endif
+ }
+
+ return;
+}
+
+static void
+xn_start(struct ifnet *ifp)
+{
+ struct netfront_info *sc;
+ sc = ifp->if_softc;
+ XN_TX_LOCK(sc);
+ xn_start_locked(ifp);
+ XN_TX_UNLOCK(sc);
+}
+
+/* equivalent of network_open() in Linux */
+static void
+xn_ifinit_locked(struct netfront_info *sc)
+{
+ struct ifnet *ifp;
+
+ XN_LOCK_ASSERT(sc);
+
+ ifp = sc->xn_ifp;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ xn_stop(sc);
+
+ network_alloc_rx_buffers(sc);
+ sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
+
+}
+
+
+static void
+xn_ifinit(void *xsc)
+{
+ struct netfront_info *sc = xsc;
+
+ XN_LOCK(sc);
+ xn_ifinit_locked(sc);
+ XN_UNLOCK(sc);
+
+}
+
+
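+/*
+ * Handle interface ioctls: interface address, MTU, flag, and capability
+ * changes are handled here; everything else falls through to ether_ioctl().
+ */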
+static int
+xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct netfront_info *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *) data;
+ struct ifaddr *ifa = (struct ifaddr *)data;
+
+ int mask, error = 0;
+ switch(cmd) {
+ case SIOCSIFADDR:
+ case SIOCGIFADDR:
+ XN_LOCK(sc);
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ ifp->if_flags |= IFF_UP;
+ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ xn_ifinit_locked(sc);
+ arp_ifinit(ifp, ifa);
+ } else
+ error = ether_ioctl(ifp, cmd, data);
+ XN_UNLOCK(sc);
+ break;
+ case SIOCSIFMTU:
+		/* XXX can we alter the MTU on a VN? */
+#ifdef notyet
+ if (ifr->ifr_mtu > XN_JUMBO_MTU)
+ error = EINVAL;
+ else
+#endif
+ {
+ ifp->if_mtu = ifr->ifr_mtu;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ xn_ifinit(sc);
+ }
+ break;
+ case SIOCSIFFLAGS:
+ XN_LOCK(sc);
+ if (ifp->if_flags & IFF_UP) {
+ /*
+ * If only the state of the PROMISC flag changed,
+ * then just use the 'set promisc mode' command
+ * instead of reinitializing the entire NIC. Doing
+ * a full re-init means reloading the firmware and
+ * waiting for it to start up, which may take a
+ * second or two.
+ */
+#ifdef notyet
+ /* No promiscuous mode with Xen */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+ ifp->if_flags & IFF_PROMISC &&
+ !(sc->xn_if_flags & IFF_PROMISC)) {
+ XN_SETBIT(sc, XN_RX_MODE,
+ XN_RXMODE_RX_PROMISC);
+ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+ !(ifp->if_flags & IFF_PROMISC) &&
+ sc->xn_if_flags & IFF_PROMISC) {
+ XN_CLRBIT(sc, XN_RX_MODE,
+ XN_RXMODE_RX_PROMISC);
+ } else
+#endif
+ xn_ifinit_locked(sc);
+ } else {
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ xn_stop(sc);
+ }
+ }
+ sc->xn_if_flags = ifp->if_flags;
+ XN_UNLOCK(sc);
+ error = 0;
+ break;
+ case SIOCSIFCAP:
+ mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ if (mask & IFCAP_HWCSUM) {
+ if (IFCAP_HWCSUM & ifp->if_capenable)
+ ifp->if_capenable &= ~IFCAP_HWCSUM;
+ else
+ ifp->if_capenable |= IFCAP_HWCSUM;
+ }
+ error = 0;
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+#ifdef notyet
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ XN_LOCK(sc);
+ xn_setmulti(sc);
+ XN_UNLOCK(sc);
+ error = 0;
+ }
+#endif
+ /* FALLTHROUGH */
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ error = EINVAL;
+ break;
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ }
+
+ return (error);
+}
+
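+/*
+ * Stop the interface: cancel the periodic tick callout, release the
+ * receive and transmit ring buffers, and clear the running/active flags.
+ */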
+static void
+xn_stop(struct netfront_info *sc)
+{
+ struct ifnet *ifp;
+
+ XN_LOCK_ASSERT(sc);
+
+ ifp = sc->xn_ifp;
+
+ callout_stop(&sc->xn_stat_ch);
+
+ xn_free_rx_ring(sc);
+ xn_free_tx_ring(sc);
+
+ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+}
+
+/* START of Xenolinux helper functions adapted to FreeBSD */
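+/*
+ * (Re)connect to the backend: negotiate the receive mode (copy vs. flip),
+ * rebuild the RX ring from any surviving mbufs, and restart transmit and
+ * receive processing.
+ */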
+static int
+network_connect(struct ifnet *ifp)
+{
+ struct netfront_info *np;
+ int i, requeue_idx, err;
+ grant_ref_t ref;
+ netif_rx_request_t *req;
+ u_int feature_rx_copy, feature_rx_flip;
+
+ printf("network_connect\n");
+
+ np = ifp->if_softc;
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-copy", "%u", &feature_rx_copy);
+ if (err != 1)
+ feature_rx_copy = 0;
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-flip", "%u", &feature_rx_flip);
+ if (err != 1)
+ feature_rx_flip = 1;
+
+ /*
+ * Copy packets on receive path if:
+ * (a) This was requested by user, and the backend supports it; or
+ * (b) Flipping was requested, but this is unsupported by the backend.
+ */
+ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
+ (MODPARM_rx_flip && !feature_rx_flip));
+
+ XN_LOCK(np);
+ /* Recovery procedure: */
+ err = talk_to_backend(np->xbdev, np);
+ if (err)
+ return (err);
+
+ /* Step 1: Reinitialise variables. */
+ netif_release_tx_bufs(np);
+
+ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
+ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+ struct mbuf *m;
+
+ if (np->rx_mbufs[i] == NULL)
+ continue;
+
+ m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
+ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
+ req = RING_GET_REQUEST(&np->rx, requeue_idx);
+
+ if (!np->copying_receiver) {
+ gnttab_grant_foreign_transfer_ref(ref,
+ np->xbdev->otherend_id,
+ vtophys(mtod(m, vm_offset_t)));
+ } else {
+ gnttab_grant_foreign_access_ref(ref,
+ np->xbdev->otherend_id,
+ vtophys(mtod(m, vm_offset_t)), 0);
+ }
+ req->gref = ref;
+ req->id = requeue_idx;
+
+ requeue_idx++;
+ }
+
+ np->rx.req_prod_pvt = requeue_idx;
+
+ /* Step 3: All public and private state should now be sane. Get
+ * ready to start sending and receiving packets and give the driver
+ * domain a kick because we've probably just requeued some
+ * packets.
+ */
+ netfront_carrier_on(np);
+ notify_remote_via_irq(np->irq);
+ XN_TX_LOCK(np);
+ xn_txeof(np);
+ XN_TX_UNLOCK(np);
+ network_alloc_rx_buffers(np);
+ XN_UNLOCK(np);
+
+ return (0);
+}
+
+
+static void
+show_device(struct netfront_info *sc)
+{
+#ifdef DEBUG
+ if (sc) {
+ IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
+ sc->xn_ifno,
+ be_state_name[sc->xn_backend_state],
+ sc->xn_user_state ? "open" : "closed",
+ sc->xn_evtchn,
+ sc->xn_irq,
+ sc->xn_tx_if,
+ sc->xn_rx_if);
+ } else {
+ IPRINTK("<vif NULL>\n");
+ }
+#endif
+}
+
+static int ifno = 0;
+
+/**
+ * Create a network device: allocate the softc, the grant references for
+ * both rings, and the ifnet, then attach it as an Ethernet interface.
+ *
+ * @param dev	xenbus device for this frontend instance
+ * @param ifpp	on success, receives the newly allocated ifnet
+ */
+static int
+create_netdev(struct xenbus_device *dev, struct ifnet **ifpp)
+{
+ int i;
+ struct netfront_info *np;
+ int err;
+ struct ifnet *ifp;
+
+ np = (struct netfront_info *)malloc(sizeof(struct netfront_info),
+ M_DEVBUF, M_NOWAIT);
+ if (np == NULL)
+ return (ENOMEM);
+
+ memset(np, 0, sizeof(struct netfront_info));
+
+ np->xbdev = dev;
+
+ XN_LOCK_INIT(np, xennetif);
+ np->rx_target = RX_MIN_TARGET;
+ np->rx_min_target = RX_MIN_TARGET;
+ np->rx_max_target = RX_MAX_TARGET;
+
+	/* Initialise {tx,rx}_mbufs to be a free chain containing every entry. */
+ for (i = 0; i <= NET_TX_RING_SIZE; i++) {
+ np->tx_mbufs[i] = (void *) ((u_long) i+1);
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ }
+ for (i = 0; i <= NET_RX_RING_SIZE; i++) {
+ np->rx_mbufs[i] = NULL;
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ }
+ /* A grant for every tx ring slot */
+ if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+ &np->gref_tx_head) < 0) {
+ printf("#### netfront can't alloc tx grant refs\n");
+ err = ENOMEM;
+ goto exit;
+ }
+ /* A grant for every rx ring slot */
+ if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+ &np->gref_rx_head) < 0) {
+ printf("#### netfront can't alloc rx grant refs\n");
+ gnttab_free_grant_references(np->gref_tx_head);
+ err = ENOMEM;
+ goto exit;
+ }
+
+ err = xen_net_read_mac(dev, np->mac);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+ goto out;
+ }
+
+ /* Set up ifnet structure */
+ *ifpp = ifp = np->xn_ifp = if_alloc(IFT_ETHER);
+ ifp->if_softc = np;
+ if_initname(ifp, "xn", ifno++/* ifno */);
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
+ ifp->if_ioctl = xn_ioctl;
+ ifp->if_output = ether_output;
+ ifp->if_start = xn_start;
+#ifdef notyet
+ ifp->if_watchdog = xn_watchdog;
+#endif
+ ifp->if_init = xn_ifinit;
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
+
+#ifdef notyet
+ ifp->if_hwassist = XN_CSUM_FEATURES;
+ ifp->if_capabilities = IFCAP_HWCSUM;
+ ifp->if_capenable = ifp->if_capabilities;
+#endif
+
+ ether_ifattach(ifp, np->mac);
+ callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
+ netfront_carrier_off(np);
+
+ return (0);
+
+exit:
+ gnttab_free_grant_references(np->gref_tx_head);
+out:
+ panic("do something smart");
+
+}
+
+/**
+ * Handle the change of state of the backend to Closing. We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend. Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+#if 0
+static void netfront_closing(struct xenbus_device *dev)
+{
+#if 0
+ struct netfront_info *info = dev->dev_driver_data;
+
+ DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+
+ close_netdev(info);
+#endif
+ xenbus_switch_state(dev, XenbusStateClosed);
+}
+#endif
+
+static int netfront_remove(struct xenbus_device *dev)
+{
+ struct netfront_info *info = dev->dev_driver_data;
+
+ DPRINTK("%s\n", dev->nodename);
+
+ netif_free(info);
+ free(info, M_DEVBUF);
+
+ return 0;
+}
+
+
+static void netif_free(struct netfront_info *info)
+{
+ netif_disconnect_backend(info);
+#if 0
+ close_netdev(info);
+#endif
+}
+
+
+
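+/* Stop the interface and revoke the backend's access to the shared rings. */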
+static void netif_disconnect_backend(struct netfront_info *info)
+{
+ xn_stop(info);
+ end_access(info->tx_ring_ref, info->tx.sring);
+ end_access(info->rx_ring_ref, info->rx.sring);
+ info->tx_ring_ref = GRANT_INVALID_REF;
+ info->rx_ring_ref = GRANT_INVALID_REF;
+ info->tx.sring = NULL;
+ info->rx.sring = NULL;
+
+#if 0
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info->netdev);
+#else
+ panic("FIX ME");
+#endif
+ info->irq = 0;
+}
+
+
+static void end_access(int ref, void *page)
+{
+ if (ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(ref, 0, page);
+}
+
+
+/* ** Driver registration ** */
+
+
+static struct xenbus_device_id netfront_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+
+static struct xenbus_driver netfront = {
+ .name = "vif",
+ .ids = netfront_ids,
+ .probe = netfront_probe,
+ .remove = netfront_remove,
+ .resume = netfront_resume,
+ .otherend_changed = backend_changed,
+};
+
+static void
+netif_init(void *unused)
+{
+ if (!is_running_on_xen())
+ return;
+
+ if (is_initial_xendomain())
+ return;
+
+ IPRINTK("Initialising virtual ethernet driver.\n");
+
+ xenbus_register_frontend(&netfront);
+}
+
+SYSINIT(xennetif, SI_SUB_PSEUDO, SI_ORDER_ANY, netif_init, NULL)
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/sys/dev/xen/pcifront/pcifront.c b/sys/dev/xen/pcifront/pcifront.c
new file mode 100644
index 0000000..e6c498b
--- /dev/null
+++ b/sys/dev/xen/pcifront/pcifront.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <machine/vmparam.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/frame.h>
+
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <machine/intr_machdep.h>
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <machine/xen-public/io/pciif.h>
+
+#include <sys/pciio.h>
+#include <dev/pci/pcivar.h>
+#include "pcib_if.h"
+
+#ifdef XEN_PCIDEV_FE_DEBUG
+#define DPRINTF(fmt, args...) \
+ printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+#define WPRINTF(fmt, args...) \
+ printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+#define INVALID_GRANT_REF (0)
+#define INVALID_EVTCHN (-1)
+#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+
+struct pcifront_device {
+ STAILQ_ENTRY(pcifront_device) next;
+
+ struct xenbus_device *xdev;
+
+ int unit;
+ int evtchn;
+ int gnt_ref;
+
+ /* Lock this when doing any operations in sh_info */
+ struct mtx sh_info_lock;
+ struct xen_pci_sharedinfo *sh_info;
+
+ device_t ndev;
+
+ int ref_cnt;
+};
+
+static STAILQ_HEAD(pcifront_dlist, pcifront_device) pdev_list = STAILQ_HEAD_INITIALIZER(pdev_list);
+
+struct xpcib_softc {
+ int domain;
+ int bus;
+ struct pcifront_device *pdev;
+};
+
+/* Allocate a PCI device structure */
+static struct pcifront_device *
+alloc_pdev(struct xenbus_device *xdev)
+{
+ struct pcifront_device *pdev = NULL;
+ int err, unit;
+
+ err = sscanf(xdev->nodename, "device/pci/%d", &unit);
+ if (err != 1) {
+ if (err == 0)
+ err = -EINVAL;
+		xenbus_dev_fatal(xdev, err, "Error scanning pci device instance number");
+ goto out;
+ }
+
+ pdev = (struct pcifront_device *)malloc(sizeof(struct pcifront_device), M_DEVBUF, M_NOWAIT);
+ if (pdev == NULL) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(xdev, err, "Error allocating pcifront_device struct");
+ goto out;
+ }
+ pdev->unit = unit;
+ pdev->xdev = xdev;
+ pdev->ref_cnt = 1;
+
+ pdev->sh_info = (struct xen_pci_sharedinfo *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+ if (pdev->sh_info == NULL) {
+ free(pdev, M_DEVBUF);
+ pdev = NULL;
+ err = -ENOMEM;
+ xenbus_dev_fatal(xdev, err, "Error allocating sh_info struct");
+ goto out;
+ }
+ pdev->sh_info->flags = 0;
+
+ xdev->data = pdev;
+
+ mtx_init(&pdev->sh_info_lock, "info_lock", "pci shared dev info lock", MTX_DEF);
+
+ pdev->evtchn = INVALID_EVTCHN;
+ pdev->gnt_ref = INVALID_GRANT_REF;
+
+ STAILQ_INSERT_TAIL(&pdev_list, pdev, next);
+
+ DPRINTF("Allocated pdev @ 0x%p (unit=%d)\n", pdev, unit);
+
+ out:
+ return pdev;
+}
+
+/* Hold a reference to a pcifront device */
+static void
+get_pdev(struct pcifront_device *pdev)
+{
+ pdev->ref_cnt++;
+}
+
+/* Release a reference to a pcifront device */
+static void
+put_pdev(struct pcifront_device *pdev)
+{
+ if (--pdev->ref_cnt > 0)
+ return;
+
+ DPRINTF("freeing pdev @ 0x%p (ref_cnt=%d)\n", pdev, pdev->ref_cnt);
+
+ if (pdev->evtchn != INVALID_EVTCHN)
+ xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
+
+ if (pdev->gnt_ref != INVALID_GRANT_REF)
+ gnttab_end_foreign_access(pdev->gnt_ref, 0, (void *)pdev->sh_info);
+
+ pdev->xdev->data = NULL;
+
+ free(pdev, M_DEVBUF);
+}
+
+
+/* Write to the xenbus info needed by backend */
+static int
+pcifront_publish_info(struct pcifront_device *pdev)
+{
+ int err = 0;
+ struct xenbus_transaction *trans;
+
+ err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+ if (err < 0) {
+ WPRINTF("error granting access to ring page\n");
+ goto out;
+ }
+
+ pdev->gnt_ref = err;
+
+ err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
+ if (err)
+ goto out;
+
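+	/* The transaction below is retried from here if it ends with EAGAIN. */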
+ do_publish:
+ trans = xenbus_transaction_start();
+ if (IS_ERR(trans)) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error writing configuration for backend "
+ "(start transaction)");
+ goto out;
+ }
+
+ err = xenbus_printf(trans, pdev->xdev->nodename,
+ "pci-op-ref", "%u", pdev->gnt_ref);
+ if (!err)
+ err = xenbus_printf(trans, pdev->xdev->nodename,
+ "event-channel", "%u", pdev->evtchn);
+ if (!err)
+ err = xenbus_printf(trans, pdev->xdev->nodename,
+ "magic", XEN_PCI_MAGIC);
+ if (!err)
+ err = xenbus_switch_state(pdev->xdev, trans,
+ XenbusStateInitialised);
+
+ if (err) {
+ xenbus_transaction_end(trans, 1);
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error writing configuration for backend");
+ goto out;
+ } else {
+ err = xenbus_transaction_end(trans, 0);
+ if (err == -EAGAIN)
+ goto do_publish;
+ else if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error completing transaction for backend");
+ goto out;
+ }
+ }
+
+ out:
+ return err;
+}
+
+/* The backend is now connected so complete the connection process on our side */
+static int
+pcifront_connect(struct pcifront_device *pdev)
+{
+ device_t nexus;
+ devclass_t nexus_devclass;
+
+ /* We will add our device as a child of the nexus0 device */
+ if (!(nexus_devclass = devclass_find("nexus")) ||
+ !(nexus = devclass_get_device(nexus_devclass, 0))) {
+ WPRINTF("could not find nexus0!\n");
+ return -1;
+ }
+
+ /* Create a newbus device representing this frontend instance */
+ pdev->ndev = BUS_ADD_CHILD(nexus, 0, "xpcife", pdev->unit);
+ if (!pdev->ndev) {
+ WPRINTF("could not create xpcife%d!\n", pdev->unit);
+ return -EFAULT;
+ }
+ get_pdev(pdev);
+ device_set_ivars(pdev->ndev, pdev);
+
+ /* Good to go connected now */
+ xenbus_switch_state(pdev->xdev, NULL, XenbusStateConnected);
+
+ printf("pcifront: connected to %s\n", pdev->xdev->nodename);
+
+ mtx_lock(&Giant);
+ device_probe_and_attach(pdev->ndev);
+ mtx_unlock(&Giant);
+
+ return 0;
+}
+
+/* The backend is closing so process a disconnect */
+static int
+pcifront_disconnect(struct pcifront_device *pdev)
+{
+ int err = 0;
+ XenbusState prev_state;
+
+ prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
+ if (prev_state < XenbusStateClosing) {
+ err = xenbus_switch_state(pdev->xdev, NULL, XenbusStateClosing);
+ if (!err && prev_state == XenbusStateConnected) {
+ /* TODO - need to detach the newbus devices */
+ }
+ }
+
+ return err;
+}
+
+/* Process a probe from the xenbus */
+static int
+pcifront_probe(struct xenbus_device *xdev,
+ const struct xenbus_device_id *id)
+{
+ int err = 0;
+ struct pcifront_device *pdev;
+
+ DPRINTF("xenbus probing\n");
+
+ if ((pdev = alloc_pdev(xdev)) == NULL)
+ goto out;
+
+ err = pcifront_publish_info(pdev);
+
+ out:
+ if (err)
+ put_pdev(pdev);
+ return err;
+}
+
+/* Remove the xenbus PCI device */
+static int
+pcifront_remove(struct xenbus_device *xdev)
+{
+ DPRINTF("removing xenbus device node (%s)\n", xdev->nodename);
+ if (xdev->data)
+ put_pdev(xdev->data);
+ return 0;
+}
+
+/* Called by xenbus when our backend node changes state */
+static void
+pcifront_backend_changed(struct xenbus_device *xdev,
+ XenbusState be_state)
+{
+ struct pcifront_device *pdev = xdev->data;
+
+ switch (be_state) {
+ case XenbusStateClosing:
+ DPRINTF("backend closing (%s)\n", xdev->nodename);
+ pcifront_disconnect(pdev);
+ break;
+
+ case XenbusStateClosed:
+ DPRINTF("backend closed (%s)\n", xdev->nodename);
+ pcifront_disconnect(pdev);
+ break;
+
+ case XenbusStateConnected:
+ DPRINTF("backend connected (%s)\n", xdev->nodename);
+ pcifront_connect(pdev);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Issue a PCI operation to the backend: copy the request into the shared
+ * info page, notify the backend over the event channel, and spin (via
+ * HYPERVISOR_poll) until the backend clears the active flag or a timeout
+ * expires.
+ */
+static int
+do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
+{
+ int err = 0;
+ struct xen_pci_op *active_op = &pdev->sh_info->op;
+ evtchn_port_t port = pdev->evtchn;
+ time_t timeout;
+
+ mtx_lock(&pdev->sh_info_lock);
+
+ memcpy(active_op, op, sizeof(struct xen_pci_op));
+
+	/* Make the request visible before flagging it active and notifying the backend. */
+ wmb();
+ set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+ notify_remote_via_evtchn(port);
+
+ timeout = time_uptime + 2;
+
+ clear_evtchn(port);
+
+ /* Spin while waiting for the answer */
+ while (test_bit
+ (_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)) {
+		int poll_err = HYPERVISOR_poll(&port, 1, 3 * hz);
+		if (poll_err)
+			panic("Failed HYPERVISOR_poll: err=%d", poll_err);
+ clear_evtchn(port);
+ if (time_uptime > timeout) {
+ WPRINTF("pciback not responding!!!\n");
+ clear_bit(_XEN_PCIF_active,
+ (unsigned long *)&pdev->sh_info->flags);
+ err = XEN_PCI_ERR_dev_not_found;
+ goto out;
+ }
+ }
+
+ memcpy(op, active_op, sizeof(struct xen_pci_op));
+
+ err = op->err;
+ out:
+ mtx_unlock(&pdev->sh_info_lock);
+ return err;
+}
+
+/* ** XenBus Driver registration ** */
+
+static struct xenbus_device_id pcifront_ids[] = {
+ { "pci" },
+ { "" }
+};
+
+static struct xenbus_driver pcifront = {
+ .name = "pcifront",
+ .ids = pcifront_ids,
+ .probe = pcifront_probe,
+ .remove = pcifront_remove,
+ .otherend_changed = pcifront_backend_changed,
+};
+
+/* Register the driver with xenbus during sys init */
+static void
+pcifront_init(void *unused)
+{
+ if ((xen_start_info->flags & SIF_INITDOMAIN))
+ return;
+
+ DPRINTF("xenbus registering\n");
+
+ xenbus_register_frontend(&pcifront);
+}
+
+SYSINIT(pciif, SI_SUB_PSEUDO, SI_ORDER_ANY, pcifront_init, NULL)
+
+
+/* Newbus xpcife device driver probe */
+static int
+xpcife_probe(device_t dev)
+{
+#ifdef XEN_PCIDEV_FE_DEBUG
+ struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev);
+ DPRINTF("xpcife probe (unit=%d)\n", pdev->unit);
+#endif
+ return 0;
+}
+
+/* Newbus xpcife device driver attach */
+static int
+xpcife_attach(device_t dev)
+{
+ struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev);
+ int i, num_roots, len, err;
+ char str[64];
+ unsigned int domain, bus;
+
+ DPRINTF("xpcife attach (unit=%d)\n", pdev->unit);
+
+ err = xenbus_scanf(NULL, pdev->xdev->otherend,
+ "root_num", "%d", &num_roots);
+ if (err != 1) {
+ if (err == 0)
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading number of PCI roots");
+ goto out;
+ }
+
+ /* Add a pcib device for each root */
+ for (i = 0; i < num_roots; i++) {
+ device_t child;
+
+ len = snprintf(str, sizeof(str), "root-%d", i);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_scanf(NULL, pdev->xdev->otherend, str,
+ "%x:%x", &domain, &bus);
+ if (err != 2) {
+ if (err >= 0)
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading PCI root %d", i);
+ goto out;
+ }
+ err = 0;
+ if (domain != pdev->xdev->otherend_id) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Domain mismatch %d != %d", domain, pdev->xdev->otherend_id);
+ goto out;
+ }
+
+ child = device_add_child(dev, "pcib", bus);
+ if (!child) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Unable to create pcib%d", bus);
+ goto out;
+ }
+ }
+
+ out:
+ return bus_generic_attach(dev);
+}
+
+static devclass_t xpcife_devclass;
+
+static device_method_t xpcife_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, xpcife_probe),
+ DEVMETHOD(device_attach, xpcife_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+ /* Bus interface */
+ DEVMETHOD(bus_print_child, bus_generic_print_child),
+ DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource),
+ DEVMETHOD(bus_release_resource, bus_generic_release_resource),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+ DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
+ DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+ {0, 0}
+};
+
+static driver_t xpcife_driver = {
+ "xpcife",
+ xpcife_methods,
+ 0,
+};
+
+DRIVER_MODULE(xpcife, nexus, xpcife_driver, xpcife_devclass, 0, 0);
+
+
+/* Newbus xen pcib device driver probe */
+static int
+xpcib_probe(device_t dev)
+{
+ struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+ struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(device_get_parent(dev));
+
+ DPRINTF("xpcib probe (bus=%d)\n", device_get_unit(dev));
+
+ sc->domain = pdev->xdev->otherend_id;
+ sc->bus = device_get_unit(dev);
+ sc->pdev = pdev;
+
+ return 0;
+}
+
+/* Newbus xen pcib device driver attach */
+static int
+xpcib_attach(device_t dev)
+{
+ struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+
+ DPRINTF("xpcib attach (bus=%d)\n", sc->bus);
+
+ device_add_child(dev, "pci", sc->bus);
+ return bus_generic_attach(dev);
+}
+
+static int
+xpcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
+{
+ struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+ switch (which) {
+ case PCIB_IVAR_BUS:
+ *result = sc->bus;
+ return 0;
+ }
+ return ENOENT;
+}
+
+/* Return the number of slots supported */
+static int
+xpcib_maxslots(device_t dev)
+{
+ return 31;
+}
+
+#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+
+/* Read configuration space register */
+static u_int32_t
+xpcib_read_config(device_t dev, int bus, int slot, int func,
+ int reg, int bytes)
+{
+ struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+ struct xen_pci_op op = {
+ .cmd = XEN_PCI_OP_conf_read,
+ .domain = sc->domain,
+ .bus = sc->bus,
+ .devfn = PCI_DEVFN(slot, func),
+ .offset = reg,
+ .size = bytes,
+ };
+ int err;
+
+ err = do_pci_op(sc->pdev, &op);
+
+ DPRINTF("read config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n",
+ bus, slot, func, reg, bytes, op.value, err);
+
+ if (err)
+ op.value = ~0;
+
+ return op.value;
+}
+
+/* Write configuration space register */
+static void
+xpcib_write_config(device_t dev, int bus, int slot, int func,
+ int reg, u_int32_t data, int bytes)
+{
+ struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+ struct xen_pci_op op = {
+ .cmd = XEN_PCI_OP_conf_write,
+ .domain = sc->domain,
+ .bus = sc->bus,
+ .devfn = PCI_DEVFN(slot, func),
+ .offset = reg,
+ .size = bytes,
+ .value = data,
+ };
+ int err;
+
+ err = do_pci_op(sc->pdev, &op);
+
+ DPRINTF("write config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n",
+ bus, slot, func, reg, bytes, data, err);
+}
+
+static int
+xpcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ pcicfgregs *cfg = &dinfo->cfg;
+
+ DPRINTF("route intr (pin=%d, line=%d)\n", pin, cfg->intline);
+
+ return cfg->intline;
+}
+
+static device_method_t xpcib_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, xpcib_probe),
+ DEVMETHOD(device_attach, xpcib_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ /* Bus interface */
+ DEVMETHOD(bus_print_child, bus_generic_print_child),
+ DEVMETHOD(bus_read_ivar, xpcib_read_ivar),
+ DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+ DEVMETHOD(bus_release_resource, bus_generic_release_resource),
+ DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
+ DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+
+ /* pcib interface */
+ DEVMETHOD(pcib_maxslots, xpcib_maxslots),
+ DEVMETHOD(pcib_read_config, xpcib_read_config),
+ DEVMETHOD(pcib_write_config, xpcib_write_config),
+ DEVMETHOD(pcib_route_interrupt, xpcib_route_interrupt),
+ { 0, 0 }
+};
+
+static devclass_t xpcib_devclass;
+
+DEFINE_CLASS_0(pcib, xpcib_driver, xpcib_methods, sizeof(struct xpcib_softc));
+DRIVER_MODULE(pcib, xpcife, xpcib_driver, xpcib_devclass, 0, 0);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */