summaryrefslogtreecommitdiffstats
path: root/drivers/xen
diff options
context:
space:
mode:
authorTimothy Pearson <tpearson@raptorengineering.com>2017-08-23 14:45:25 -0500
committerTimothy Pearson <tpearson@raptorengineering.com>2017-08-23 14:45:25 -0500
commitfcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch)
tree22962a4387943edc841c72a4e636a068c66d58fd /drivers/xen
downloadast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip
ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz
Initial import of modified Linux 2.6.28 tree
Original upstream URL: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig19
-rw-r--r--drivers/xen/Makefile5
-rw-r--r--drivers/xen/balloon.c591
-rw-r--r--drivers/xen/cpu_hotplug.c90
-rw-r--r--drivers/xen/events.c831
-rw-r--r--drivers/xen/features.c29
-rw-r--r--drivers/xen/grant-table.c557
-rw-r--r--drivers/xen/manage.c252
-rw-r--r--drivers/xen/xenbus/Makefile7
-rw-r--r--drivers/xen/xenbus/xenbus_client.c569
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c234
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h46
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c962
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h74
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c861
-rw-r--r--drivers/xen/xencomm.c217
16 files changed, 5344 insertions, 0 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
new file mode 100644
index 0000000..4b75a16
--- /dev/null
+++ b/drivers/xen/Kconfig
@@ -0,0 +1,19 @@
+config XEN_BALLOON
+ bool "Xen memory balloon driver"
+ depends on XEN
+ default y
+ help
+ The balloon driver allows the Xen domain to request more memory from
+ the system to expand the domain's memory allocation, or alternatively
+ return unneeded memory to the system.
+
+config XEN_SCRUB_PAGES
+ bool "Scrub pages before returning them to system"
+ depends on XEN_BALLOON
+ default y
+ help
+ Scrub pages before returning them to the system for reuse by
+ other domains. This makes sure that any confidential data
+ is not accidentally visible to other domains. Is it more
+ secure, but slightly less efficient.
+ If in doubt, say yes.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
new file mode 100644
index 0000000..d2a8fdf
--- /dev/null
+++ b/drivers/xen/Makefile
@@ -0,0 +1,5 @@
+obj-y += grant-table.o features.o events.o manage.o
+obj-y += xenbus/
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
+obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
new file mode 100644
index 0000000..ae2e3f8
--- /dev/null
+++ b/drivers/xen/balloon.c
@@ -0,0 +1,591 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/sysdev.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+
+#include <xen/interface/memory.h>
+#include <xen/xenbus.h>
+#include <xen/features.h>
+#include <xen/page.h>
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+#define BALLOON_CLASS_NAME "xen_memory"
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* We may hit the hard limit in Xen. If we do then we remember it. */
+ unsigned long hard_limit;
+ /*
+ * Drivers may alter the memory reservation independently, but they
+ * must inform the balloon driver so we avoid hitting the hard limit.
+ */
+ unsigned long driver_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+};
+
+static DEFINE_MUTEX(balloon_mutex);
+
+static struct sys_device balloon_sysdev;
+
+static int register_balloon(struct sys_device *sysdev);
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+static DEFINE_SPINLOCK(balloon_lock);
+
+static struct balloon_stats balloon_stats;
+
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
+/* VM /proc information for memory */
+extern unsigned long totalram_pages;
+
+#ifdef CONFIG_HIGHMEM
+extern unsigned long totalhigh_pages;
+#define inc_totalhigh_pages() (totalhigh_pages++)
+#define dec_totalhigh_pages() (totalhigh_pages--)
+#else
+#define inc_totalhigh_pages() do {} while(0)
+#define dec_totalhigh_pages() do {} while(0)
+#endif
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static LIST_HEAD(ballooned_pages);
+
+/* Main work function, always executed in process context. */
+static void balloon_process(struct work_struct *work);
+static DECLARE_WORK(balloon_worker, balloon_process);
+static struct timer_list balloon_timer;
+
+/* When ballooning out (allocating memory to return to Xen) we don't really
+ want the kernel to try too hard since that can trigger the oom killer. */
+#define GFP_BALLOON \
+ (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
+
+static void scrub_page(struct page *page)
+{
+#ifdef CONFIG_XEN_SCRUB_PAGES
+ clear_highpage(page);
+#endif
+}
+
+/* balloon_append: add the given page to the balloon. */
+static void balloon_append(struct page *page)
+{
+ /* Lowmem is re-populated first, so highmem pages go at list tail. */
+ if (PageHighMem(page)) {
+ list_add_tail(&page->lru, &ballooned_pages);
+ balloon_stats.balloon_high++;
+ dec_totalhigh_pages();
+ } else {
+ list_add(&page->lru, &ballooned_pages);
+ balloon_stats.balloon_low++;
+ }
+}
+
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+static struct page *balloon_retrieve(void)
+{
+ struct page *page;
+
+ if (list_empty(&ballooned_pages))
+ return NULL;
+
+ page = list_entry(ballooned_pages.next, struct page, lru);
+ list_del(&page->lru);
+
+ if (PageHighMem(page)) {
+ balloon_stats.balloon_high--;
+ inc_totalhigh_pages();
+ }
+ else
+ balloon_stats.balloon_low--;
+
+ return page;
+}
+
+static struct page *balloon_first_page(void)
+{
+ if (list_empty(&ballooned_pages))
+ return NULL;
+ return list_entry(ballooned_pages.next, struct page, lru);
+}
+
+static struct page *balloon_next_page(struct page *page)
+{
+ struct list_head *next = page->lru.next;
+ if (next == &ballooned_pages)
+ return NULL;
+ return list_entry(next, struct page, lru);
+}
+
+static void balloon_alarm(unsigned long unused)
+{
+ schedule_work(&balloon_worker);
+}
+
+static unsigned long current_target(void)
+{
+ unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
+
+ target = min(target,
+ balloon_stats.current_pages +
+ balloon_stats.balloon_low +
+ balloon_stats.balloon_high);
+
+ return target;
+}
+
+static int increase_reservation(unsigned long nr_pages)
+{
+ unsigned long pfn, i, flags;
+ struct page *page;
+ long rc;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
+
+ spin_lock_irqsave(&balloon_lock, flags);
+
+ page = balloon_first_page();
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page == NULL);
+ frame_list[i] = page_to_pfn(page);;
+ page = balloon_next_page(page);
+ }
+
+ set_xen_guest_handle(reservation.extent_start, frame_list);
+ reservation.nr_extents = nr_pages;
+ rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+ if (rc < nr_pages) {
+ if (rc > 0) {
+ int ret;
+
+ /* We hit the Xen hard limit: reprobe. */
+ reservation.nr_extents = rc;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ BUG_ON(ret != rc);
+ }
+ if (rc >= 0)
+ balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
+ balloon_stats.driver_pages);
+ goto out;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ page = balloon_retrieve();
+ BUG_ON(page == NULL);
+
+ pfn = page_to_pfn(page);
+ BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+ phys_to_machine_mapping_valid(pfn));
+
+ set_phys_to_machine(pfn, frame_list[i]);
+
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(frame_list[i], PAGE_KERNEL),
+ 0);
+ BUG_ON(ret);
+ }
+
+ /* Relinquish the page back to the allocator. */
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
+ }
+
+ balloon_stats.current_pages += nr_pages;
+ totalram_pages = balloon_stats.current_pages;
+
+ out:
+ spin_unlock_irqrestore(&balloon_lock, flags);
+
+ return 0;
+}
+
+static int decrease_reservation(unsigned long nr_pages)
+{
+ unsigned long pfn, i, flags;
+ struct page *page;
+ int need_sleep = 0;
+ int ret;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
+
+ for (i = 0; i < nr_pages; i++) {
+ if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+ nr_pages = i;
+ need_sleep = 1;
+ break;
+ }
+
+ pfn = page_to_pfn(page);
+ frame_list[i] = pfn_to_mfn(pfn);
+
+ scrub_page(page);
+ }
+
+ /* Ensure that ballooned highmem pages don't have kmaps. */
+ kmap_flush_unused();
+ flush_tlb_all();
+
+ spin_lock_irqsave(&balloon_lock, flags);
+
+ /* No more mappings: invalidate P2M and add to balloon. */
+ for (i = 0; i < nr_pages; i++) {
+ pfn = mfn_to_pfn(frame_list[i]);
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ balloon_append(pfn_to_page(pfn));
+ }
+
+ set_xen_guest_handle(reservation.extent_start, frame_list);
+ reservation.nr_extents = nr_pages;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ BUG_ON(ret != nr_pages);
+
+ balloon_stats.current_pages -= nr_pages;
+ totalram_pages = balloon_stats.current_pages;
+
+ spin_unlock_irqrestore(&balloon_lock, flags);
+
+ return need_sleep;
+}
+
+/*
+ * We avoid multiple worker processes conflicting via the balloon mutex.
+ * We may of course race updates of the target counts (which are protected
+ * by the balloon lock), or with changes to the Xen hard limit, but we will
+ * recover from these in time.
+ */
+static void balloon_process(struct work_struct *work)
+{
+ int need_sleep = 0;
+ long credit;
+
+ mutex_lock(&balloon_mutex);
+
+ do {
+ credit = current_target() - balloon_stats.current_pages;
+ if (credit > 0)
+ need_sleep = (increase_reservation(credit) != 0);
+ if (credit < 0)
+ need_sleep = (decrease_reservation(-credit) != 0);
+
+#ifndef CONFIG_PREEMPT
+ if (need_resched())
+ schedule();
+#endif
+ } while ((credit != 0) && !need_sleep);
+
+ /* Schedule more work if there is some still to be done. */
+ if (current_target() != balloon_stats.current_pages)
+ mod_timer(&balloon_timer, jiffies + HZ);
+
+ mutex_unlock(&balloon_mutex);
+}
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+static void balloon_set_new_target(unsigned long target)
+{
+ /* No need for lock. Not read-modify-write updates. */
+ balloon_stats.hard_limit = ~0UL;
+ balloon_stats.target_pages = target;
+ schedule_work(&balloon_worker);
+}
+
+static struct xenbus_watch target_watch =
+{
+ .node = "memory/target"
+};
+
+/* React to a change in the target key */
+static void watch_target(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ unsigned long long new_target;
+ int err;
+
+ err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
+ if (err != 1) {
+ /* This is ok (for domain0 at least) - so just return */
+ return;
+ }
+
+ /* The given memory/target value is in KiB, so it needs converting to
+ * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+ */
+ balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+}
+
+static int balloon_init_watcher(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ int err;
+
+ err = register_xenbus_watch(&target_watch);
+ if (err)
+ printk(KERN_ERR "Failed to set balloon watcher\n");
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xenstore_notifier;
+
+static int __init balloon_init(void)
+{
+ unsigned long pfn;
+ struct page *page;
+
+ if (!xen_pv_domain())
+ return -ENODEV;
+
+ pr_info("xen_balloon: Initialising balloon driver.\n");
+
+ balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ totalram_pages = balloon_stats.current_pages;
+ balloon_stats.target_pages = balloon_stats.current_pages;
+ balloon_stats.balloon_low = 0;
+ balloon_stats.balloon_high = 0;
+ balloon_stats.driver_pages = 0UL;
+ balloon_stats.hard_limit = ~0UL;
+
+ init_timer(&balloon_timer);
+ balloon_timer.data = 0;
+ balloon_timer.function = balloon_alarm;
+
+ register_balloon(&balloon_sysdev);
+
+ /* Initialise the balloon with excess memory space. */
+ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ page = pfn_to_page(pfn);
+ if (!PageReserved(page))
+ balloon_append(page);
+ }
+
+ target_watch.callback = watch_target;
+ xenstore_notifier.notifier_call = balloon_init_watcher;
+
+ register_xenstore_notifier(&xenstore_notifier);
+
+ return 0;
+}
+
+subsys_initcall(balloon_init);
+
+static void balloon_exit(void)
+{
+ /* XXX - release balloon here */
+ return;
+}
+
+module_exit(balloon_exit);
+
+#define BALLOON_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
+ { \
+ return sprintf(buf, format, ##args); \
+ } \
+ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
+BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
+BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
+BALLOON_SHOW(hard_limit_kb,
+ (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
+ (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
+BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
+
+static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
+}
+
+static ssize_t store_target_kb(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ char *endchar;
+ unsigned long long target_bytes;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
+
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+ return count;
+}
+
+static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+ show_target_kb, store_target_kb);
+
+
+static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (u64)balloon_stats.target_pages << PAGE_SHIFT);
+}
+
+static ssize_t store_target(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ char *endchar;
+ unsigned long long target_bytes;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ target_bytes = memparse(buf, &endchar);
+
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+ return count;
+}
+
+static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
+ show_target, store_target);
+
+
+static struct sysdev_attribute *balloon_attrs[] = {
+ &attr_target_kb,
+ &attr_target,
+};
+
+static struct attribute *balloon_info_attrs[] = {
+ &attr_current_kb.attr,
+ &attr_low_kb.attr,
+ &attr_high_kb.attr,
+ &attr_hard_limit_kb.attr,
+ &attr_driver_kb.attr,
+ NULL
+};
+
+static struct attribute_group balloon_info_group = {
+ .name = "info",
+ .attrs = balloon_info_attrs,
+};
+
+static struct sysdev_class balloon_sysdev_class = {
+ .name = BALLOON_CLASS_NAME,
+};
+
+static int register_balloon(struct sys_device *sysdev)
+{
+ int i, error;
+
+ error = sysdev_class_register(&balloon_sysdev_class);
+ if (error)
+ return error;
+
+ sysdev->id = 0;
+ sysdev->cls = &balloon_sysdev_class;
+
+ error = sysdev_register(sysdev);
+ if (error) {
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
+ error = sysdev_create_file(sysdev, balloon_attrs[i]);
+ if (error)
+ goto fail;
+ }
+
+ error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+ if (error)
+ goto fail;
+
+ return 0;
+
+ fail:
+ while (--i >= 0)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+}
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
new file mode 100644
index 0000000..974f56d
--- /dev/null
+++ b/drivers/xen/cpu_hotplug.c
@@ -0,0 +1,90 @@
+#include <linux/notifier.h>
+
+#include <xen/xenbus.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/cpu.h>
+
+static void enable_hotplug_cpu(int cpu)
+{
+ if (!cpu_present(cpu))
+ arch_register_cpu(cpu);
+
+ cpu_set(cpu, cpu_present_map);
+}
+
+static void disable_hotplug_cpu(int cpu)
+{
+ if (cpu_present(cpu))
+ arch_unregister_cpu(cpu);
+
+ cpu_clear(cpu, cpu_present_map);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+ int err;
+ char dir[32], state[32];
+
+ if (!cpu_possible(cpu))
+ return;
+
+ sprintf(dir, "cpu/%u", cpu);
+ err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
+ if (err != 1) {
+ printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+ return;
+ }
+
+ if (strcmp(state, "online") == 0) {
+ enable_hotplug_cpu(cpu);
+ } else if (strcmp(state, "offline") == 0) {
+ (void)cpu_down(cpu);
+ disable_hotplug_cpu(cpu);
+ } else {
+ printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+ state, cpu);
+ }
+}
+
+static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ unsigned int cpu;
+ char *cpustr;
+ const char *node = vec[XS_WATCH_PATH];
+
+ cpustr = strstr(node, "cpu/");
+ if (cpustr != NULL) {
+ sscanf(cpustr, "cpu/%u", &cpu);
+ vcpu_hotplug(cpu);
+ }
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+ unsigned long event, void *data)
+{
+ static struct xenbus_watch cpu_watch = {
+ .node = "cpu",
+ .callback = handle_vcpu_hotplug_event};
+
+ (void)register_xenbus_watch(&cpu_watch);
+
+ return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+ static struct notifier_block xsn_cpu = {
+ .notifier_call = setup_cpu_watcher };
+
+ if (!xen_pv_domain())
+ return -ENODEV;
+
+ register_xenstore_notifier(&xsn_cpu);
+
+ return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
new file mode 100644
index 0000000..1e3b934
--- /dev/null
+++ b/drivers/xen/events.c
@@ -0,0 +1,831 @@
+/*
+ * Xen event channels
+ *
+ * Xen models interrupts with abstract event channels. Because each
+ * domain gets 1024 event channels, but NR_IRQ is not that large, we
+ * must dynamically map irqs<->event channels. The event channels
+ * interface with the rest of the kernel by defining a xen interrupt
+ * chip. When an event is recieved, it is mapped to an irq and sent
+ * through the normal interrupt processing path.
+ *
+ * There are four kinds of events which can be mapped to an event
+ * channel:
+ *
+ * 1. Inter-domain notifications. This includes all the virtual
+ * device events, since they're driven by front-ends in another domain
+ * (typically dom0).
+ * 2. VIRQs, typically used for timers. These are per-cpu events.
+ * 3. IPIs.
+ * 4. Hardware interrupts. Not supported at present.
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+/*
+ * This lock protects updates to the following mapping and reference-count
+ * arrays. The lock does not need to be acquired to read the mapping tables.
+ */
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
+
+/* IRQ <-> VIRQ mapping. */
+static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
+
+/* IRQ <-> IPI mapping */
+static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
+
+/* Packed IRQ information: binding type, sub-type index, and event channel. */
+struct packed_irq
+{
+ unsigned short evtchn;
+ unsigned char index;
+ unsigned char type;
+};
+
+static struct packed_irq irq_info[NR_IRQS];
+
+/* Binding types. */
+enum {
+ IRQT_UNBOUND,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/* Convenient shorthand for packed representation of an unbound IRQ. */
+#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
+
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
+ [0 ... NR_EVENT_CHANNELS-1] = -1
+};
+static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
+static u8 cpu_evtchn[NR_EVENT_CHANNELS];
+
+/* Reference counts for bindings to IRQs. */
+static int irq_bindcount[NR_IRQS];
+
+/* Xen will never allocate port zero for any purpose. */
+#define VALID_EVTCHN(chn) ((chn) != 0)
+
+static struct irq_chip xen_dynamic_chip;
+
+/* Constructor for packed IRQ information. */
+static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
+{
+ return (struct packed_irq) { evtchn, index, type };
+}
+
+/*
+ * Accessors for packed IRQ information.
+ */
+static inline unsigned int evtchn_from_irq(int irq)
+{
+ return irq_info[irq].evtchn;
+}
+
+static inline unsigned int index_from_irq(int irq)
+{
+ return irq_info[irq].index;
+}
+
+static inline unsigned int type_from_irq(int irq)
+{
+ return irq_info[irq].type;
+}
+
+static inline unsigned long active_evtchns(unsigned int cpu,
+ struct shared_info *sh,
+ unsigned int idx)
+{
+ return (sh->evtchn_pending[idx] &
+ cpu_evtchn_mask[cpu][idx] &
+ ~sh->evtchn_mask[idx]);
+}
+
+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+{
+ int irq = evtchn_to_irq[chn];
+
+ BUG_ON(irq == -1);
+#ifdef CONFIG_SMP
+ irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
+#endif
+
+ __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
+ __set_bit(chn, cpu_evtchn_mask[cpu]);
+
+ cpu_evtchn[chn] = cpu;
+}
+
+static void init_evtchn_cpu_bindings(void)
+{
+#ifdef CONFIG_SMP
+ struct irq_desc *desc;
+ int i;
+
+ /* By default all event channels notify CPU#0. */
+ for_each_irq_desc(i, desc)
+ desc->affinity = cpumask_of_cpu(0);
+#endif
+
+ memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
+ memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
+}
+
+static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
+{
+ return cpu_evtchn[evtchn];
+}
+
+static inline void clear_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_clear_bit(port, &s->evtchn_pending[0]);
+}
+
+static inline void set_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, &s->evtchn_pending[0]);
+}
+
+static inline int test_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ return sync_test_bit(port, &s->evtchn_pending[0]);
+}
+
+
+/**
+ * notify_remote_via_irq - send event to remote end of event channel via irq
+ * @irq: irq of event channel to send event to
+ *
+ * Unlike notify_remote_via_evtchn(), this is safe to use across
+ * save/restore. Notifications on a broken connection are silently
+ * dropped.
+ */
+void notify_remote_via_irq(int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ notify_remote_via_evtchn(evtchn);
+}
+EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+
+static void mask_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, &s->evtchn_mask[0]);
+}
+
+static void unmask_evtchn(int port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ unsigned int cpu = get_cpu();
+
+ BUG_ON(!irqs_disabled());
+
+ /* Slow path (hypercall) if this is a non-local port. */
+ if (unlikely(cpu != cpu_from_evtchn(port))) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ } else {
+ struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
+
+ sync_clear_bit(port, &s->evtchn_mask[0]);
+
+ /*
+ * The following is basically the equivalent of
+ * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+ * the interrupt edge' if the channel is masked.
+ */
+ if (sync_test_bit(port, &s->evtchn_pending[0]) &&
+ !sync_test_and_set_bit(port / BITS_PER_LONG,
+ &vcpu_info->evtchn_pending_sel))
+ vcpu_info->evtchn_upcall_pending = 1;
+ }
+
+ put_cpu();
+}
+
+static int find_unbound_irq(void)
+{
+ int irq;
+
+ /* Only allocate from dynirq range */
+ for_each_irq_nr(irq)
+ if (irq_bindcount[irq] == 0)
+ break;
+
+ if (irq == nr_irqs)
+ panic("No available IRQ to bind to: increase nr_irqs!\n");
+
+ return irq;
+}
+
+int bind_evtchn_to_irq(unsigned int evtchn)
+{
+ int irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = evtchn_to_irq[evtchn];
+
+ if (irq == -1) {
+ irq = find_unbound_irq();
+
+ dynamic_irq_init(irq);
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_level_irq, "event");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+ }
+
+ irq_bindcount[irq]++;
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+
+static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+{
+ struct evtchn_bind_ipi bind_ipi;
+ int evtchn, irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = per_cpu(ipi_to_irq, cpu)[ipi];
+ if (irq == -1) {
+ irq = find_unbound_irq();
+ if (irq < 0)
+ goto out;
+
+ dynamic_irq_init(irq);
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_level_irq, "ipi");
+
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi) != 0)
+ BUG();
+ evtchn = bind_ipi.port;
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+
+ per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+ }
+
+ irq_bindcount[irq]++;
+
+ out:
+ spin_unlock(&irq_mapping_update_lock);
+ return irq;
+}
+
+
+static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+{
+ struct evtchn_bind_virq bind_virq;
+ int evtchn, irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = per_cpu(virq_to_irq, cpu)[virq];
+
+ if (irq == -1) {
+ bind_virq.virq = virq;
+ bind_virq.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+ evtchn = bind_virq.port;
+
+ irq = find_unbound_irq();
+
+ dynamic_irq_init(irq);
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_level_irq, "virq");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+ }
+
+ irq_bindcount[irq]++;
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+static void unbind_from_irq(unsigned int irq)
+{
+ struct evtchn_close close;
+ int evtchn = evtchn_from_irq(irq);
+
+ spin_lock(&irq_mapping_update_lock);
+
+ if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
+ close.port = evtchn;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+
+ switch (type_from_irq(irq)) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
+ [index_from_irq(irq)] = -1;
+ break;
+ case IRQT_IPI:
+ per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
+ [index_from_irq(irq)] = -1;
+ break;
+ default:
+ break;
+ }
+
+ /* Closed ports are implicitly re-bound to VCPU0. */
+ bind_evtchn_to_cpu(evtchn, 0);
+
+ evtchn_to_irq[evtchn] = -1;
+ irq_info[irq] = IRQ_UNBOUND;
+
+ dynamic_irq_cleanup(irq);
+ }
+
+ spin_unlock(&irq_mapping_update_lock);
+}
+
+int bind_evtchn_to_irqhandler(unsigned int evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ unsigned int irq;
+ int retval;
+
+ irq = bind_evtchn_to_irq(evtchn);
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
+
+int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname, void *dev_id)
+{
+ unsigned int irq;
+ int retval;
+
+ irq = bind_virq_to_irq(virq, cpu);
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
+
+int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ unsigned int cpu,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ int irq, retval;
+
+ irq = bind_ipi_to_irq(ipi, cpu);
+ if (irq < 0)
+ return irq;
+
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+
+void unbind_from_irqhandler(unsigned int irq, void *dev_id)
+{
+ free_irq(irq, dev_id);
+ unbind_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
+
+void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
+{
+ int irq = per_cpu(ipi_to_irq, cpu)[vector];
+ BUG_ON(irq < 0);
+ notify_remote_via_irq(irq);
+}
+
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+ struct shared_info *sh = HYPERVISOR_shared_info;
+ int cpu = smp_processor_id();
+ int i;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(debug_lock);
+
+ spin_lock_irqsave(&debug_lock, flags);
+
+ printk("vcpu %d\n ", cpu);
+
+ for_each_online_cpu(i) {
+ struct vcpu_info *v = per_cpu(xen_vcpu, i);
+ printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
+ (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
+ v->evtchn_upcall_pending,
+ v->evtchn_pending_sel);
+ }
+ printk("pending:\n ");
+ for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+ printk("%08lx%s", sh->evtchn_pending[i],
+ i % 8 == 0 ? "\n " : " ");
+ printk("\nmasks:\n ");
+ for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%08lx%s", sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nunmasked:\n ");
+ for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\npending list:\n");
+ for(i = 0; i < NR_EVENT_CHANNELS; i++) {
+ if (sync_test_bit(i, sh->evtchn_pending)) {
+ printk(" %d: event %d -> irq %d\n",
+ cpu_evtchn[i], i,
+ evtchn_to_irq[i]);
+ }
+ }
+
+ spin_unlock_irqrestore(&debug_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+
+/*
+ * Search the CPUs pending events bitmasks. For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for
+ * handling.
+ *
+ * Xen uses a two-level bitmap to speed searching. The first level is
+ * a bitset of words which contain pending event bits. The second
+ * level is a bitset of pending events themselves.
+ */
+void xen_evtchn_do_upcall(struct pt_regs *regs)
+{
+ int cpu = get_cpu();
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
+ static DEFINE_PER_CPU(unsigned, nesting_count);
+ unsigned count;
+
+ do {
+ unsigned long pending_words;
+
+ vcpu_info->evtchn_upcall_pending = 0;
+
+ if (__get_cpu_var(nesting_count)++)
+ goto out;
+
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+ /* Clear master flag /before/ clearing selector flag. */
+ wmb();
+#endif
+ pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
+ while (pending_words != 0) {
+ unsigned long pending_bits;
+ int word_idx = __ffs(pending_words);
+ pending_words &= ~(1UL << word_idx);
+
+ while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
+ int bit_idx = __ffs(pending_bits);
+ int port = (word_idx * BITS_PER_LONG) + bit_idx;
+ int irq = evtchn_to_irq[port];
+
+ if (irq != -1)
+ xen_do_IRQ(irq, regs);
+ }
+ }
+
+ BUG_ON(!irqs_disabled());
+
+ count = __get_cpu_var(nesting_count);
+ __get_cpu_var(nesting_count) = 0;
+ } while(count != 1);
+
+out:
+ put_cpu();
+}
+
+/* Rebind a new event channel to an existing irq. */
+void rebind_evtchn_irq(int evtchn, int irq)
+{
+ /* Make sure the irq is masked, since the new event channel
+ will also be masked. */
+ disable_irq(irq);
+
+ spin_lock(&irq_mapping_update_lock);
+
+ /* After resume the irq<->evtchn mappings are all cleared out */
+ BUG_ON(evtchn_to_irq[evtchn] != -1);
+ /* Expect irq to have been bound before,
+ so the bindcount should be non-0 */
+ BUG_ON(irq_bindcount[irq] == 0);
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ /* new event channels are always bound to cpu 0 */
+ irq_set_affinity(irq, cpumask_of_cpu(0));
+
+ /* Unmask the event channel. */
+ enable_irq(irq);
+}
+
+/* Rebind an evtchn so that it gets delivered to a specific cpu */
+static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+{
+ struct evtchn_bind_vcpu bind_vcpu;
+ int evtchn = evtchn_from_irq(irq);
+
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+ /* Send future instances of this interrupt to other vcpu. */
+ bind_vcpu.port = evtchn;
+ bind_vcpu.vcpu = tcpu;
+
+ /*
+ * If this fails, it usually just indicates that we're dealing with a
+ * virq or IPI channel, which don't actually need to be rebound. Ignore
+ * it, but don't do the xenlinux-level rebind in that case.
+ */
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
+ bind_evtchn_to_cpu(evtchn, tcpu);
+}
+
+
+static void set_affinity_irq(unsigned irq, cpumask_t dest)
+{
+ unsigned tcpu = first_cpu(dest);
+ rebind_irq_to_cpu(irq, tcpu);
+}
+
+int resend_irq_on_evtchn(unsigned int irq)
+{
+ int masked, evtchn = evtchn_from_irq(irq);
+ struct shared_info *s = HYPERVISOR_shared_info;
+
+ if (!VALID_EVTCHN(evtchn))
+ return 1;
+
+ masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
+ sync_set_bit(evtchn, s->evtchn_pending);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
+ return 1;
+}
+
+static void enable_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ unmask_evtchn(evtchn);
+}
+
+static void disable_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ mask_evtchn(evtchn);
+}
+
+static void ack_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ move_native_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ clear_evtchn(evtchn);
+}
+
+static int retrigger_dynirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+ struct shared_info *sh = HYPERVISOR_shared_info;
+ int ret = 0;
+
+ if (VALID_EVTCHN(evtchn)) {
+ int masked;
+
+ masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
+ sync_set_bit(evtchn, sh->evtchn_pending);
+ if (!masked)
+ unmask_evtchn(evtchn);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void restore_cpu_virqs(unsigned int cpu)
+{
+ struct evtchn_bind_virq bind_virq;
+ int virq, irq, evtchn;
+
+ for (virq = 0; virq < NR_VIRQS; virq++) {
+ if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+ continue;
+
+ BUG_ON(irq_info[irq].type != IRQT_VIRQ);
+ BUG_ON(irq_info[irq].index != virq);
+
+ /* Get a new binding from Xen. */
+ bind_virq.virq = virq;
+ bind_virq.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+ evtchn = bind_virq.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+ }
+}
+
+static void restore_cpu_ipis(unsigned int cpu)
+{
+ struct evtchn_bind_ipi bind_ipi;
+ int ipi, irq, evtchn;
+
+ for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+ if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+ continue;
+
+ BUG_ON(irq_info[irq].type != IRQT_IPI);
+ BUG_ON(irq_info[irq].index != ipi);
+
+ /* Get a new binding from Xen. */
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi) != 0)
+ BUG();
+ evtchn = bind_ipi.port;
+
+ /* Record the new mapping. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ /* Ready for use. */
+ unmask_evtchn(evtchn);
+
+ }
+}
+
+/* Clear an irq's pending state, in preparation for polling on it */
+void xen_clear_irq_pending(int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ clear_evtchn(evtchn);
+}
+
+void xen_set_irq_pending(int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn))
+ set_evtchn(evtchn);
+}
+
+bool xen_test_irq_pending(int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+ bool ret = false;
+
+ if (VALID_EVTCHN(evtchn))
+ ret = test_evtchn(evtchn);
+
+ return ret;
+}
+
+/* Poll waiting for an irq to become pending. In the usual case, the
+ irq will be disabled so it won't deliver an interrupt. */
+void xen_poll_irq(int irq)
+{
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
+
+ if (VALID_EVTCHN(evtchn)) {
+ struct sched_poll poll;
+
+ poll.nr_ports = 1;
+ poll.timeout = 0;
+ set_xen_guest_handle(poll.ports, &evtchn);
+
+ if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
+ BUG();
+ }
+}
+
+void xen_irq_resume(void)
+{
+ unsigned int cpu, irq, evtchn;
+
+ init_evtchn_cpu_bindings();
+
+ /* New event-channel space is not 'live' yet. */
+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+ mask_evtchn(evtchn);
+
+ /* No IRQ <-> event-channel mappings. */
+ for_each_irq_nr(irq)
+ irq_info[irq].evtchn = 0; /* zap event-channel binding */
+
+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+ evtchn_to_irq[evtchn] = -1;
+
+ for_each_possible_cpu(cpu) {
+ restore_cpu_virqs(cpu);
+ restore_cpu_ipis(cpu);
+ }
+}
+
+static struct irq_chip xen_dynamic_chip __read_mostly = {
+ .name = "xen-dyn",
+ .mask = disable_dynirq,
+ .unmask = enable_dynirq,
+ .ack = ack_dynirq,
+ .set_affinity = set_affinity_irq,
+ .retrigger = retrigger_dynirq,
+};
+
+void __init xen_init_IRQ(void)
+{
+ int i;
+
+ init_evtchn_cpu_bindings();
+
+ /* No event channels are 'live' right now. */
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ mask_evtchn(i);
+
+ /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
+ for_each_irq_nr(i)
+ irq_bindcount[i] = 0;
+
+ irq_ctx_init(smp_processor_id());
+}
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
new file mode 100644
index 0000000..0707714
--- /dev/null
+++ b/drivers/xen/features.c
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * features.c
+ *
+ * Xen feature flags.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Inc.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/features.h>
+
+u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
+EXPORT_SYMBOL_GPL(xen_features);
+
+void xen_setup_features(void)
+{
+ struct xen_feature_info fi;
+ int i, j;
+
+ for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+ fi.submap_idx = i;
+ if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+ break;
+ for (j = 0; j < 32; j++)
+ xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
+ }
+}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
new file mode 100644
index 0000000..06592b9
--- /dev/null
+++ b/drivers/xen/grant-table.c
@@ -0,0 +1,557 @@
+/******************************************************************************
+ * grant_table.c
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * Copyright (c) 2005-2006, Christopher Clark
+ * Copyright (c) 2004-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+
+#include <xen/interface/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <asm/pgtable.h>
+#include <asm/sync_bitops.h>
+
+
+/* External tools reserve first few grant table entries. */
+#define NR_RESERVED_ENTRIES 8
+#define GNTTAB_LIST_END 0xffffffff
+#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+
+static grant_ref_t **gnttab_list;
+static unsigned int nr_grant_frames;
+static unsigned int boot_max_nr_grant_frames;
+static int gnttab_free_count;
+static grant_ref_t gnttab_free_head;
+static DEFINE_SPINLOCK(gnttab_list_lock);
+
+static struct grant_entry *shared;
+
+static struct gnttab_free_callback *gnttab_free_callback_list;
+
+static int gnttab_expand(unsigned int req_entries);
+
+#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+
+static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
+{
+ return &gnttab_list[(entry) / RPP][(entry) % RPP];
+}
+/* This can be used as an l-value */
+#define gnttab_entry(entry) (*__gnttab_entry(entry))
+
+static int get_free_entries(unsigned count)
+{
+ unsigned long flags;
+ int ref, rc;
+ grant_ref_t head;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+
+ if ((gnttab_free_count < count) &&
+ ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ return rc;
+ }
+
+ ref = head = gnttab_free_head;
+ gnttab_free_count -= count;
+ while (count-- > 1)
+ head = gnttab_entry(head);
+ gnttab_free_head = gnttab_entry(head);
+ gnttab_entry(head) = GNTTAB_LIST_END;
+
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
+ return ref;
+}
+
+static void do_free_callbacks(void)
+{
+ struct gnttab_free_callback *callback, *next;
+
+ callback = gnttab_free_callback_list;
+ gnttab_free_callback_list = NULL;
+
+ while (callback != NULL) {
+ next = callback->next;
+ if (gnttab_free_count >= callback->count) {
+ callback->next = NULL;
+ callback->fn(callback->arg);
+ } else {
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ }
+ callback = next;
+ }
+}
+
+static inline void check_free_callbacks(void)
+{
+ if (unlikely(gnttab_free_callback_list))
+ do_free_callbacks();
+}
+
+static void put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ gnttab_entry(ref) = gnttab_free_head;
+ gnttab_free_head = ref;
+ gnttab_free_count++;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+static void update_grant_entry(grant_ref_t ref, domid_t domid,
+ unsigned long frame, unsigned flags)
+{
+ /*
+ * Introducing a valid entry into the grant table:
+ * 1. Write ent->domid.
+ * 2. Write ent->frame:
+ * GTF_permit_access: Frame to which access is permitted.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ * frame, or zero if none.
+ * 3. Write memory barrier (WMB).
+ * 4. Write ent->flags, inc. valid type.
+ */
+ shared[ref].frame = frame;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = flags;
+}
+
+/*
+ * Public grant-issuing interface functions
+ */
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly)
+{
+ update_grant_entry(ref, domid, frame,
+ GTF_permit_access | (readonly ? GTF_readonly : 0));
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
+
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+ int readonly)
+{
+ int ref;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+
+ gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
+
+int gnttab_query_foreign_access(grant_ref_t ref)
+{
+ u16 nflags;
+
+ nflags = shared[ref].flags;
+
+ return (nflags & (GTF_reading|GTF_writing));
+}
+EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
+
+int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+{
+ u16 flags, nflags;
+
+ nflags = shared[ref].flags;
+ do {
+ flags = nflags;
+ if (flags & (GTF_reading|GTF_writing)) {
+ printk(KERN_ALERT "WARNING: g.e. still in use!\n");
+ return 0;
+ }
+ } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
+
+void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
+ unsigned long page)
+{
+ if (gnttab_end_foreign_access_ref(ref, readonly)) {
+ put_free_entry(ref);
+ if (page != 0)
+ free_page(page);
+ } else {
+ /* XXX This needs to be fixed so that the ref and page are
+ placed on a list to be freed up later. */
+ printk(KERN_WARNING
+ "WARNING: leaking g.e. and page still in use!\n");
+ }
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
+
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
+{
+ int ref;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+ gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
+{
+ update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
+
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+ unsigned long frame;
+ u16 flags;
+
+ /*
+ * If a transfer is not even yet started, try to reclaim the grant
+ * reference and return failure (== 0).
+ */
+ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ return 0;
+ cpu_relax();
+ }
+
+ /* If a transfer is in progress then wait until it is completed. */
+ while (!(flags & GTF_transfer_completed)) {
+ flags = shared[ref].flags;
+ cpu_relax();
+ }
+
+ rmb(); /* Read the frame number /after/ reading completion status. */
+ frame = shared[ref].frame;
+ BUG_ON(frame == 0);
+
+ return frame;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
+
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+ unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
+ put_free_entry(ref);
+ return frame;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
+
+void gnttab_free_grant_reference(grant_ref_t ref)
+{
+ put_free_entry(ref);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
+
+void gnttab_free_grant_references(grant_ref_t head)
+{
+ grant_ref_t ref;
+ unsigned long flags;
+ int count = 1;
+ if (head == GNTTAB_LIST_END)
+ return;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ ref = head;
+ while (gnttab_entry(ref) != GNTTAB_LIST_END) {
+ ref = gnttab_entry(ref);
+ count++;
+ }
+ gnttab_entry(ref) = gnttab_free_head;
+ gnttab_free_head = head;
+ gnttab_free_count += count;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
+
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+ int h = get_free_entries(count);
+
+ if (h < 0)
+ return -ENOSPC;
+
+ *head = h;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
+
+int gnttab_empty_grant_references(const grant_ref_t *private_head)
+{
+ return (*private_head == GNTTAB_LIST_END);
+}
+EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
+
+int gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+ grant_ref_t g = *private_head;
+ if (unlikely(g == GNTTAB_LIST_END))
+ return -ENOSPC;
+ *private_head = gnttab_entry(g);
+ return g;
+}
+EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
+
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+ grant_ref_t release)
+{
+ gnttab_entry(release) = *private_head;
+ *private_head = release;
+}
+EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
+
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+ void (*fn)(void *), void *arg, u16 count)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ if (callback->next)
+ goto out;
+ callback->fn = fn;
+ callback->arg = arg;
+ callback->count = count;
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ check_free_callbacks();
+out:
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
+
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+ struct gnttab_free_callback **pcb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
+ if (*pcb == callback) {
+ *pcb = callback->next;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
+
+static int grow_gnttab_list(unsigned int more_frames)
+{
+ unsigned int new_nr_grant_frames, extra_entries, i;
+ unsigned int nr_glist_frames, new_nr_glist_frames;
+
+ new_nr_grant_frames = nr_grant_frames + more_frames;
+ extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
+
+ nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
+ new_nr_glist_frames =
+ (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
+ for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
+ gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
+ if (!gnttab_list[i])
+ goto grow_nomem;
+ }
+
+
+ for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+ gnttab_entry(i) = i + 1;
+
+ gnttab_entry(i) = gnttab_free_head;
+ gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ gnttab_free_count += extra_entries;
+
+ nr_grant_frames = new_nr_grant_frames;
+
+ check_free_callbacks();
+
+ return 0;
+
+grow_nomem:
+ for ( ; i >= nr_glist_frames; i--)
+ free_page((unsigned long) gnttab_list[i]);
+ return -ENOMEM;
+}
+
+static unsigned int __max_nr_grant_frames(void)
+{
+ struct gnttab_query_size query;
+ int rc;
+
+ query.dom = DOMID_SELF;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
+ if ((rc < 0) || (query.status != GNTST_okay))
+ return 4; /* Legacy max supported number of frames */
+
+ return query.max_nr_frames;
+}
+
+static inline unsigned int max_nr_grant_frames(void)
+{
+ unsigned int xen_max = __max_nr_grant_frames();
+
+ if (xen_max > boot_max_nr_grant_frames)
+ return boot_max_nr_grant_frames;
+ return xen_max;
+}
+
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+{
+ struct gnttab_setup_table setup;
+ unsigned long *frames;
+ unsigned int nr_gframes = end_idx + 1;
+ int rc;
+
+ frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+ if (!frames)
+ return -ENOMEM;
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = nr_gframes;
+ set_xen_guest_handle(setup.frame_list, frames);
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
+ if (rc == -ENOSYS) {
+ kfree(frames);
+ return -ENOSYS;
+ }
+
+ BUG_ON(rc || setup.status);
+
+ rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
+ &shared);
+ BUG_ON(rc);
+
+ kfree(frames);
+
+ return 0;
+}
+
+int gnttab_resume(void)
+{
+ if (max_nr_grant_frames() < nr_grant_frames)
+ return -ENOSYS;
+ return gnttab_map(0, nr_grant_frames - 1);
+}
+
+int gnttab_suspend(void)
+{
+ arch_gnttab_unmap_shared(shared, nr_grant_frames);
+ return 0;
+}
+
+static int gnttab_expand(unsigned int req_entries)
+{
+ int rc;
+ unsigned int cur, extra;
+
+ cur = nr_grant_frames;
+ extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
+ GREFS_PER_GRANT_FRAME);
+ if (cur + extra > max_nr_grant_frames())
+ return -ENOSPC;
+
+ rc = gnttab_map(cur, cur + extra - 1);
+ if (rc == 0)
+ rc = grow_gnttab_list(extra);
+
+ return rc;
+}
+
+static int __devinit gnttab_init(void)
+{
+ int i;
+ unsigned int max_nr_glist_frames, nr_glist_frames;
+ unsigned int nr_init_grefs;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ nr_grant_frames = 1;
+ boot_max_nr_grant_frames = __max_nr_grant_frames();
+
+ /* Determine the maximum number of frames required for the
+ * grant reference free list on the current hypervisor.
+ */
+ max_nr_glist_frames = (boot_max_nr_grant_frames *
+ GREFS_PER_GRANT_FRAME / RPP);
+
+ gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
+ GFP_KERNEL);
+ if (gnttab_list == NULL)
+ return -ENOMEM;
+
+ nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
+ for (i = 0; i < nr_glist_frames; i++) {
+ gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
+ if (gnttab_list[i] == NULL)
+ goto ini_nomem;
+ }
+
+ if (gnttab_resume() < 0)
+ return -ENODEV;
+
+ nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
+
+ for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+ gnttab_entry(i) = i + 1;
+
+ gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
+ gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
+ gnttab_free_head = NR_RESERVED_ENTRIES;
+
+ printk("Grant table initialized\n");
+ return 0;
+
+ ini_nomem:
+ for (i--; i >= 0; i--)
+ free_page((unsigned long)gnttab_list[i]);
+ kfree(gnttab_list);
+ return -ENOMEM;
+}
+
+core_initcall(gnttab_init);
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
new file mode 100644
index 0000000..9b91617
--- /dev/null
+++ b/drivers/xen/manage.c
@@ -0,0 +1,252 @@
+/*
+ * Handle extern requests for shutdown, reboot and sysrq
+ */
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stop_machine.h>
+#include <linux/freezer.h>
+
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/hvc-console.h>
+#include <xen/xen-ops.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+enum shutdown_state {
+ SHUTDOWN_INVALID = -1,
+ SHUTDOWN_POWEROFF = 0,
+ SHUTDOWN_SUSPEND = 2,
+ /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+ report a crash, not be instructed to crash!
+ HALT is the same as POWEROFF, as far as we're concerned. The tools use
+ the distinction when we return the reason code to them. */
+ SHUTDOWN_HALT = 4,
+};
+
+/* Ignore multiple shutdown requests. */
+static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+
+#ifdef CONFIG_PM_SLEEP
+static int xen_suspend(void *data)
+{
+ int *cancelled = data;
+ int err;
+
+ BUG_ON(!irqs_disabled());
+
+ err = device_power_down(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
+ err);
+ return err;
+ }
+
+ xen_mm_pin_all();
+ gnttab_suspend();
+ xen_pre_suspend();
+
+ /*
+ * This hypercall returns 1 if suspend was cancelled
+ * or the domain was merely checkpointed, and 0 if it
+ * is resuming in a new domain.
+ */
+ *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+ xen_post_suspend(*cancelled);
+ gnttab_resume();
+ xen_mm_unpin_all();
+
+ device_power_up(PMSG_RESUME);
+
+ if (!*cancelled) {
+ xen_irq_resume();
+ xen_console_resume();
+ xen_timer_resume();
+ }
+
+ return 0;
+}
+
+static void do_suspend(void)
+{
+ int err;
+ int cancelled = 1;
+
+ shutting_down = SHUTDOWN_SUSPEND;
+
+#ifdef CONFIG_PREEMPT
+ /* If the kernel is preemptible, we need to freeze all the processes
+ to prevent them from being in the middle of a pagetable update
+ during suspend. */
+ err = freeze_processes();
+ if (err) {
+ printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+ return;
+ }
+#endif
+
+ err = device_suspend(PMSG_SUSPEND);
+ if (err) {
+ printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+ goto out;
+ }
+
+ printk("suspending xenbus...\n");
+ /* XXX use normal device tree? */
+ xenbus_suspend();
+
+ err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0));
+ if (err) {
+ printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+ goto out;
+ }
+
+ if (!cancelled) {
+ xen_arch_resume();
+ xenbus_resume();
+ } else
+ xenbus_suspend_cancel();
+
+ device_resume(PMSG_RESUME);
+
+ /* Make sure timer events get retriggered on all CPUs */
+ clock_was_set();
+out:
+#ifdef CONFIG_PREEMPT
+ thaw_processes();
+#endif
+ shutting_down = SHUTDOWN_INVALID;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static void shutdown_handler(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ char *str;
+ struct xenbus_transaction xbt;
+ int err;
+
+ if (shutting_down != SHUTDOWN_INVALID)
+ return;
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ return;
+
+ str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
+ /* Ignore read errors and empty reads. */
+ if (XENBUS_IS_ERR_READ(str)) {
+ xenbus_transaction_end(xbt, 1);
+ return;
+ }
+
+ xenbus_write(xbt, "control", "shutdown", "");
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN) {
+ kfree(str);
+ goto again;
+ }
+
+ if (strcmp(str, "poweroff") == 0 ||
+ strcmp(str, "halt") == 0) {
+ shutting_down = SHUTDOWN_POWEROFF;
+ orderly_poweroff(false);
+ } else if (strcmp(str, "reboot") == 0) {
+ shutting_down = SHUTDOWN_POWEROFF; /* ? */
+ ctrl_alt_del();
+#ifdef CONFIG_PM_SLEEP
+ } else if (strcmp(str, "suspend") == 0) {
+ do_suspend();
+#endif
+ } else {
+ printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
+ shutting_down = SHUTDOWN_INVALID;
+ }
+
+ kfree(str);
+}
+
+static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
+ unsigned int len)
+{
+ char sysrq_key = '\0';
+ struct xenbus_transaction xbt;
+ int err;
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ return;
+ if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
+ printk(KERN_ERR "Unable to read sysrq code in "
+ "control/sysrq\n");
+ xenbus_transaction_end(xbt, 1);
+ return;
+ }
+
+ if (sysrq_key != '\0')
+ xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+
+ if (sysrq_key != '\0')
+ handle_sysrq(sysrq_key, NULL);
+}
+
+static struct xenbus_watch shutdown_watch = {
+ .node = "control/shutdown",
+ .callback = shutdown_handler
+};
+
+static struct xenbus_watch sysrq_watch = {
+ .node = "control/sysrq",
+ .callback = sysrq_handler
+};
+
+static int setup_shutdown_watcher(void)
+{
+ int err;
+
+ err = register_xenbus_watch(&shutdown_watch);
+ if (err) {
+ printk(KERN_ERR "Failed to set shutdown watcher\n");
+ return err;
+ }
+
+ err = register_xenbus_watch(&sysrq_watch);
+ if (err) {
+ printk(KERN_ERR "Failed to set sysrq watcher\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static int shutdown_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ setup_shutdown_watcher();
+ return NOTIFY_DONE;
+}
+
+static int __init setup_shutdown_event(void)
+{
+ static struct notifier_block xenstore_notifier = {
+ .notifier_call = shutdown_event
+ };
+ register_xenstore_notifier(&xenstore_notifier);
+
+ return 0;
+}
+
+subsys_initcall(setup_shutdown_event);
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
new file mode 100644
index 0000000..5571f5b
--- /dev/null
+++ b/drivers/xen/xenbus/Makefile
@@ -0,0 +1,7 @@
+obj-y += xenbus.o
+
+xenbus-objs =
+xenbus-objs += xenbus_client.o
+xenbus-objs += xenbus_comms.o
+xenbus-objs += xenbus_xs.o
+xenbus-objs += xenbus_probe.o
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
new file mode 100644
index 0000000..9678b3e
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -0,0 +1,569 @@
+/******************************************************************************
+ * Client-facing interface for the Xenbus driver. In other words, the
+ * interface between the Xenbus and the device-specific code, be it the
+ * frontend or the backend of that driver.
+ *
+ * Copyright (C) 2005 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+
+const char *xenbus_strstate(enum xenbus_state state)
+{
+ static const char *const name[] = {
+ [ XenbusStateUnknown ] = "Unknown",
+ [ XenbusStateInitialising ] = "Initialising",
+ [ XenbusStateInitWait ] = "InitWait",
+ [ XenbusStateInitialised ] = "Initialised",
+ [ XenbusStateConnected ] = "Connected",
+ [ XenbusStateClosing ] = "Closing",
+ [ XenbusStateClosed ] = "Closed",
+ };
+ return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+EXPORT_SYMBOL_GPL(xenbus_strstate);
+
+/**
+ * xenbus_watch_path - register a watch
+ * @dev: xenbus device
+ * @path: path to watch
+ * @watch: watch to register
+ * @callback: callback to register
+ *
+ * Register a @watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given @callback function as the callback. Return 0 on
+ * success, or -errno on error. On success, the given @path will be saved as
+ * @watch->node, and remains the caller's to free. On error, @watch->node will
+ * be NULL, the device will switch to %XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
+int xenbus_watch_path(struct xenbus_device *dev, const char *path,
+ struct xenbus_watch *watch,
+ void (*callback)(struct xenbus_watch *,
+ const char **, unsigned int))
+{
+ int err;
+
+ watch->node = path;
+ watch->callback = callback;
+
+ err = register_xenbus_watch(watch);
+
+ if (err) {
+ watch->node = NULL;
+ watch->callback = NULL;
+ xenbus_dev_fatal(dev, err, "adding watch on %s", path);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_watch_path);
+
+
+/**
+ * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
+ * @dev: xenbus device
+ * @watch: watch to register
+ * @callback: callback to register
+ * @pathfmt: format of path to watch
+ *
+ * Register a watch on the given @path, using the given xenbus_watch
+ * structure for storage, and the given @callback function as the callback.
+ * Return 0 on success, or -errno on error. On success, the watched path
+ * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
+ * kfree(). On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to %XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_pathfmt(struct xenbus_device *dev,
+ struct xenbus_watch *watch,
+ void (*callback)(struct xenbus_watch *,
+ const char **, unsigned int),
+ const char *pathfmt, ...)
+{
+ int err;
+ va_list ap;
+ char *path;
+
+ va_start(ap, pathfmt);
+ path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
+ va_end(ap);
+
+ if (!path) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
+ return -ENOMEM;
+ }
+ err = xenbus_watch_path(dev, path, watch, callback);
+
+ if (err)
+ kfree(path);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
+
+
+/**
+ * xenbus_switch_state
+ * @dev: xenbus device
+ * @xbt: transaction handle
+ * @state: new state
+ *
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Return 0 on success, or -errno on error. On error, the device will switch
+ * to XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+{
+ /* We check whether the state is currently set to the given value, and
+ if not, then the state is set. We don't want to unconditionally
+ write the given state, because we don't want to fire watches
+ unnecessarily. Furthermore, if the node has gone, we don't write
+ to it, as the device will be tearing down, and we don't want to
+ resurrect that directory.
+
+ Note that, because of this cached value of our state, this function
+ will not work inside a Xenstore transaction (something it was
+ trying to in the past) because dev->state would not get reset if
+ the transaction was aborted.
+
+ */
+
+ int current_state;
+ int err;
+
+ if (state == dev->state)
+ return 0;
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
+ &current_state);
+ if (err != 1)
+ return 0;
+
+ err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
+ if (err) {
+ if (state != XenbusStateClosing) /* Avoid looping */
+ xenbus_dev_fatal(dev, err, "writing new state");
+ return err;
+ }
+
+ dev->state = state;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_switch_state);
+
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+ xenbus_switch_state(dev, XenbusStateClosed);
+ complete(&dev->down);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
+
+/**
+ * Return the path to the error node for the given device, or NULL on failure.
+ * If the value returned is non-NULL, then it is the caller's to kfree.
+ */
+static char *error_path(struct xenbus_device *dev)
+{
+ return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
+}
+
+
+static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
+ const char *fmt, va_list ap)
+{
+ int ret;
+ unsigned int len;
+ char *printf_buffer = NULL;
+ char *path_buffer = NULL;
+
+#define PRINTF_BUFFER_SIZE 4096
+ printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+ if (printf_buffer == NULL)
+ goto fail;
+
+ len = sprintf(printf_buffer, "%i ", -err);
+ ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
+
+ BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
+
+ dev_err(&dev->dev, "%s\n", printf_buffer);
+
+ path_buffer = error_path(dev);
+
+ if (path_buffer == NULL) {
+ dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+ dev->nodename, printf_buffer);
+ goto fail;
+ }
+
+ if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
+ dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+ dev->nodename, printf_buffer);
+ goto fail;
+ }
+
+fail:
+ kfree(printf_buffer);
+ kfree(path_buffer);
+}
+
+
+/**
+ * xenbus_dev_error
+ * @dev: xenbus device
+ * @err: error to report
+ * @fmt: error message format
+ *
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ xenbus_va_dev_error(dev, err, fmt, ap);
+ va_end(ap);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_error);
+
+/**
+ * xenbus_dev_fatal
+ * @dev: xenbus device
+ * @err: error to report
+ * @fmt: error message format
+ *
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ xenbus_va_dev_error(dev, err, fmt, ap);
+ va_end(ap);
+
+ xenbus_switch_state(dev, XenbusStateClosing);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
+
+/**
+ * xenbus_grant_ring
+ * @dev: xenbus device
+ * @ring_mfn: mfn of ring to grant
+
+ * Grant access to the given @ring_mfn to the peer of the given device. Return
+ * 0 on success, or -errno on error. On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+{
+ int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
+ if (err < 0)
+ xenbus_dev_fatal(dev, err, "granting access to ring page");
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_grant_ring);
+
+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port. Return 0 on success, or -errno on error. On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
+{
+ struct evtchn_alloc_unbound alloc_unbound;
+ int err;
+
+ alloc_unbound.dom = DOMID_SELF;
+ alloc_unbound.remote_dom = dev->otherend_id;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc_unbound);
+ if (err)
+ xenbus_dev_fatal(dev, err, "allocating event channel");
+ else
+ *port = alloc_unbound.port;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
+
+
+/**
+ * Bind to an existing interdomain event channel in another domain. Returns 0
+ * on success and stores the local port in *port. On error, returns -errno,
+ * switches the device to XenbusStateClosing, and saves the error in XenStore.
+ */
+int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
+{
+ struct evtchn_bind_interdomain bind_interdomain;
+ int err;
+
+ bind_interdomain.remote_dom = dev->otherend_id;
+ bind_interdomain.remote_port = remote_port;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+ &bind_interdomain);
+ if (err)
+ xenbus_dev_fatal(dev, err,
+ "binding to event channel %d from domain %d",
+ remote_port, dev->otherend_id);
+ else
+ *port = bind_interdomain.local_port;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
+
+
+/**
+ * Free an existing event channel. Returns 0 on success or -errno on error.
+ */
+int xenbus_free_evtchn(struct xenbus_device *dev, int port)
+{
+ struct evtchn_close close;
+ int err;
+
+ close.port = port;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ if (err)
+ xenbus_dev_error(dev, err, "freeing event channel %d", port);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
+
+
+/**
+ * xenbus_map_ring_valloc
+ * @dev: xenbus device
+ * @gnt_ref: grant reference
+ * @vaddr: pointer to address to be filled out by mapping
+ *
+ * Based on Rusty Russell's skeleton driver's map_page.
+ * Map a page of memory into this domain from another domain's grant table.
+ * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
+ * page to that address, and sets *vaddr to that address.
+ * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
+ * or -ENOMEM on error. If an error is returned, device will switch to
+ * XenbusStateClosing and the error message will be saved in XenStore.
+ */
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+{
+ struct gnttab_map_grant_ref op = {
+ .flags = GNTMAP_host_map,
+ .ref = gnt_ref,
+ .dom = dev->otherend_id,
+ };
+ struct vm_struct *area;
+
+ *vaddr = NULL;
+
+ area = xen_alloc_vm_area(PAGE_SIZE);
+ if (!area)
+ return -ENOMEM;
+
+ op.host_addr = (unsigned long)area->addr;
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status != GNTST_okay) {
+ xen_free_vm_area(area);
+ xenbus_dev_fatal(dev, op.status,
+ "mapping in shared page %d from domain %d",
+ gnt_ref, dev->otherend_id);
+ return op.status;
+ }
+
+ /* Stuff the handle in an unused field */
+ area->phys_addr = (unsigned long)op.handle;
+
+ *vaddr = area->addr;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+
+/**
+ * xenbus_map_ring
+ * @dev: xenbus device
+ * @gnt_ref: grant reference
+ * @handle: pointer to grant handle to be filled
+ * @vaddr: address to be mapped to
+ *
+ * Map a page of memory into this domain from another domain's grant table.
+ * xenbus_map_ring does not allocate the virtual address space (you must do
+ * this yourself!). It only maps in the page to the specified address.
+ * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
+ * or -ENOMEM on error. If an error is returned, device will switch to
+ * XenbusStateClosing and the error message will be saved in XenStore.
+ */
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
+ grant_handle_t *handle, void *vaddr)
+{
+ struct gnttab_map_grant_ref op = {
+ .host_addr = (unsigned long)vaddr,
+ .flags = GNTMAP_host_map,
+ .ref = gnt_ref,
+ .dom = dev->otherend_id,
+ };
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status != GNTST_okay) {
+ xenbus_dev_fatal(dev, op.status,
+ "mapping in shared page %d from domain %d",
+ gnt_ref, dev->otherend_id);
+ } else
+ *handle = op.handle;
+
+ return op.status;
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring);
+
+
+/**
+ * xenbus_unmap_ring_vfree
+ * @dev: xenbus device
+ * @vaddr: addr to unmap
+ *
+ * Based on Rusty Russell's skeleton driver's unmap_page.
+ * Unmap a page of memory in this domain that was imported from another domain.
+ * Use xenbus_unmap_ring_vfree if you mapped in your memory with
+ * xenbus_map_ring_valloc (it will free the virtual address space).
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
+{
+ struct vm_struct *area;
+ struct gnttab_unmap_grant_ref op = {
+ .host_addr = (unsigned long)vaddr,
+ };
+
+ /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
+ * method so that we don't have to muck with vmalloc internals here.
+ * We could force the user to hang on to their struct vm_struct from
+ * xenbus_map_ring_valloc, but these 6 lines considerably simplify
+ * this API.
+ */
+ read_lock(&vmlist_lock);
+ for (area = vmlist; area != NULL; area = area->next) {
+ if (area->addr == vaddr)
+ break;
+ }
+ read_unlock(&vmlist_lock);
+
+ if (!area) {
+ xenbus_dev_error(dev, -ENOENT,
+ "can't find mapped virtual address %p", vaddr);
+ return GNTST_bad_virt_addr;
+ }
+
+ op.handle = (grant_handle_t)area->phys_addr;
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status == GNTST_okay)
+ xen_free_vm_area(area);
+ else
+ xenbus_dev_error(dev, op.status,
+ "unmapping page at handle %d error %d",
+ (int16_t)area->phys_addr, op.status);
+
+ return op.status;
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+
+/**
+ * xenbus_unmap_ring
+ * @dev: xenbus device
+ * @handle: grant handle
+ * @vaddr: addr to unmap
+ *
+ * Unmap a page of memory in this domain that was imported from another domain.
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+int xenbus_unmap_ring(struct xenbus_device *dev,
+ grant_handle_t handle, void *vaddr)
+{
+ struct gnttab_unmap_grant_ref op = {
+ .host_addr = (unsigned long)vaddr,
+ .handle = handle,
+ };
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status != GNTST_okay)
+ xenbus_dev_error(dev, op.status,
+ "unmapping page at handle %d error %d",
+ handle, op.status);
+
+ return op.status;
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
+
+
+/**
+ * xenbus_read_driver_state
+ * @path: path for driver
+ *
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateUnknown if no state can be read.
+ */
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+ enum xenbus_state result;
+ int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
+ if (err)
+ result = XenbusStateUnknown;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
new file mode 100644
index 0000000..090c61e
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -0,0 +1,234 @@
+/******************************************************************************
+ * xenbus_comms.c
+ *
+ * Low level code to talks to Xen Store: ringbuffer and event channel.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <xen/xenbus.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include "xenbus_comms.h"
+
+static int xenbus_irq;
+
+static DECLARE_WORK(probe_work, xenbus_probe);
+
+static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+
+static irqreturn_t wake_waiting(int irq, void *unused)
+{
+ if (unlikely(xenstored_ready == 0)) {
+ xenstored_ready = 1;
+ schedule_work(&probe_work);
+ }
+
+ wake_up(&xb_waitq);
+ return IRQ_HANDLED;
+}
+
+static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
+{
+ return ((prod - cons) <= XENSTORE_RING_SIZE);
+}
+
+static void *get_output_chunk(XENSTORE_RING_IDX cons,
+ XENSTORE_RING_IDX prod,
+ char *buf, uint32_t *len)
+{
+ *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+ if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
+ *len = XENSTORE_RING_SIZE - (prod - cons);
+ return buf + MASK_XENSTORE_IDX(prod);
+}
+
+static const void *get_input_chunk(XENSTORE_RING_IDX cons,
+ XENSTORE_RING_IDX prod,
+ const char *buf, uint32_t *len)
+{
+ *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+ if ((prod - cons) < *len)
+ *len = prod - cons;
+ return buf + MASK_XENSTORE_IDX(cons);
+}
+
+/**
+ * xb_write - low level write
+ * @data: buffer to send
+ * @len: length of buffer
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int xb_write(const void *data, unsigned len)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ XENSTORE_RING_IDX cons, prod;
+ int rc;
+
+ while (len != 0) {
+ void *dst;
+ unsigned int avail;
+
+ rc = wait_event_interruptible(
+ xb_waitq,
+ (intf->req_prod - intf->req_cons) !=
+ XENSTORE_RING_SIZE);
+ if (rc < 0)
+ return rc;
+
+ /* Read indexes, then verify. */
+ cons = intf->req_cons;
+ prod = intf->req_prod;
+ if (!check_indexes(cons, prod)) {
+ intf->req_cons = intf->req_prod = 0;
+ return -EIO;
+ }
+
+ dst = get_output_chunk(cons, prod, intf->req, &avail);
+ if (avail == 0)
+ continue;
+ if (avail > len)
+ avail = len;
+
+ /* Must write data /after/ reading the consumer index. */
+ mb();
+
+ memcpy(dst, data, avail);
+ data += avail;
+ len -= avail;
+
+ /* Other side must not see new producer until data is there. */
+ wmb();
+ intf->req_prod += avail;
+
+ /* Implies mb(): other side will see the updated producer. */
+ notify_remote_via_evtchn(xen_store_evtchn);
+ }
+
+ return 0;
+}
+
+int xb_data_to_read(void)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ return (intf->rsp_cons != intf->rsp_prod);
+}
+
+int xb_wait_for_data_to_read(void)
+{
+ return wait_event_interruptible(xb_waitq, xb_data_to_read());
+}
+
+int xb_read(void *data, unsigned len)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ XENSTORE_RING_IDX cons, prod;
+ int rc;
+
+ while (len != 0) {
+ unsigned int avail;
+ const char *src;
+
+ rc = xb_wait_for_data_to_read();
+ if (rc < 0)
+ return rc;
+
+ /* Read indexes, then verify. */
+ cons = intf->rsp_cons;
+ prod = intf->rsp_prod;
+ if (!check_indexes(cons, prod)) {
+ intf->rsp_cons = intf->rsp_prod = 0;
+ return -EIO;
+ }
+
+ src = get_input_chunk(cons, prod, intf->rsp, &avail);
+ if (avail == 0)
+ continue;
+ if (avail > len)
+ avail = len;
+
+ /* Must read data /after/ reading the producer index. */
+ rmb();
+
+ memcpy(data, src, avail);
+ data += avail;
+ len -= avail;
+
+ /* Other side must not see free space until we've copied out */
+ mb();
+ intf->rsp_cons += avail;
+
+ pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
+
+ /* Implies mb(): other side will see the updated consumer. */
+ notify_remote_via_evtchn(xen_store_evtchn);
+ }
+
+ return 0;
+}
+
+/**
+ * xb_init_comms - Set up interrupt handler off store event channel.
+ */
+int xb_init_comms(void)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+
+ if (intf->req_prod != intf->req_cons)
+ printk(KERN_ERR "XENBUS request ring is not quiescent "
+ "(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
+
+ if (intf->rsp_prod != intf->rsp_cons) {
+ printk(KERN_WARNING "XENBUS response ring is not quiescent "
+ "(%08x:%08x): fixing up\n",
+ intf->rsp_cons, intf->rsp_prod);
+ intf->rsp_cons = intf->rsp_prod;
+ }
+
+ if (xenbus_irq) {
+ /* Already have an irq; assume we're resuming */
+ rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
+ } else {
+ int err;
+ err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
+ 0, "xenbus", &xb_waitq);
+ if (err <= 0) {
+ printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+ return err;
+ }
+
+ xenbus_irq = err;
+ }
+
+ return 0;
+}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
new file mode 100644
index 0000000..c21db75
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -0,0 +1,46 @@
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_COMMS_H
+#define _XENBUS_COMMS_H
+
+int xs_init(void);
+int xb_init_comms(void);
+
+/* Low level routines. */
+int xb_write(const void *data, unsigned len);
+int xb_read(void *data, unsigned len);
+int xb_data_to_read(void);
+int xb_wait_for_data_to_read(void);
+int xs_input_avail(void);
+extern struct xenstore_domain_interface *xen_store_interface;
+extern int xen_store_evtchn;
+
+#endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
new file mode 100644
index 0000000..7f24a98
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -0,0 +1,962 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define DPRINTK(fmt, args...) \
+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
+ __func__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/page.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+int xen_store_evtchn;
+struct xenstore_domain_interface *xen_store_interface;
+static unsigned long xen_store_mfn;
+
+static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
+
+static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+
+static void xenbus_dev_shutdown(struct device *_dev);
+
+/* If something in array of ids matches this device, return it. */
+static const struct xenbus_device_id *
+match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
+{
+ for (; *arr->devicetype != '\0'; arr++) {
+ if (!strcmp(arr->devicetype, dev->devicetype))
+ return arr;
+ }
+ return NULL;
+}
+
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
+{
+ struct xenbus_driver *drv = to_xenbus_driver(_drv);
+
+ if (!drv->ids)
+ return 0;
+
+ return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
+}
+
+static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+
+ if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* device/<type>/<id> => <type>-<id> */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ nodename = strchr(nodename, '/');
+ if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) {
+ printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+ return -EINVAL;
+ }
+
+ strlcpy(bus_id, nodename + 1, BUS_ID_SIZE);
+ if (!strchr(bus_id, '/')) {
+ printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+ return -EINVAL;
+ }
+ *strchr(bus_id, '/') = '-';
+ return 0;
+}
+
+
+static void free_otherend_details(struct xenbus_device *dev)
+{
+ kfree(dev->otherend);
+ dev->otherend = NULL;
+}
+
+
+static void free_otherend_watch(struct xenbus_device *dev)
+{
+ if (dev->otherend_watch.node) {
+ unregister_xenbus_watch(&dev->otherend_watch);
+ kfree(dev->otherend_watch.node);
+ dev->otherend_watch.node = NULL;
+ }
+}
+
+
+int read_otherend_details(struct xenbus_device *xendev,
+ char *id_node, char *path_node)
+{
+ int err = xenbus_gather(XBT_NIL, xendev->nodename,
+ id_node, "%i", &xendev->otherend_id,
+ path_node, NULL, &xendev->otherend,
+ NULL);
+ if (err) {
+ xenbus_dev_fatal(xendev, err,
+ "reading other end details from %s",
+ xendev->nodename);
+ return err;
+ }
+ if (strlen(xendev->otherend) == 0 ||
+ !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
+ xenbus_dev_fatal(xendev, -ENOENT,
+ "unable to read other end from %s. "
+ "missing or inaccessible.",
+ xendev->nodename);
+ free_otherend_details(xendev);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+
+static int read_backend_details(struct xenbus_device *xendev)
+{
+ return read_otherend_details(xendev, "backend-id", "backend");
+}
+
+
+/* Bus type for frontend drivers. */
+static struct xen_bus_type xenbus_frontend = {
+ .root = "device",
+ .levels = 2, /* device/type/<id> */
+ .get_bus_id = frontend_bus_id,
+ .probe = xenbus_probe_frontend,
+ .bus = {
+ .name = "xen",
+ .match = xenbus_match,
+ .uevent = xenbus_uevent,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+ .shutdown = xenbus_dev_shutdown,
+ },
+};
+
+static void otherend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ struct xenbus_device *dev =
+ container_of(watch, struct xenbus_device, otherend_watch);
+ struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+ enum xenbus_state state;
+
+ /* Protect us against watches firing on old details when the otherend
+ details change, say immediately after a resume. */
+ if (!dev->otherend ||
+ strncmp(dev->otherend, vec[XS_WATCH_PATH],
+ strlen(dev->otherend))) {
+ dev_dbg(&dev->dev, "Ignoring watch at %s\n",
+ vec[XS_WATCH_PATH]);
+ return;
+ }
+
+ state = xenbus_read_driver_state(dev->otherend);
+
+ dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
+ state, xenbus_strstate(state), dev->otherend_watch.node,
+ vec[XS_WATCH_PATH]);
+
+ /*
+ * Ignore xenbus transitions during shutdown. This prevents us doing
+ * work that can fail e.g., when the rootfs is gone.
+ */
+ if (system_state > SYSTEM_RUNNING) {
+ struct xen_bus_type *bus = bus;
+ bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+ /* If we're frontend, drive the state machine to Closed. */
+ /* This should cause the backend to release our resources. */
+ if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+ xenbus_frontend_closed(dev);
+ return;
+ }
+
+ if (drv->otherend_changed)
+ drv->otherend_changed(dev, state);
+}
+
+
+static int talk_to_otherend(struct xenbus_device *dev)
+{
+ struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+
+ free_otherend_watch(dev);
+ free_otherend_details(dev);
+
+ return drv->read_otherend_details(dev);
+}
+
+
+static int watch_otherend(struct xenbus_device *dev)
+{
+ return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
+ "%s/%s", dev->otherend, "state");
+}
+
+
+int xenbus_dev_probe(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
+ const struct xenbus_device_id *id;
+ int err;
+
+ DPRINTK("%s", dev->nodename);
+
+ if (!drv->probe) {
+ err = -ENODEV;
+ goto fail;
+ }
+
+ id = match_device(drv->ids, dev);
+ if (!id) {
+ err = -ENODEV;
+ goto fail;
+ }
+
+ err = talk_to_otherend(dev);
+ if (err) {
+ dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
+ dev->nodename);
+ return err;
+ }
+
+ err = drv->probe(dev, id);
+ if (err)
+ goto fail;
+
+ err = watch_otherend(dev);
+ if (err) {
+ dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
+ dev->nodename);
+ return err;
+ }
+
+ return 0;
+fail:
+ xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
+ xenbus_switch_state(dev, XenbusStateClosed);
+ return -ENODEV;
+}
+
+int xenbus_dev_remove(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
+
+ DPRINTK("%s", dev->nodename);
+
+ free_otherend_watch(dev);
+ free_otherend_details(dev);
+
+ if (drv->remove)
+ drv->remove(dev);
+
+ xenbus_switch_state(dev, XenbusStateClosed);
+ return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
+ dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ printk(KERN_INFO "%s: %s timeout closing device\n",
+ __func__, dev->nodename);
+ out:
+ put_device(&dev->dev);
+}
+
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus,
+ struct module *owner,
+ const char *mod_name)
+{
+ drv->driver.name = drv->name;
+ drv->driver.bus = &bus->bus;
+ drv->driver.owner = owner;
+ drv->driver.mod_name = mod_name;
+
+ return driver_register(&drv->driver);
+}
+
+int __xenbus_register_frontend(struct xenbus_driver *drv,
+ struct module *owner, const char *mod_name)
+{
+ int ret;
+
+ drv->read_otherend_details = read_backend_details;
+
+ ret = xenbus_register_driver_common(drv, &xenbus_frontend,
+ owner, mod_name);
+ if (ret)
+ return ret;
+
+ /* If this driver is loaded as a module wait for devices to attach. */
+ wait_for_devices(drv);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+
+void xenbus_unregister_driver(struct xenbus_driver *drv)
+{
+ driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
+
+struct xb_find_info
+{
+ struct xenbus_device *dev;
+ const char *nodename;
+};
+
+static int cmp_dev(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct xb_find_info *info = data;
+
+ if (!strcmp(xendev->nodename, info->nodename)) {
+ info->dev = xendev;
+ get_device(dev);
+ return 1;
+ }
+ return 0;
+}
+
+struct xenbus_device *xenbus_device_find(const char *nodename,
+ struct bus_type *bus)
+{
+ struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+ bus_for_each_dev(bus, NULL, &info, cmp_dev);
+ return info.dev;
+}
+
+static int cleanup_dev(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct xb_find_info *info = data;
+ int len = strlen(info->nodename);
+
+ DPRINTK("%s", info->nodename);
+
+ /* Match the info->nodename path, or any subdirectory of that path. */
+ if (strncmp(xendev->nodename, info->nodename, len))
+ return 0;
+
+ /* If the node name is longer, ensure it really is a subdirectory. */
+ if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
+ return 0;
+
+ info->dev = xendev;
+ get_device(dev);
+ return 1;
+}
+
+static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
+{
+ struct xb_find_info info = { .nodename = path };
+
+ do {
+ info.dev = NULL;
+ bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+ if (info.dev) {
+ device_unregister(&info.dev->dev);
+ put_device(&info.dev->dev);
+ }
+ } while (info.dev);
+}
+
+static void xenbus_dev_release(struct device *dev)
+{
+ if (dev)
+ kfree(to_xenbus_device(dev));
+}
+
+static ssize_t xendev_show_nodename(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
+}
+DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
+
+static ssize_t xendev_show_devtype(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
+}
+DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+
+static ssize_t xendev_show_modalias(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
+}
+DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
+
+int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename)
+{
+ int err;
+ struct xenbus_device *xendev;
+ size_t stringlen;
+ char *tmpstring;
+
+ enum xenbus_state state = xenbus_read_driver_state(nodename);
+
+ if (state != XenbusStateInitialising) {
+ /* Device is not new, so ignore it. This can happen if a
+ device is going away after switching to Closed. */
+ return 0;
+ }
+
+ stringlen = strlen(nodename) + 1 + strlen(type) + 1;
+ xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
+ if (!xendev)
+ return -ENOMEM;
+
+ xendev->state = XenbusStateInitialising;
+
+ /* Copy the strings into the extra space. */
+
+ tmpstring = (char *)(xendev + 1);
+ strcpy(tmpstring, nodename);
+ xendev->nodename = tmpstring;
+
+ tmpstring += strlen(tmpstring) + 1;
+ strcpy(tmpstring, type);
+ xendev->devicetype = tmpstring;
+ init_completion(&xendev->down);
+
+ xendev->dev.bus = &bus->bus;
+ xendev->dev.release = xenbus_dev_release;
+
+ err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
+ if (err)
+ goto fail;
+
+ /* Register with generic device framework. */
+ err = device_register(&xendev->dev);
+ if (err)
+ goto fail;
+
+ err = device_create_file(&xendev->dev, &dev_attr_nodename);
+ if (err)
+ goto fail_unregister;
+
+ err = device_create_file(&xendev->dev, &dev_attr_devtype);
+ if (err)
+ goto fail_remove_nodename;
+
+ err = device_create_file(&xendev->dev, &dev_attr_modalias);
+ if (err)
+ goto fail_remove_devtype;
+
+ return 0;
+fail_remove_devtype:
+ device_remove_file(&xendev->dev, &dev_attr_devtype);
+fail_remove_nodename:
+ device_remove_file(&xendev->dev, &dev_attr_nodename);
+fail_unregister:
+ device_unregister(&xendev->dev);
+fail:
+ kfree(xendev);
+ return err;
+}
+
+/* device/<typename>/<name> */
+static int xenbus_probe_frontend(const char *type, const char *name)
+{
+ char *nodename;
+ int err;
+
+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
+ xenbus_frontend.root, type, name);
+ if (!nodename)
+ return -ENOMEM;
+
+ DPRINTK("%s", nodename);
+
+ err = xenbus_probe_node(&xenbus_frontend, type, nodename);
+ kfree(nodename);
+ return err;
+}
+
+static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
+{
+ int err = 0;
+ char **dir;
+ unsigned int dir_n = 0;
+ int i;
+
+ dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ for (i = 0; i < dir_n; i++) {
+ err = bus->probe(type, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ return err;
+}
+
+int xenbus_probe_devices(struct xen_bus_type *bus)
+{
+ int err = 0;
+ char **dir;
+ unsigned int i, dir_n;
+
+ dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ for (i = 0; i < dir_n; i++) {
+ err = xenbus_probe_device_type(bus, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ return err;
+}
+
+static unsigned int char_count(const char *str, char c)
+{
+ unsigned int i, ret = 0;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] == c)
+ ret++;
+ return ret;
+}
+
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] == c) {
+ if (len == 0)
+ return i;
+ len--;
+ }
+ return (len == 0) ? i : -ERANGE;
+}
+
+void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+{
+ int exists, rootlen;
+ struct xenbus_device *dev;
+ char type[BUS_ID_SIZE];
+ const char *p, *root;
+
+ if (char_count(node, '/') < 2)
+ return;
+
+ exists = xenbus_exists(XBT_NIL, node, "");
+ if (!exists) {
+ xenbus_cleanup_devices(node, &bus->bus);
+ return;
+ }
+
+ /* backend/<type>/... or device/<type>/... */
+ p = strchr(node, '/') + 1;
+ snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
+ type[BUS_ID_SIZE-1] = '\0';
+
+ rootlen = strsep_len(node, '/', bus->levels);
+ if (rootlen < 0)
+ return;
+ root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
+ if (!root)
+ return;
+
+ dev = xenbus_device_find(root, &bus->bus);
+ if (!dev)
+ xenbus_probe_node(bus, type, root);
+ else
+ put_device(&dev->dev);
+
+ kfree(root);
+}
+
+static void frontend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ DPRINTK("");
+
+ xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
+}
+
+/* We watch for devices appearing and vanishing. */
+static struct xenbus_watch fe_watch = {
+ .node = "device",
+ .callback = frontend_changed,
+};
+
+static int suspend_dev(struct device *dev, void *data)
+{
+ int err = 0;
+ struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ DPRINTK("");
+
+ if (dev->driver == NULL)
+ return 0;
+ drv = to_xenbus_driver(dev->driver);
+ xdev = container_of(dev, struct xenbus_device, dev);
+ if (drv->suspend)
+ err = drv->suspend(xdev);
+ if (err)
+ printk(KERN_WARNING
+ "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
+ return 0;
+}
+
+static int suspend_cancel_dev(struct device *dev, void *data)
+{
+ int err = 0;
+ struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ DPRINTK("");
+
+ if (dev->driver == NULL)
+ return 0;
+ drv = to_xenbus_driver(dev->driver);
+ xdev = container_of(dev, struct xenbus_device, dev);
+ if (drv->suspend_cancel)
+ err = drv->suspend_cancel(xdev);
+ if (err)
+ printk(KERN_WARNING
+ "xenbus: suspend_cancel %s failed: %i\n",
+ dev->bus_id, err);
+ return 0;
+}
+
+static int resume_dev(struct device *dev, void *data)
+{
+ int err;
+ struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ DPRINTK("");
+
+ if (dev->driver == NULL)
+ return 0;
+
+ drv = to_xenbus_driver(dev->driver);
+ xdev = container_of(dev, struct xenbus_device, dev);
+
+ err = talk_to_otherend(xdev);
+ if (err) {
+ printk(KERN_WARNING
+ "xenbus: resume (talk_to_otherend) %s failed: %i\n",
+ dev->bus_id, err);
+ return err;
+ }
+
+ xdev->state = XenbusStateInitialising;
+
+ if (drv->resume) {
+ err = drv->resume(xdev);
+ if (err) {
+ printk(KERN_WARNING
+ "xenbus: resume %s failed: %i\n",
+ dev->bus_id, err);
+ return err;
+ }
+ }
+
+ err = watch_otherend(xdev);
+ if (err) {
+ printk(KERN_WARNING
+ "xenbus_probe: resume (watch_otherend) %s failed: "
+ "%d.\n", dev->bus_id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+void xenbus_suspend(void)
+{
+ DPRINTK("");
+
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+ xenbus_backend_suspend(suspend_dev);
+ xs_suspend();
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend);
+
+void xenbus_resume(void)
+{
+ xb_init_comms();
+ xs_resume();
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+ xenbus_backend_resume(resume_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_resume);
+
+void xenbus_suspend_cancel(void)
+{
+ xs_suspend_cancel();
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
+ xenbus_backend_resume(suspend_cancel_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
+
+/* A flag to determine if xenstored is 'ready' (i.e. has started) */
+int xenstored_ready = 0;
+
+
+int register_xenstore_notifier(struct notifier_block *nb)
+{
+ int ret = 0;
+
+ if (xenstored_ready > 0)
+ ret = nb->notifier_call(nb, 0, NULL);
+ else
+ blocking_notifier_chain_register(&xenstore_chain, nb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_xenstore_notifier);
+
+void unregister_xenstore_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&xenstore_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
+
+void xenbus_probe(struct work_struct *unused)
+{
+ BUG_ON((xenstored_ready <= 0));
+
+ /* Enumerate devices in xenstore and watch for changes. */
+ xenbus_probe_devices(&xenbus_frontend);
+ register_xenbus_watch(&fe_watch);
+ xenbus_backend_probe_and_watch();
+
+ /* Notify others that xenstore is up */
+ blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
+}
+
+static int __init xenbus_probe_init(void)
+{
+ int err = 0;
+
+ DPRINTK("");
+
+ err = -ENODEV;
+ if (!xen_domain())
+ goto out_error;
+
+ /* Register ourselves with the kernel bus subsystem */
+ err = bus_register(&xenbus_frontend.bus);
+ if (err)
+ goto out_error;
+
+ err = xenbus_backend_bus_register();
+ if (err)
+ goto out_unreg_front;
+
+ /*
+ * Domain0 doesn't have a store_evtchn or store_mfn yet.
+ */
+ if (xen_initial_domain()) {
+ /* dom0 not yet supported */
+ } else {
+ xenstored_ready = 1;
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ }
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+
+ /* Initialize the interface to xenstore. */
+ err = xs_init();
+ if (err) {
+ printk(KERN_WARNING
+ "XENBUS: Error initializing xenstore comms: %i\n", err);
+ goto out_unreg_back;
+ }
+
+ if (!xen_initial_domain())
+ xenbus_probe(NULL);
+
+ return 0;
+
+ out_unreg_back:
+ xenbus_backend_bus_unregister();
+
+ out_unreg_front:
+ bus_unregister(&xenbus_frontend.bus);
+
+ out_error:
+ return err;
+}
+
+postcore_initcall(xenbus_probe_init);
+
+MODULE_LICENSE("GPL");
+
+static int is_disconnected_device(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct device_driver *drv = data;
+ struct xenbus_driver *xendrv;
+
+ /*
+ * A device with no driver will never connect. We care only about
+ * devices which should currently be in the process of connecting.
+ */
+ if (!dev->driver)
+ return 0;
+
+ /* Is this search limited to a particular driver? */
+ if (drv && (dev->driver != drv))
+ return 0;
+
+ xendrv = to_xenbus_driver(dev->driver);
+ return (xendev->state != XenbusStateConnected ||
+ (xendrv->is_ready && !xendrv->is_ready(xendev)));
+}
+
+static int exists_disconnected_device(struct device_driver *drv)
+{
+ return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+ is_disconnected_device);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct device_driver *drv = data;
+
+ /* Is this operation limited to a particular driver? */
+ if (drv && (dev->driver != drv))
+ return 0;
+
+ if (!dev->driver) {
+ /* Information only: is this too noisy? */
+ printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
+ xendev->nodename);
+ } else if (xendev->state != XenbusStateConnected) {
+ printk(KERN_WARNING "XENBUS: Timeout connecting "
+ "to device: %s (state %d)\n",
+ xendev->nodename, xendev->state);
+ }
+
+ return 0;
+}
+
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
+
+/*
+ * On a 10 second timeout, wait for all devices currently configured. We need
+ * to do this to guarantee that the filesystems and / or network devices
+ * needed for boot are available, before we can allow the boot to proceed.
+ *
+ * This needs to be on a late_initcall, to happen after the frontend device
+ * drivers have been initialised, but before the root fs is mounted.
+ *
+ * A possible improvement here would be to have the tools add a per-device
+ * flag to the store entry, indicating whether it is needed at boot time.
+ * This would allow people who knew what they were doing to accelerate their
+ * boot slightly, but of course needs tools or manual intervention to set up
+ * those flags correctly.
+ */
+static void wait_for_devices(struct xenbus_driver *xendrv)
+{
+ unsigned long timeout = jiffies + 10*HZ;
+ struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+
+ if (!ready_to_wait_for_devices || !xen_domain())
+ return;
+
+ while (exists_disconnected_device(drv)) {
+ if (time_after(jiffies, timeout))
+ break;
+ schedule_timeout_interruptible(HZ/10);
+ }
+
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+ print_device_status);
+}
+
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
+ ready_to_wait_for_devices = 1;
+ wait_for_devices(NULL);
+ return 0;
+}
+
+late_initcall(boot_wait_for_devices);
+#endif
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 0000000..e09b194
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,74 @@
+/******************************************************************************
+ * xenbus_probe.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_PROBE_H
+#define _XENBUS_PROBE_H
+
+#ifdef CONFIG_XEN_BACKEND
+extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
+extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
+extern void xenbus_backend_probe_and_watch(void);
+extern int xenbus_backend_bus_register(void);
+extern void xenbus_backend_bus_unregister(void);
+#else
+static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_probe_and_watch(void) {}
+static inline int xenbus_backend_bus_register(void) { return 0; }
+static inline void xenbus_backend_bus_unregister(void) {}
+#endif
+
+struct xen_bus_type
+{
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+};
+
+extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
+extern int xenbus_dev_probe(struct device *_dev);
+extern int xenbus_dev_remove(struct device *_dev);
+extern int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus,
+ struct module *owner,
+ const char *mod_name);
+extern int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename);
+extern int xenbus_probe_devices(struct xen_bus_type *bus);
+
+extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
+
+#endif
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
new file mode 100644
index 0000000..7f2f91c
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -0,0 +1,861 @@
+/******************************************************************************
+ * xenbus_xs.c
+ *
+ * This is the kernel equivalent of the "xs" library. We don't need everything
+ * and we use xenbus_comms for communication.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/unistd.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/fcntl.h>
+#include <linux/kthread.h>
+#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <xen/xenbus.h>
+#include "xenbus_comms.h"
+
+struct xs_stored_msg {
+ struct list_head list;
+
+ struct xsd_sockmsg hdr;
+
+ union {
+ /* Queued replies. */
+ struct {
+ char *body;
+ } reply;
+
+ /* Queued watch events. */
+ struct {
+ struct xenbus_watch *handle;
+ char **vec;
+ unsigned int vec_size;
+ } watch;
+ } u;
+};
+
+struct xs_handle {
+ /* A list of replies. Currently only one will ever be outstanding. */
+ struct list_head reply_list;
+ spinlock_t reply_lock;
+ wait_queue_head_t reply_waitq;
+
+ /*
+ * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
+ * response_mutex is never taken simultaneously with the other three.
+ */
+
+ /* One request at a time. */
+ struct mutex request_mutex;
+
+ /* Protect xenbus reader thread against save/restore. */
+ struct mutex response_mutex;
+
+ /* Protect transactions against save/restore. */
+ struct rw_semaphore transaction_mutex;
+
+ /* Protect watch (de)register against save/restore. */
+ struct rw_semaphore watch_mutex;
+};
+
+static struct xs_handle xs_state;
+
+/* List of registered watches, and a lock to protect it. */
+static LIST_HEAD(watches);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* List of pending watch callback events, and a lock to protect it. */
+static LIST_HEAD(watch_events);
+static DEFINE_SPINLOCK(watch_events_lock);
+
+/*
+ * Details of the xenwatch callback kernel thread. The thread waits on the
+ * watch_events_waitq for work to do (queued on watch_events list). When it
+ * wakes up it acquires the xenwatch_mutex before reading the list and
+ * carrying out work.
+ */
+static pid_t xenwatch_pid;
+static DEFINE_MUTEX(xenwatch_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
+
+static int get_error(const char *errorstring)
+{
+ unsigned int i;
+
+ for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
+ if (i == ARRAY_SIZE(xsd_errors) - 1) {
+ printk(KERN_WARNING
+ "XENBUS xen store gave: unknown error %s",
+ errorstring);
+ return EINVAL;
+ }
+ }
+ return xsd_errors[i].errnum;
+}
+
+static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
+{
+ struct xs_stored_msg *msg;
+ char *body;
+
+ spin_lock(&xs_state.reply_lock);
+
+ while (list_empty(&xs_state.reply_list)) {
+ spin_unlock(&xs_state.reply_lock);
+ /* XXX FIXME: Avoid synchronous wait for response here. */
+ wait_event(xs_state.reply_waitq,
+ !list_empty(&xs_state.reply_list));
+ spin_lock(&xs_state.reply_lock);
+ }
+
+ msg = list_entry(xs_state.reply_list.next,
+ struct xs_stored_msg, list);
+ list_del(&msg->list);
+
+ spin_unlock(&xs_state.reply_lock);
+
+ *type = msg->hdr.type;
+ if (len)
+ *len = msg->hdr.len;
+ body = msg->u.reply.body;
+
+ kfree(msg);
+
+ return body;
+}
+
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+{
+ void *ret;
+ struct xsd_sockmsg req_msg = *msg;
+ int err;
+
+ if (req_msg.type == XS_TRANSACTION_START)
+ down_read(&xs_state.transaction_mutex);
+
+ mutex_lock(&xs_state.request_mutex);
+
+ err = xb_write(msg, sizeof(*msg) + msg->len);
+ if (err) {
+ msg->type = XS_ERROR;
+ ret = ERR_PTR(err);
+ } else
+ ret = read_reply(&msg->type, &msg->len);
+
+ mutex_unlock(&xs_state.request_mutex);
+
+ if ((msg->type == XS_TRANSACTION_END) ||
+ ((req_msg.type == XS_TRANSACTION_START) &&
+ (msg->type == XS_ERROR)))
+ up_read(&xs_state.transaction_mutex);
+
+ return ret;
+}
+
+/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
+static void *xs_talkv(struct xenbus_transaction t,
+ enum xsd_sockmsg_type type,
+ const struct kvec *iovec,
+ unsigned int num_vecs,
+ unsigned int *len)
+{
+ struct xsd_sockmsg msg;
+ void *ret = NULL;
+ unsigned int i;
+ int err;
+
+ msg.tx_id = t.id;
+ msg.req_id = 0;
+ msg.type = type;
+ msg.len = 0;
+ for (i = 0; i < num_vecs; i++)
+ msg.len += iovec[i].iov_len;
+
+ mutex_lock(&xs_state.request_mutex);
+
+ err = xb_write(&msg, sizeof(msg));
+ if (err) {
+ mutex_unlock(&xs_state.request_mutex);
+ return ERR_PTR(err);
+ }
+
+ for (i = 0; i < num_vecs; i++) {
+ err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
+ if (err) {
+ mutex_unlock(&xs_state.request_mutex);
+ return ERR_PTR(err);
+ }
+ }
+
+ ret = read_reply(&msg.type, len);
+
+ mutex_unlock(&xs_state.request_mutex);
+
+ if (IS_ERR(ret))
+ return ret;
+
+ if (msg.type == XS_ERROR) {
+ err = get_error(ret);
+ kfree(ret);
+ return ERR_PTR(-err);
+ }
+
+ if (msg.type != type) {
+ if (printk_ratelimit())
+ printk(KERN_WARNING
+ "XENBUS unexpected type [%d], expected [%d]\n",
+ msg.type, type);
+ kfree(ret);
+ return ERR_PTR(-EINVAL);
+ }
+ return ret;
+}
+
+/* Simplified version of xs_talkv: single message. */
+static void *xs_single(struct xenbus_transaction t,
+ enum xsd_sockmsg_type type,
+ const char *string,
+ unsigned int *len)
+{
+ struct kvec iovec;
+
+ iovec.iov_base = (void *)string;
+ iovec.iov_len = strlen(string) + 1;
+ return xs_talkv(t, type, &iovec, 1, len);
+}
+
+/* Many commands only need an ack, don't care what it says. */
+static int xs_error(char *reply)
+{
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+ kfree(reply);
+ return 0;
+}
+
+static unsigned int count_strings(const char *strings, unsigned int len)
+{
+ unsigned int num;
+ const char *p;
+
+ for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
+ num++;
+
+ return num;
+}
+
+/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
+static char *join(const char *dir, const char *name)
+{
+ char *buffer;
+
+ if (strlen(name) == 0)
+ buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
+ else
+ buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
+ return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
+}
+
+static char **split(char *strings, unsigned int len, unsigned int *num)
+{
+ char *p, **ret;
+
+ /* Count the strings. */
+ *num = count_strings(strings, len);
+
+ /* Transfer to one big alloc for easy freeing. */
+ ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
+ if (!ret) {
+ kfree(strings);
+ return ERR_PTR(-ENOMEM);
+ }
+ memcpy(&ret[*num], strings, len);
+ kfree(strings);
+
+ strings = (char *)&ret[*num];
+ for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+ ret[(*num)++] = p;
+
+ return ret;
+}
+
+char **xenbus_directory(struct xenbus_transaction t,
+ const char *dir, const char *node, unsigned int *num)
+{
+ char *strings, *path;
+ unsigned int len;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return (char **)path;
+
+ strings = xs_single(t, XS_DIRECTORY, path, &len);
+ kfree(path);
+ if (IS_ERR(strings))
+ return (char **)strings;
+
+ return split(strings, len, num);
+}
+EXPORT_SYMBOL_GPL(xenbus_directory);
+
+/* Check if a path exists. Return 1 if it does. */
+int xenbus_exists(struct xenbus_transaction t,
+ const char *dir, const char *node)
+{
+ char **d;
+ int dir_n;
+
+ d = xenbus_directory(t, dir, node, &dir_n);
+ if (IS_ERR(d))
+ return 0;
+ kfree(d);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(xenbus_exists);
+
+/* Get the value of a single file.
+ * Returns a kmalloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xenbus_read(struct xenbus_transaction t,
+ const char *dir, const char *node, unsigned int *len)
+{
+ char *path;
+ void *ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return (void *)path;
+
+ ret = xs_single(t, XS_READ, path, len);
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_read);
+
+/* Write the value of a single file.
+ * Returns -err on failure.
+ */
+int xenbus_write(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *string)
+{
+ const char *path;
+ struct kvec iovec[2];
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ iovec[0].iov_base = (void *)path;
+ iovec[0].iov_len = strlen(path) + 1;
+ iovec[1].iov_base = (void *)string;
+ iovec[1].iov_len = strlen(string);
+
+ ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_write);
+
+/* Create a new directory. */
+int xenbus_mkdir(struct xenbus_transaction t,
+ const char *dir, const char *node)
+{
+ char *path;
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_mkdir);
+
+/* Destroy a file or directory (directories must be empty). */
+int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
+{
+ char *path;
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = xs_error(xs_single(t, XS_RM, path, NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_rm);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ */
+int xenbus_transaction_start(struct xenbus_transaction *t)
+{
+ char *id_str;
+
+ down_read(&xs_state.transaction_mutex);
+
+ id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
+ if (IS_ERR(id_str)) {
+ up_read(&xs_state.transaction_mutex);
+ return PTR_ERR(id_str);
+ }
+
+ t->id = simple_strtoul(id_str, NULL, 0);
+ kfree(id_str);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_start);
+
+/* End a transaction.
+ * If abandon is true, transaction is discarded instead of committed.
+ */
+int xenbus_transaction_end(struct xenbus_transaction t, int abort)
+{
+ char abortstr[2];
+ int err;
+
+ if (abort)
+ strcpy(abortstr, "F");
+ else
+ strcpy(abortstr, "T");
+
+ err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
+
+ up_read(&xs_state.transaction_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_end);
+
+/* Single read and scanf: returns -errno or num scanned. */
+int xenbus_scanf(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ char *val;
+
+ val = xenbus_read(t, dir, node, NULL);
+ if (IS_ERR(val))
+ return PTR_ERR(val);
+
+ va_start(ap, fmt);
+ ret = vsscanf(val, fmt, ap);
+ va_end(ap);
+ kfree(val);
+ /* Distinctive errno. */
+ if (ret == 0)
+ return -ERANGE;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_scanf);
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+#define PRINTF_BUFFER_SIZE 4096
+ char *printf_buffer;
+
+ printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+ if (printf_buffer == NULL)
+ return -ENOMEM;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
+ va_end(ap);
+
+ BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+ ret = xenbus_write(t, dir, node, printf_buffer);
+
+ kfree(printf_buffer);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_printf);
+
+/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
+int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
+{
+ va_list ap;
+ const char *name;
+ int ret = 0;
+
+ va_start(ap, dir);
+ while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+ const char *fmt = va_arg(ap, char *);
+ void *result = va_arg(ap, void *);
+ char *p;
+
+ p = xenbus_read(t, dir, name, NULL);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ break;
+ }
+ if (fmt) {
+ if (sscanf(p, fmt, result) == 0)
+ ret = -EINVAL;
+ kfree(p);
+ } else
+ *(char **)result = p;
+ }
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_gather);
+
+static int xs_watch(const char *path, const char *token)
+{
+ struct kvec iov[2];
+
+ iov[0].iov_base = (void *)path;
+ iov[0].iov_len = strlen(path) + 1;
+ iov[1].iov_base = (void *)token;
+ iov[1].iov_len = strlen(token) + 1;
+
+ return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
+ ARRAY_SIZE(iov), NULL));
+}
+
+static int xs_unwatch(const char *path, const char *token)
+{
+ struct kvec iov[2];
+
+ iov[0].iov_base = (char *)path;
+ iov[0].iov_len = strlen(path) + 1;
+ iov[1].iov_base = (char *)token;
+ iov[1].iov_len = strlen(token) + 1;
+
+ return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
+ ARRAY_SIZE(iov), NULL));
+}
+
+static struct xenbus_watch *find_watch(const char *token)
+{
+ struct xenbus_watch *i, *cmp;
+
+ cmp = (void *)simple_strtoul(token, NULL, 16);
+
+ list_for_each_entry(i, &watches, list)
+ if (i == cmp)
+ return i;
+
+ return NULL;
+}
+
+/* Register callback to watch this node. */
+int register_xenbus_watch(struct xenbus_watch *watch)
+{
+ /* Pointer in ascii is the token. */
+ char token[sizeof(watch) * 2 + 1];
+ int err;
+
+ sprintf(token, "%lX", (long)watch);
+
+ down_read(&xs_state.watch_mutex);
+
+ spin_lock(&watches_lock);
+ BUG_ON(find_watch(token));
+ list_add(&watch->list, &watches);
+ spin_unlock(&watches_lock);
+
+ err = xs_watch(watch->node, token);
+
+ /* Ignore errors due to multiple registration. */
+ if ((err != 0) && (err != -EEXIST)) {
+ spin_lock(&watches_lock);
+ list_del(&watch->list);
+ spin_unlock(&watches_lock);
+ }
+
+ up_read(&xs_state.watch_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_xenbus_watch);
+
+void unregister_xenbus_watch(struct xenbus_watch *watch)
+{
+ struct xs_stored_msg *msg, *tmp;
+ char token[sizeof(watch) * 2 + 1];
+ int err;
+
+ sprintf(token, "%lX", (long)watch);
+
+ down_read(&xs_state.watch_mutex);
+
+ spin_lock(&watches_lock);
+ BUG_ON(!find_watch(token));
+ list_del(&watch->list);
+ spin_unlock(&watches_lock);
+
+ err = xs_unwatch(watch->node, token);
+ if (err)
+ printk(KERN_WARNING
+ "XENBUS Failed to release watch %s: %i\n",
+ watch->node, err);
+
+ up_read(&xs_state.watch_mutex);
+
+ /* Make sure there are no callbacks running currently (unless
+ its us) */
+ if (current->pid != xenwatch_pid)
+ mutex_lock(&xenwatch_mutex);
+
+ /* Cancel pending watch events. */
+ spin_lock(&watch_events_lock);
+ list_for_each_entry_safe(msg, tmp, &watch_events, list) {
+ if (msg->u.watch.handle != watch)
+ continue;
+ list_del(&msg->list);
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+ spin_unlock(&watch_events_lock);
+
+ if (current->pid != xenwatch_pid)
+ mutex_unlock(&xenwatch_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
+
+void xs_suspend(void)
+{
+ down_write(&xs_state.transaction_mutex);
+ down_write(&xs_state.watch_mutex);
+ mutex_lock(&xs_state.request_mutex);
+ mutex_lock(&xs_state.response_mutex);
+}
+
+void xs_resume(void)
+{
+ struct xenbus_watch *watch;
+ char token[sizeof(watch) * 2 + 1];
+
+ mutex_unlock(&xs_state.response_mutex);
+ mutex_unlock(&xs_state.request_mutex);
+ up_write(&xs_state.transaction_mutex);
+
+ /* No need for watches_lock: the watch_mutex is sufficient. */
+ list_for_each_entry(watch, &watches, list) {
+ sprintf(token, "%lX", (long)watch);
+ xs_watch(watch->node, token);
+ }
+
+ up_write(&xs_state.watch_mutex);
+}
+
+void xs_suspend_cancel(void)
+{
+ mutex_unlock(&xs_state.response_mutex);
+ mutex_unlock(&xs_state.request_mutex);
+ up_write(&xs_state.watch_mutex);
+ up_write(&xs_state.transaction_mutex);
+}
+
+static int xenwatch_thread(void *unused)
+{
+ struct list_head *ent;
+ struct xs_stored_msg *msg;
+
+ for (;;) {
+ wait_event_interruptible(watch_events_waitq,
+ !list_empty(&watch_events));
+
+ if (kthread_should_stop())
+ break;
+
+ mutex_lock(&xenwatch_mutex);
+
+ spin_lock(&watch_events_lock);
+ ent = watch_events.next;
+ if (ent != &watch_events)
+ list_del(ent);
+ spin_unlock(&watch_events_lock);
+
+ if (ent != &watch_events) {
+ msg = list_entry(ent, struct xs_stored_msg, list);
+ msg->u.watch.handle->callback(
+ msg->u.watch.handle,
+ (const char **)msg->u.watch.vec,
+ msg->u.watch.vec_size);
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+
+ mutex_unlock(&xenwatch_mutex);
+ }
+
+ return 0;
+}
+
+static int process_msg(void)
+{
+ struct xs_stored_msg *msg;
+ char *body;
+ int err;
+
+ /*
+ * We must disallow save/restore while reading a xenstore message.
+ * A partial read across s/r leaves us out of sync with xenstored.
+ */
+ for (;;) {
+ err = xb_wait_for_data_to_read();
+ if (err)
+ return err;
+ mutex_lock(&xs_state.response_mutex);
+ if (xb_data_to_read())
+ break;
+ /* We raced with save/restore: pending data 'disappeared'. */
+ mutex_unlock(&xs_state.response_mutex);
+ }
+
+
+ msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH);
+ if (msg == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xb_read(&msg->hdr, sizeof(msg->hdr));
+ if (err) {
+ kfree(msg);
+ goto out;
+ }
+
+ body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
+ if (body == NULL) {
+ kfree(msg);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xb_read(body, msg->hdr.len);
+ if (err) {
+ kfree(body);
+ kfree(msg);
+ goto out;
+ }
+ body[msg->hdr.len] = '\0';
+
+ if (msg->hdr.type == XS_WATCH_EVENT) {
+ msg->u.watch.vec = split(body, msg->hdr.len,
+ &msg->u.watch.vec_size);
+ if (IS_ERR(msg->u.watch.vec)) {
+ err = PTR_ERR(msg->u.watch.vec);
+ kfree(msg);
+ goto out;
+ }
+
+ spin_lock(&watches_lock);
+ msg->u.watch.handle = find_watch(
+ msg->u.watch.vec[XS_WATCH_TOKEN]);
+ if (msg->u.watch.handle != NULL) {
+ spin_lock(&watch_events_lock);
+ list_add_tail(&msg->list, &watch_events);
+ wake_up(&watch_events_waitq);
+ spin_unlock(&watch_events_lock);
+ } else {
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+ spin_unlock(&watches_lock);
+ } else {
+ msg->u.reply.body = body;
+ spin_lock(&xs_state.reply_lock);
+ list_add_tail(&msg->list, &xs_state.reply_list);
+ spin_unlock(&xs_state.reply_lock);
+ wake_up(&xs_state.reply_waitq);
+ }
+
+ out:
+ mutex_unlock(&xs_state.response_mutex);
+ return err;
+}
+
+static int xenbus_thread(void *unused)
+{
+ int err;
+
+ for (;;) {
+ err = process_msg();
+ if (err)
+ printk(KERN_WARNING "XENBUS error %d while reading "
+ "message\n", err);
+ if (kthread_should_stop())
+ break;
+ }
+
+ return 0;
+}
+
+int xs_init(void)
+{
+ int err;
+ struct task_struct *task;
+
+ INIT_LIST_HEAD(&xs_state.reply_list);
+ spin_lock_init(&xs_state.reply_lock);
+ init_waitqueue_head(&xs_state.reply_waitq);
+
+ mutex_init(&xs_state.request_mutex);
+ mutex_init(&xs_state.response_mutex);
+ init_rwsem(&xs_state.transaction_mutex);
+ init_rwsem(&xs_state.watch_mutex);
+
+ /* Initialize the shared memory rings to talk to xenstored */
+ err = xb_init_comms();
+ if (err)
+ return err;
+
+ task = kthread_run(xenwatch_thread, NULL, "xenwatch");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ xenwatch_pid = task->pid;
+
+ task = kthread_run(xenbus_thread, NULL, "xenbus");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ return 0;
+}
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c
new file mode 100644
index 0000000..a240b2c
--- /dev/null
+++ b/drivers/xen/xencomm.c
@@ -0,0 +1,217 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <xen/xencomm.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/xencomm.h> /* for xencomm_is_phys_contiguous() */
+
+static int xencomm_init(struct xencomm_desc *desc,
+ void *buffer, unsigned long bytes)
+{
+ unsigned long recorded = 0;
+ int i = 0;
+
+ while ((recorded < bytes) && (i < desc->nr_addrs)) {
+ unsigned long vaddr = (unsigned long)buffer + recorded;
+ unsigned long paddr;
+ int offset;
+ int chunksz;
+
+ offset = vaddr % PAGE_SIZE; /* handle partial pages */
+ chunksz = min(PAGE_SIZE - offset, bytes - recorded);
+
+ paddr = xencomm_vtop(vaddr);
+ if (paddr == ~0UL) {
+ printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
+ __func__, vaddr);
+ return -EINVAL;
+ }
+
+ desc->address[i++] = paddr;
+ recorded += chunksz;
+ }
+
+ if (recorded < bytes) {
+ printk(KERN_DEBUG
+ "%s: could only translate %ld of %ld bytes\n",
+ __func__, recorded, bytes);
+ return -ENOSPC;
+ }
+
+ /* mark remaining addresses invalid (just for safety) */
+ while (i < desc->nr_addrs)
+ desc->address[i++] = XENCOMM_INVALID;
+
+ desc->magic = XENCOMM_MAGIC;
+
+ return 0;
+}
+
+static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
+ void *buffer, unsigned long bytes)
+{
+ struct xencomm_desc *desc;
+ unsigned long buffer_ulong = (unsigned long)buffer;
+ unsigned long start = buffer_ulong & PAGE_MASK;
+ unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
+ unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
+ unsigned long size = sizeof(*desc) +
+ sizeof(desc->address[0]) * nr_addrs;
+
+ /*
+ * slab allocator returns at least sizeof(void*) aligned pointer.
+ * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
+ * cross page boundary.
+ */
+ if (sizeof(*desc) > sizeof(void *)) {
+ unsigned long order = get_order(size);
+ desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
+ order);
+ if (desc == NULL)
+ return NULL;
+
+ desc->nr_addrs =
+ ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
+ sizeof(*desc->address);
+ } else {
+ desc = kmalloc(size, gfp_mask);
+ if (desc == NULL)
+ return NULL;
+
+ desc->nr_addrs = nr_addrs;
+ }
+ return desc;
+}
+
+void xencomm_free(struct xencomm_handle *desc)
+{
+ if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
+ struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
+ if (sizeof(*desc__) > sizeof(void *)) {
+ unsigned long size = sizeof(*desc__) +
+ sizeof(desc__->address[0]) * desc__->nr_addrs;
+ unsigned long order = get_order(size);
+ free_pages((unsigned long)__va(desc), order);
+ } else
+ kfree(__va(desc));
+ }
+}
+
+static int xencomm_create(void *buffer, unsigned long bytes,
+ struct xencomm_desc **ret, gfp_t gfp_mask)
+{
+ struct xencomm_desc *desc;
+ int rc;
+
+ pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
+
+ if (bytes == 0) {
+ /* don't create a descriptor; Xen recognizes NULL. */
+ BUG_ON(buffer != NULL);
+ *ret = NULL;
+ return 0;
+ }
+
+ BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
+
+ desc = xencomm_alloc(gfp_mask, buffer, bytes);
+ if (!desc) {
+ printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
+ return -ENOMEM;
+ }
+
+ rc = xencomm_init(desc, buffer, bytes);
+ if (rc) {
+ printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
+ xencomm_free((struct xencomm_handle *)__pa(desc));
+ return rc;
+ }
+
+ *ret = desc;
+ return 0;
+}
+
+static struct xencomm_handle *xencomm_create_inline(void *ptr)
+{
+ unsigned long paddr;
+
+ BUG_ON(!xencomm_is_phys_contiguous((unsigned long)ptr));
+
+ paddr = (unsigned long)xencomm_pa(ptr);
+ BUG_ON(paddr & XENCOMM_INLINE_FLAG);
+ return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
+}
+
+/* "mini" routine, for stack-based communications: */
+static int xencomm_create_mini(void *buffer,
+ unsigned long bytes, struct xencomm_mini *xc_desc,
+ struct xencomm_desc **ret)
+{
+ int rc = 0;
+ struct xencomm_desc *desc;
+ BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
+
+ desc = (void *)xc_desc;
+
+ desc->nr_addrs = XENCOMM_MINI_ADDRS;
+
+ rc = xencomm_init(desc, buffer, bytes);
+ if (!rc)
+ *ret = desc;
+
+ return rc;
+}
+
+struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
+{
+ int rc;
+ struct xencomm_desc *desc;
+
+ if (xencomm_is_phys_contiguous((unsigned long)ptr))
+ return xencomm_create_inline(ptr);
+
+ rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
+
+ if (rc || desc == NULL)
+ return NULL;
+
+ return xencomm_pa(desc);
+}
+
+struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
+ struct xencomm_mini *xc_desc)
+{
+ int rc;
+ struct xencomm_desc *desc = NULL;
+
+ if (xencomm_is_phys_contiguous((unsigned long)ptr))
+ return xencomm_create_inline(ptr);
+
+ rc = xencomm_create_mini(ptr, bytes, xc_desc,
+ &desc);
+
+ if (rc)
+ return NULL;
+
+ return xencomm_pa(desc);
+}
OpenPOWER on IntegriCloud