diff options
Diffstat (limited to 'sys/xen')
68 files changed, 14624 insertions, 0 deletions
diff --git a/sys/xen/evtchn/evtchn.c b/sys/xen/evtchn/evtchn.c
new file mode 100644
index 0000000..e7a40a2
--- /dev/null
+++ b/sys/xen/evtchn/evtchn.c
@@ -0,0 +1,1097 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Communication via Xen event channels.
+ *
+ * Maintains the IRQ <-> event-channel mapping tables and the packed
+ * per-IRQ binding information used by the rest of this file.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005-2006 Kip Macy
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/interrupt.h>
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xen_intr.h>
+#include <machine/xen/synch_bitops.h>
+#include <machine/xen/evtchn.h>
+#include <machine/xen/hypervisor.h>
+
+
+/* linux helper functions that got sucked in
+ * rename and move XXX
+ *
+ * find_first_bit(addr, size): i386 inline assembly that scans the bitmap at
+ * addr one 32-bit word at a time, (size + 31) >> 5 words in total, and
+ * returns the bit offset of the first set bit.
+ * NOTE(review): when no bit is set the result is the scanned length in bits
+ * (a multiple of 32), not `size' itself; __first_cpu() below clamps it with
+ * min_t().  Confirm before reusing this helper elsewhere.
+ */
+
+
+static inline int find_first_bit(const unsigned long *addr, unsigned size)
+{
+	int d0, d1;	/* dummy outputs: tell gcc that %ecx and %edi are clobbered */
+	int res;
+
+	/* This looks at memory. Mark it volatile to tell gcc not to move it around */
+	__asm__ __volatile__(
+		"xorl %%eax,%%eax\n\t"
+		"repe; scasl\n\t"
+		"jz 1f\n\t"
+		"leal -4(%%edi),%%edi\n\t"
+		"bsfl (%%edi),%%eax\n"
+		"1:\tsubl %%ebx,%%edi\n\t"
+		"shll $3,%%edi\n\t"
+		"addl %%edi,%%eax"
+		:"=a" (res), "=&c" (d0), "=&D" (d1)
+		:"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
+	return res;
+}
+
+#define min_t(type,x,y) \
+	({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })	/* each argument is evaluated exactly once */
+#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
+static inline int __first_cpu(const xen_cpumask_t *srcp, int nbits)
+{	/* index of the first set bit in the mask, clamped to nbits */
+	return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
+}
+
+static inline unsigned long __ffs(unsigned long word)
+{	/* bsfl: index of the lowest set bit; evtchn_do_upcall() only passes non-zero words */
+	__asm__("bsfl %1,%0"
+		:"=r" (word)
+		:"rm" (word));
+	return word;
+}
+
+static struct mtx irq_mapping_update_lock;	/* spin mutex; set up by the MTX_SYSINIT at the end of this file */
+static struct xenpic *xp;			/* allocated and filled in by evtchn_init() */
+struct xenpic_intsrc {
+	struct intsrc     xp_intsrc;	/* first member: the code casts struct intsrc * to this type */
+	uint8_t           xp_vector;	/* IRQ number; returned by xenpic_vector() */
+	boolean_t	  xp_masked;	/* tracked by the dynirq enable/disable routines */
+};
+
+struct xenpic {
+	struct pic           *xp_dynirq_pic;
+	struct pic           *xp_pirq_pic;
+	uint16_t             xp_numintr;
+	struct xenpic_intsrc xp_pins[0];	/* trailing array; evtchn_init() allocates NR_IRQS entries */
+};
+
+#define TODO            printf("%s: not implemented!\n", __func__)
+
+/* IRQ <-> event-channel mappings. An entry of -1 means "no IRQ bound". */
+static int evtchn_to_irq[NR_EVENT_CHANNELS];
+
+/* Packed IRQ information: binding type, sub-type index, and event channel. */
+static uint32_t irq_info[NR_IRQS];
+/* Binding types. */
+enum {
+	IRQT_UNBOUND,
+	IRQT_PIRQ,
+	IRQT_VIRQ,
+	IRQT_IPI,
+	IRQT_LOCAL_PORT,
+	IRQT_CALLER_PORT
+};
+
+/* Constructor for packed IRQ information: type in bits 31..24, index in bits 23..16, event channel in bits 15..0. */
+#define mk_irq_info(type, index, evtchn)				\
+	(((uint32_t)(type) << 24) | ((uint32_t)(index) << 16) | (uint32_t)(evtchn))
+/* Convenient shorthand for packed representation of an unbound IRQ. */
+#define IRQ_UNBOUND	mk_irq_info(IRQT_UNBOUND, 0, 0)
+/* Accessor macros for packed IRQ information. */
+#define evtchn_from_irq(irq) ((uint16_t)(irq_info[irq]))
+#define index_from_irq(irq)  ((uint8_t)(irq_info[irq] >> 16))
+#define type_from_irq(irq)   ((uint8_t)(irq_info[irq] >> 24))
+
+/* IRQ <-> VIRQ mapping. */
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+
+/* IRQ <-> IPI mapping. */
+#ifndef NR_IPIS
+#define NR_IPIS 1
+#endif
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+
+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
+static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)];	/* NOTE(review): divides by bytes per long, not bits per long, so the array is larger than the bitmap needs -- confirm intent */
+
+/* Reference counts for bindings to IRQs.
*/ +static int irq_bindcount[NR_IRQS]; + +#define VALID_EVTCHN(_chn) ((_chn) != 0) + +#ifdef CONFIG_SMP + +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + cpu_evtchn_mask[cpu][idx] & \ + ~(sh)->evtchn_mask[idx]) + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ + /* By default all event channels notify CPU#0. */ + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +} + +#define cpu_from_evtchn(evtchn) (cpu_evtchn[evtchn]) + +#else + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) +#define bind_evtchn_to_cpu(chn,cpu) ((void)0) +#define init_evtchn_cpu_bindings() ((void)0) +#define cpu_from_evtchn(evtchn) (0) + +#endif + + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + +void +evtchn_do_upcall(struct trapframe *frame) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int irq, cpu; + shared_info_t *s; + vcpu_info_t *vcpu_info; + + cpu = smp_processor_id(); + s = HYPERVISOR_shared_info; + vcpu_info = &s->vcpu_info[cpu]; + + vcpu_info->evtchn_upcall_pending = 0; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. 
*/ + l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0); + + while (l1 != 0) { + l1i = __ffs(l1); + l1 &= ~(1 << l1i); + + while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { + l2i = __ffs(l2); + + port = (l1i * BITS_PER_LONG) + l2i; + if ((irq = evtchn_to_irq[port]) != -1) { + struct intsrc *isrc = intr_lookup_source(irq); + /* + * ack + */ + mask_evtchn(port); + clear_evtchn(port); + + intr_execute_handlers(isrc, frame); + } else { + evtchn_device_upcall(port); + } + } + } +} + +static int +find_unbound_irq(void) +{ + int dynirq, irq; + + for (dynirq = 0; dynirq < NR_IRQS; dynirq++) { + irq = dynirq_to_irq(dynirq); + if (irq_bindcount[irq] == 0) + break; + } + + if (irq == NR_IRQS) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return (irq); +} + +static int +bind_caller_port_to_irq(unsigned int caller_port) +{ + int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((irq = evtchn_to_irq[caller_port]) == -1) { + if ((irq = find_unbound_irq()) < 0) + goto out; + + evtchn_to_irq[caller_port] = irq; + irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); + } + + irq_bindcount[irq]++; + + out: + mtx_unlock_spin(&irq_mapping_update_lock); + return irq; +} + +static int +bind_local_port_to_irq(unsigned int local_port) +{ + int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + PANIC_IF(evtchn_to_irq[local_port] != -1); + + if ((irq = find_unbound_irq()) < 0) { + struct evtchn_close close = { .port = local_port }; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)); + + goto out; + } + + evtchn_to_irq[local_port] = irq; + irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); + irq_bindcount[irq]++; + + out: + mtx_unlock_spin(&irq_mapping_update_lock); + return irq; +} + +static int +bind_listening_port_to_irq(unsigned int remote_domain) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + + err = 
HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + return err ? : bind_local_port_to_irq(alloc_unbound.port); +} + +static int +bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + unsigned int remote_port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + + return err ? : bind_local_port_to_irq(bind_interdomain.local_port); +} + +static int +bind_virq_to_irq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0); + + evtchn = bind_virq.port; + + irq = find_unbound_irq(); + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + mtx_unlock_spin(&irq_mapping_update_lock); + + return irq; +} + +static int +bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) { + if ((irq = find_unbound_irq()) < 0) + goto out; + + bind_ipi.vcpu = cpu; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0); + evtchn = bind_ipi.port; + + irq = find_unbound_irq(); + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; +out: + + mtx_unlock_spin(&irq_mapping_update_lock); + + return irq; +} + + +void +unbind_from_irq(int irq) +{ + struct evtchn_close 
close; + int evtchn = evtchn_from_irq(irq); + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { + close.port = evtchn; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))[index_from_irq(irq)] = -1; + break; + case IRQT_IPI: + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))[index_from_irq(irq)] = -1; + break; + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. */ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + } + + mtx_unlock_spin(&irq_mapping_update_lock); +} + +int +bind_caller_port_to_irqhandler(unsigned int caller_port, + const char *devname, + driver_intr_t handler, + void *arg, + unsigned long irqflags, + void **cookiep) +{ + unsigned int irq; + int retval; + + irq = bind_caller_port_to_irq(caller_port); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + retval = intr_add_handler(devname, irq, NULL, handler, arg, irqflags, cookiep); + if (retval != 0) { + unbind_from_irq(irq); + return -retval; + } + + return irq; +} + +int +bind_listening_port_to_irqhandler( + unsigned int remote_domain, + const char *devname, + driver_intr_t handler, + void *arg, + unsigned long irqflags, + void **cookiep) +{ + unsigned int irq; + int retval; + + irq = bind_listening_port_to_irq(remote_domain); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + retval = intr_add_handler(devname, irq, NULL, handler, arg, irqflags, cookiep); + if (retval != 0) { + unbind_from_irq(irq); + return -retval; + } + + return irq; +} + +int +bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, + const char *devname, + driver_filter_t filter, + driver_intr_t handler, + unsigned long irqflags) +{ + unsigned int irq; + int retval; + + irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + 
intr_register_source(&xp->xp_pins[irq].xp_intsrc); + retval = intr_add_handler(devname, irq, filter, handler, NULL, irqflags, NULL); + if (retval != 0) { + unbind_from_irq(irq); + return -retval; + } + + return irq; +} + +int +bind_virq_to_irqhandler(unsigned int virq, + unsigned int cpu, + const char *devname, + driver_filter_t filter, + driver_intr_t handler, + unsigned long irqflags) +{ + unsigned int irq; + int retval; + + irq = bind_virq_to_irq(virq, cpu); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + retval = intr_add_handler(devname, irq, filter, handler, NULL, irqflags, NULL); + if (retval != 0) { + unbind_from_irq(irq); + return -retval; + } + + return irq; +} + +int +bind_ipi_to_irqhandler(unsigned int ipi, + unsigned int cpu, + const char *devname, + driver_intr_t handler, + unsigned long irqflags) +{ + unsigned int irq; + int retval; + + irq = bind_ipi_to_irq(ipi, cpu); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + retval = intr_add_handler(devname, irq, NULL, handler, NULL, irqflags, NULL); + if (retval != 0) { + unbind_from_irq(irq); + return -retval; + } + + return irq; +} + +void +unbind_from_irqhandler(unsigned int irq, void *dev_id) +{ + if (dev_id) + intr_remove_handler(dev_id); /* XXX */ + unbind_from_irq(irq); +} + +#if 0 +/* Rebind an evtchn so that it gets delivered to a specific cpu */ +static void +rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +{ + evtchn_op_t op = { .cmd = EVTCHNOP_bind_vcpu }; + int evtchn; + + mtx_lock_spin(&irq_mapping_update_lock); + + evtchn = evtchn_from_irq(irq); + if (!VALID_EVTCHN(evtchn)) { + mtx_unlock_spin(&irq_mapping_update_lock); + return; + } + + /* Send future instances of this interrupt to other vcpu. */ + bind_vcpu.port = evtchn; + bind_vcpu.vcpu = tcpu; + + /* + * If this fails, it usually just indicates that we're dealing with a + * virq or IPI channel, which don't actually need to be rebound. Ignore + * it, but don't do the xenlinux-level rebind in that case. 
+ */ + if (HYPERVISOR_event_channel_op(&op) >= 0) + bind_evtchn_to_cpu(evtchn, tcpu); + + mtx_unlock_spin(&irq_mapping_update_lock); + +} + +static void set_affinity_irq(unsigned irq, xen_cpumask_t dest) +{ + unsigned tcpu = first_cpu(dest); + rebind_irq_to_cpu(irq, tcpu); +} +#endif + +/* + * Interface to generic handling in intr_machdep.c + */ + + +/*------------ interrupt handling --------------------------------------*/ +#define TODO printf("%s: not implemented!\n", __func__) + + +static void xenpic_dynirq_enable_source(struct intsrc *isrc); +static void xenpic_dynirq_disable_source(struct intsrc *isrc, int); +static void xenpic_dynirq_eoi_source(struct intsrc *isrc); +static void xenpic_dynirq_enable_intr(struct intsrc *isrc); +static void xenpic_dynirq_disable_intr(struct intsrc *isrc); + +static void xenpic_pirq_enable_source(struct intsrc *isrc); +static void xenpic_pirq_disable_source(struct intsrc *isrc, int); +static void xenpic_pirq_eoi_source(struct intsrc *isrc); +static void xenpic_pirq_enable_intr(struct intsrc *isrc); +static void xenpic_pirq_disable_intr(struct intsrc *isrc); + + +static int xenpic_vector(struct intsrc *isrc); +static int xenpic_source_pending(struct intsrc *isrc); +static void xenpic_suspend(struct pic* pic); +static void xenpic_resume(struct pic* pic); + + +struct pic xenpic_dynirq_template = { + .pic_enable_source = xenpic_dynirq_enable_source, + .pic_disable_source = xenpic_dynirq_disable_source, + .pic_eoi_source = xenpic_dynirq_eoi_source, + .pic_enable_intr = xenpic_dynirq_enable_intr, + .pic_disable_intr = xenpic_dynirq_disable_intr, + .pic_vector = xenpic_vector, + .pic_source_pending = xenpic_source_pending, + .pic_suspend = xenpic_suspend, + .pic_resume = xenpic_resume +}; + +struct pic xenpic_pirq_template = { + .pic_enable_source = xenpic_pirq_enable_source, + .pic_disable_source = xenpic_pirq_disable_source, + .pic_eoi_source = xenpic_pirq_eoi_source, + .pic_enable_intr = xenpic_pirq_enable_intr, + .pic_disable_intr 
= xenpic_pirq_disable_intr, + .pic_vector = xenpic_vector, + .pic_source_pending = xenpic_source_pending, + .pic_suspend = xenpic_suspend, + .pic_resume = xenpic_resume +}; + + +void +xenpic_dynirq_enable_source(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + + mtx_lock_spin(&irq_mapping_update_lock); + if (xp->xp_masked) { + irq = xenpic_vector(isrc); + unmask_evtchn(evtchn_from_irq(irq)); + xp->xp_masked = FALSE; + } + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_disable_source(struct intsrc *isrc, int foo) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + + mtx_lock_spin(&irq_mapping_update_lock); + if (!xp->xp_masked) { + irq = xenpic_vector(isrc); + mask_evtchn(evtchn_from_irq(irq)); + xp->xp_masked = TRUE; + } + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_enable_intr(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + mtx_lock_spin(&irq_mapping_update_lock); + xp->xp_masked = 0; + irq = xenpic_vector(isrc); + unmask_evtchn(evtchn_from_irq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_disable_intr(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + mtx_lock_spin(&irq_mapping_update_lock); + xp->xp_masked = 1; + irq = xenpic_vector(isrc); + mask_evtchn(evtchn_from_irq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_eoi_source(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + mtx_lock_spin(&irq_mapping_update_lock); + xp->xp_masked = 0; + irq = xenpic_vector(isrc); + unmask_evtchn(evtchn_from_irq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static int +xenpic_vector(struct intsrc *isrc) +{ + struct xenpic_intsrc *pin; + + pin 
= (struct xenpic_intsrc *)isrc; + //printf("xenpic_vector(): isrc=%p,vector=%u\n", pin, pin->xp_vector); + + return (pin->xp_vector); +} + +static int +xenpic_source_pending(struct intsrc *isrc) +{ + struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc; + + /* XXXEN: TODO */ + printf("xenpic_source_pending(): vector=%x,masked=%x\n", + pin->xp_vector, pin->xp_masked); + +/* notify_remote_via_evtchn(pin->xp_vector); // XXX RS: Is this correct? */ + return 0; +} + +static void +xenpic_suspend(struct pic* pic) +{ + TODO; +} + +static void +xenpic_resume(struct pic* pic) +{ + TODO; +} + +void +notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} + +/* required for support of physical devices */ +static inline void +pirq_unmask_notify(int pirq) +{ + struct physdev_eoi eoi = { .irq = pirq }; + + if (unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0]))) { + (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + } +} + +static inline void +pirq_query_unmask(int pirq) +{ + struct physdev_irq_status_query irq_status_query; + + irq_status_query.irq = pirq; + (void)HYPERVISOR_physdev_op(PHYSDEVOP_IRQ_STATUS_QUERY, &irq_status_query); + clear_bit(pirq, &pirq_needs_unmask_notify[0]); + if ( irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) + set_bit(pirq, &pirq_needs_unmask_notify[0]); +} + +/* + * On startup, if there is no action associated with the IRQ then we are + * probing. In this case we should not share with others as it will confuse us. + */ +#define probing_irq(_irq) (intr_lookup_source(irq) == NULL) + +static void +xenpic_pirq_enable_intr(struct intsrc *isrc) +{ + struct evtchn_bind_pirq bind_pirq; + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + goto out; + + bind_pirq.pirq = irq; + /* NB. We are happy to share unless we are probing. 
*/ + bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; + + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + if (!probing_irq(irq)) /* Some failures are expected when probing. */ + printf("Failed to obtain physical IRQ %d\n", irq); + mtx_unlock_spin(&irq_mapping_update_lock); + return; + } + evtchn = bind_pirq.port; + + pirq_query_unmask(irq_to_pirq(irq)); + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, evtchn); + + out: + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_pirq_disable_intr(struct intsrc *isrc) +{ + unsigned int irq; + int evtchn; + struct evtchn_close close; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + mask_evtchn(evtchn); + + close.port = evtchn; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0); + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_pirq_enable_source(struct intsrc *isrc) +{ + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_pirq_disable_source(struct intsrc *isrc, int eoi) +{ + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + mask_evtchn(evtchn); + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + + +static void +xenpic_pirq_eoi_source(struct intsrc *isrc) +{ + int evtchn; + unsigned 
int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + +int +irq_to_evtchn_port(int irq) +{ + return evtchn_from_irq(irq); +} + +void +mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, &s->evtchn_mask[0]); +} + +void +unmask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + unsigned int cpu = smp_processor_id(); + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + return; + } + + synch_clear_bit(port, &s->evtchn_mask); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just + * like a real IO-APIC we 'lose the interrupt edge' if the channel is + * masked. + */ + if (synch_test_bit(port, &s->evtchn_pending) && + !synch_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) { + vcpu_info->evtchn_upcall_pending = 1; + if (!vcpu_info->evtchn_upcall_mask) + force_evtchn_callback(); + } +} + +void irq_resume(void) +{ + evtchn_op_t op; + int cpu, pirq, virq, ipi, irq, evtchn; + + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_ipi bind_ipi; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* Check that no PIRQs are still bound. */ + for (pirq = 0; pirq < NR_PIRQS; pirq++) + PANIC_IF(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND); + + /* Secondary CPUs must have no VIRQ or IPI bindings. 
*/ + for (cpu = 1; cpu < NR_CPUS; cpu++) { + for (virq = 0; virq < NR_VIRQS; virq++) + PANIC_IF(per_cpu(virq_to_irq, cpu)[virq] != -1); + for (ipi = 0; ipi < NR_IPIS; ipi++) + PANIC_IF(per_cpu(ipi_to_irq, cpu)[ipi] != -1); + } + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq] &= ~0xFFFF; /* zap event-channel binding */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + /* Primary CPU: rebind VIRQs automatically. */ + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, 0)[virq]) == -1) + continue; + + PANIC_IF(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0)); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = 0; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq) != 0); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } + + /* Primary CPU: rebind IPIs automatically. */ + for (ipi = 0; ipi < NR_IPIS; ipi++) { + if ((irq = per_cpu(ipi_to_irq, 0)[ipi]) == -1) + continue; + + PANIC_IF(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0)); + + /* Get a new binding from Xen. */ + memset(&op, 0, sizeof(op)); + bind_ipi.vcpu = 0; + PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void +evtchn_init(void *dummy __unused) +{ + int i, cpu; + struct xenpic_intsrc *pin, *tpin; + + /* No VIRQ or IPI bindings. */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (i = 0; i < NR_VIRQS; i++) + per_cpu(virq_to_irq, cpu)[i] = -1; + for (i = 0; i < NR_IPIS; i++) + per_cpu(ipi_to_irq, cpu)[i] = -1; + } + + /* No event-channel -> IRQ mappings. 
*/ + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + evtchn_to_irq[i] = -1; + mask_evtchn(i); /* No event channels are 'live' right now. */ + } + + /* No IRQ -> event-channel mappings. */ + for (i = 0; i < NR_IRQS; i++) + irq_info[i] = IRQ_UNBOUND; + + xp = malloc(sizeof(struct xenpic) + NR_IRQS*sizeof(struct xenpic_intsrc), + M_DEVBUF, M_WAITOK); + + xp->xp_dynirq_pic = &xenpic_dynirq_template; + xp->xp_pirq_pic = &xenpic_pirq_template; + xp->xp_numintr = NR_IRQS; + bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_IRQS); + + + /* We need to register our PIC's beforehand */ + if (intr_register_pic(&xenpic_pirq_template)) + panic("XEN: intr_register_pic() failure"); + if (intr_register_pic(&xenpic_dynirq_template)) + panic("XEN: intr_register_pic() failure"); + + /* + * Initialize the dynamic IRQ's - we initialize the structures, but + * we do not bind them (bind_evtchn_to_irqhandle() does this) + */ + pin = xp->xp_pins; + for (i = 0; i < NR_DYNIRQS; i++) { + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + irq_bindcount[dynirq_to_irq(i)] = 0; + + tpin = &pin[dynirq_to_irq(i)]; + tpin->xp_intsrc.is_pic = xp->xp_dynirq_pic; + tpin->xp_vector = dynirq_to_irq(i); + + } + /* + * Now, we go ahead and claim every PIRQ there is. + */ + pin = xp->xp_pins; + for (i = 0; i < NR_PIRQS; i++) { + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + irq_bindcount[pirq_to_irq(i)] = 0; + +#ifdef RTC_IRQ + /* If not domain 0, force our RTC driver to fail its probe. */ + if ((i == RTC_IRQ) && + !(xen_start_info->flags & SIF_INITDOMAIN)) + continue; +#endif + tpin = &pin[pirq_to_irq(i)]; + tpin->xp_intsrc.is_pic = xp->xp_pirq_pic; + tpin->xp_vector = pirq_to_irq(i); + + } +} + +SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL); + /* + * irq_mapping_update_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) 
properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + +MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_SPIN); diff --git a/sys/xen/evtchn/evtchn_dev.c b/sys/xen/evtchn/evtchn_dev.c new file mode 100644 index 0000000..a232cc1 --- /dev/null +++ b/sys/xen/evtchn/evtchn_dev.c @@ -0,0 +1,394 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/selinfo.h> +#include <sys/poll.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/ioccom.h> + +#include <machine/cpufunc.h> +#include <machine/intr_machdep.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/xen_intr.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <machine/xen/synch_bitops.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/evtchn.h> + + +typedef struct evtchn_sotfc { + + struct selinfo ev_rsel; +} evtchn_softc_t; + + +#ifdef linuxcrap +/* NB. This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; +#endif + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. */ + +#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ + +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) +static uint16_t *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Which ports is user-space bound to? 
*/ +static uint32_t bound_ports[32]; + +/* Unique address for processes to sleep on */ +static void *evtchn_waddr = ˚ + +static struct mtx lock, upcall_lock; + +static d_read_t evtchn_read; +static d_write_t evtchn_write; +static d_ioctl_t evtchn_ioctl; +static d_poll_t evtchn_poll; +static d_open_t evtchn_open; +static d_close_t evtchn_close; + + +void +evtchn_device_upcall(int port) +{ + mtx_lock(&upcall_lock); + + mask_evtchn(port); + clear_evtchn(port); + + if ( ring != NULL ) { + if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { + ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; + if ( ring_cons == ring_prod++ ) { + wakeup(evtchn_waddr); + } + } + else { + ring_overflow = 1; + } + } + + mtx_unlock(&upcall_lock); +} + +static void +__evtchn_reset_buffer_ring(void) +{ + /* Initialise the ring to empty. Clear errors. */ + ring_cons = ring_prod = ring_overflow = 0; +} + +static int +evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc; + unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; + count = uio->uio_resid; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + for ( ; ; ) { + if ( (c = ring_cons) != (p = ring_prod) ) + break; + + if ( ring_overflow ) { + rc = EFBIG; + goto out; + } + + if (sst != 0) { + rc = EINTR; + goto out; + } + + /* PCATCH == check for signals before and after sleeping + * PWAIT == priority of waiting on resource + */ + sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); + } + else { + bytes1 = (p - c) * sizeof(uint16_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. 
*/ + if ( bytes1 > count ) { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) { + bytes2 = count - bytes1; + } + + if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || + ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) + /* keeping this around as its replacement is not equivalent + * copyout(&ring[0], &buf[bytes1], bytes2) + */ + { + rc = EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); + + rc = bytes1 + bytes2; + + out: + + return rc; +} + +static int +evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc, i, count; + + count = uio->uio_resid; + + uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + + if ( kbuf == NULL ) + return ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( uiomove(kbuf, count, uio) != 0 ) { + rc = EFAULT; + goto out; + } + + mtx_lock_spin(&lock); + for ( i = 0; i < (count/2); i++ ) + if ( test_bit(kbuf[i], &bound_ports[0]) ) + unmask_evtchn(kbuf[i]); + mtx_unlock_spin(&lock); + + rc = count; + + out: + free(kbuf, M_DEVBUF); + return rc; +} + +static int +evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, + int mode, struct thread *td __unused) +{ + int rc = 0; + + mtx_lock_spin(&lock); + + switch ( cmd ) + { + case EVTCHN_RESET: + __evtchn_reset_buffer_ring(); + break; + case EVTCHN_BIND: + if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) ) + unmask_evtchn((int)arg); + else + rc = EINVAL; + break; + case EVTCHN_UNBIND: + if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) ) + mask_evtchn((int)arg); + else + rc = EINVAL; + break; + default: + rc = ENOSYS; + break; + } + + mtx_unlock_spin(&lock); + + return rc; +} + +static int +evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) +{ + + evtchn_softc_t *sc; + unsigned int mask = POLLOUT | POLLWRNORM; + + sc = dev->si_drv1; + + if ( ring_cons != 
ring_prod ) + mask |= POLLIN | POLLRDNORM; + else if ( ring_overflow ) + mask = POLLERR; + else + selrecord(td, &sc->ev_rsel); + + + return mask; +} + + +static int +evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + uint16_t *_ring; + + if (flag & O_NONBLOCK) + return EBUSY; + + if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) + return EBUSY; + + if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) + return ENOMEM; + + mtx_lock_spin(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + mtx_unlock_spin(&lock); + + + return 0; +} + +static int +evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) +{ + int i; + + mtx_lock_spin(&lock); + if (ring != NULL) { + free(ring, M_DEVBUF); + ring = NULL; + } + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) + mask_evtchn(i); + mtx_unlock_spin(&lock); + + evtchn_dev_inuse = 0; + + return 0; +} + +static struct cdevsw evtchn_devsw = { + d_version: D_VERSION, + d_open: evtchn_open, + d_close: evtchn_close, + d_read: evtchn_read, + d_write: evtchn_write, + d_ioctl: evtchn_ioctl, + d_poll: evtchn_poll, + d_name: "evtchn", + d_flags: 0, +}; + + +/* XXX - if this device is ever supposed to support use by more than one process + * this global static will have to go away + */ +static struct cdev *evtchn_dev; + + + +static int +evtchn_init(void *dummy __unused) +{ + /* XXX I believe we don't need these leaving them here for now until we + * have some semblance of it working + */ + mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); + + /* (DEVFS) create '/dev/misc/evtchn'. 
*/ + evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); + + mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); + + evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); + bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); + + /* XXX I don't think we need any of this rubbish */ +#if 0 + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. */ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); +#endif + printk("Event-channel device installed.\n"); + + return 0; +} + + +SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL); + + diff --git a/sys/xen/gnttab.c b/sys/xen/gnttab.c new file mode 100644 index 0000000..11072f1 --- /dev/null +++ b/sys/xen/gnttab.c @@ -0,0 +1,597 @@ +/****************************************************************************** + * gnttab.c + * + * Two sets of functionality: + * 1. Granting foreign access to our memory reservation. + * 2. Accessing others' memory reservations via grant references. 
+ * (i.e., mechanisms for both sender and recipient of grant references) + * + * Copyright (c) 2005, Christopher Clark + * Copyright (c) 2004, K A Fraser + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_global.h" +#include "opt_pmap.h" +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/module.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + + + +#include <machine/xen/hypervisor.h> +#include <machine/xen/synch_bitops.h> +#include <xen/gnttab.h> + +#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) + + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s': line %d, file %s\n", \ + #_p , __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + +#define WPRINTK(fmt, args...) \ + printk("xen_grant: " fmt, ##args) + +/* External tools reserve first few grant table entries. 
*/ +#define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) + +static grant_ref_t **gnttab_list; +static unsigned int nr_grant_frames; +static unsigned int boot_max_nr_grant_frames; +static int gnttab_free_count; +static grant_ref_t gnttab_free_head; +static struct mtx gnttab_list_lock; + +static grant_entry_t *shared; + +static struct gnttab_free_callback *gnttab_free_callback_list = NULL; + +static int gnttab_expand(unsigned int req_entries); + +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) + +static int +get_free_entries(int count) +{ + int ref, rc; + grant_ref_t head; + + mtx_lock(&gnttab_list_lock); + if ((gnttab_free_count < count) && + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { + mtx_unlock(&gnttab_list_lock); + return (rc); + } + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = gnttab_entry(head); + gnttab_free_head = gnttab_entry(head); + gnttab_entry(head) = GNTTAB_LIST_END; + mtx_unlock(&gnttab_list_lock); + return (ref); +} + +#define get_free_entry() get_free_entries(1) + +static void +do_free_callbacks(void) +{ + struct gnttab_free_callback *callback, *next; + + callback = gnttab_free_callback_list; + gnttab_free_callback_list = NULL; + + while (callback != NULL) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->fn(callback->arg); + } else { + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } +} + +static inline void +check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list != NULL)) + do_free_callbacks(); +} + +static void +put_free_entry(grant_ref_t ref) +{ + + mtx_lock(&gnttab_list_lock); + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + 
mtx_unlock(&gnttab_list_lock); +} + +/* + * Public grant-issuing interface functions + */ + +int +gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly) +{ + int ref; + + if (unlikely((ref = get_free_entry()) == -1)) + return -ENOSPC; + + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); + + return ref; +} + +void +gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly) +{ + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); +} + +int +gnttab_query_foreign_access(grant_ref_t ref) +{ + uint16_t nflags; + + nflags = shared[ref].flags; + + return (nflags & (GTF_reading|GTF_writing)); +} + +int +gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + uint16_t flags, nflags; + + nflags = shared[ref].flags; + do { + if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { + printf("WARNING: g.e. still in use!\n"); + return (0); + } + } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != + flags); + + return (1); +} + +void +gnttab_end_foreign_access(grant_ref_t ref, int readonly, void *page) +{ + if (gnttab_end_foreign_access_ref(ref, readonly)) { + put_free_entry(ref); + if (page != NULL) { + free(page, M_DEVBUF); + } + } + else { + /* XXX This needs to be fixed so that the ref and page are + placed on a list to be freed up later. */ + printf("WARNING: leaking g.e. 
and page still in use!\n"); + } +} + +int +gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) +{ + int ref; + + if (unlikely((ref = get_free_entry()) == -1)) + return -ENOSPC; + + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); + + return (ref); +} + +void +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) +{ + shared[ref].frame = pfn; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_accept_transfer; +} + +unsigned long +gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + unsigned long frame; + uint16_t flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { + if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) + return (0); + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = shared[ref].flags; + cpu_relax(); + } + + /* Read the frame number /after/ reading completion status. 
*/ + rmb(); + frame = shared[ref].frame; + PANIC_IF(frame == 0); + + return (frame); +} + +unsigned long +gnttab_end_foreign_transfer(grant_ref_t ref) +{ + unsigned long frame = gnttab_end_foreign_transfer_ref(ref); + + put_free_entry(ref); + return (frame); +} + +void +gnttab_free_grant_reference(grant_ref_t ref) +{ + + put_free_entry(ref); +} + +void +gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + int count = 1; + + if (head == GNTTAB_LIST_END) + return; + + mtx_lock(&gnttab_list_lock); + ref = head; + while (gnttab_entry(ref) != GNTTAB_LIST_END) { + ref = gnttab_entry(ref); + count++; + } + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + mtx_unlock(&gnttab_list_lock); +} + +int +gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) +{ + int h = get_free_entries(count); + + if (h == -1) + return -ENOSPC; + + *head = h; + + return 0; +} + +int +gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); +} + +int +gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + + if (unlikely(g == GNTTAB_LIST_END)) + return -ENOSPC; + *private_head = gnttab_entry(g); + + return (g); +} + +void +gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) +{ + gnttab_entry(release) = *private_head; + *private_head = release; +} + +void +gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, uint16_t count) +{ + + mtx_lock(&gnttab_list_lock); + if (callback->next) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); + out: + mtx_unlock(&gnttab_list_lock); + +} + +void +gnttab_cancel_free_callback(struct gnttab_free_callback *callback) +{ + struct gnttab_free_callback **pcb; + + 
mtx_lock(&gnttab_list_lock); + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { + if (*pcb == callback) { + *pcb = callback->next; + break; + } + } + mtx_unlock(&gnttab_list_lock); +} + + +static int +grow_gnttab_list(unsigned int more_frames) +{ + unsigned int new_nr_grant_frames, extra_entries, i; + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + + for (i = nr_grant_frames; i < new_nr_grant_frames; i++) + { + gnttab_list[i] = (grant_ref_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + + if (!gnttab_list[i]) + goto grow_nomem; + } + + for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; + i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(i) = gnttab_free_head; + gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + check_free_callbacks(); + + return 0; + +grow_nomem: + for ( ; i >= nr_grant_frames; i--) + free(gnttab_list[i], M_DEVBUF); + return (-ENOMEM); +} + +static unsigned int +__max_nr_grant_frames(void) +{ + struct gnttab_query_size query; + int rc; + + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return (4); /* Legacy max supported number of frames */ + + return (query.max_nr_frames); +} + +static inline +unsigned int max_nr_grant_frames(void) +{ + unsigned int xen_max = __max_nr_grant_frames(); + + if (xen_max > boot_max_nr_grant_frames) + return (boot_max_nr_grant_frames); + return (xen_max); +} + +#ifdef notyet +/* + * XXX needed for backend support + * + */ +static int +map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int +unmap_pte_fn(pte_t 
*pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} +#endif + +static int +gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct gnttab_setup_table setup; + unsigned long *frames; + unsigned int nr_gframes = end_idx + 1; + int i, rc; + + frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT); + if (!frames) + return -ENOMEM; + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_gframes; + set_xen_guest_handle(setup.frame_list, frames); + + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (rc == -ENOSYS) { + free(frames, M_DEVBUF); + return -ENOSYS; + } + PANIC_IF(rc || setup.status); + + if (shared == NULL) { + vm_offset_t area; + + area = kmem_alloc_nofault(kernel_map, + PAGE_SIZE * max_nr_grant_frames()); + PANIC_IF(area == 0); + shared = (grant_entry_t *)area; + } + for (i = 0; i < nr_gframes; i++) + PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE, + frames[i] << PAGE_SHIFT | PG_RW | PG_V); + + free(frames, M_DEVBUF); + + return 0; +} + +int +gnttab_resume(void) +{ + if (max_nr_grant_frames() < nr_grant_frames) + return -ENOSYS; + return gnttab_map(0, nr_grant_frames - 1); +} + +int +gnttab_suspend(void) +{ + int i, pages; + + pages = (PAGE_SIZE*nr_grant_frames) >> PAGE_SHIFT; + + for (i = 0; i < pages; i++) + PT_SET_MA(shared + (i*PAGE_SIZE), (vm_paddr_t)0); + + return (0); +} + +static int +gnttab_expand(unsigned int req_entries) +{ + int rc; + unsigned int cur, extra; + + cur = nr_grant_frames; + extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / + GREFS_PER_GRANT_FRAME); + if (cur + extra > max_nr_grant_frames()) + return -ENOSPC; + + if ((rc = gnttab_map(cur, cur + extra - 1)) == 0) + rc = grow_gnttab_list(extra); + + return rc; +} + +static int +gnttab_init(void *unused) +{ + int i; + unsigned int max_nr_glist_frames; + unsigned int nr_init_grefs; + + if (!is_running_on_xen()) + return -ENODEV; + + nr_grant_frames = 1; + 
boot_max_nr_grant_frames = __max_nr_grant_frames(); + + /* Determine the maximum number of frames required for the + * grant reference free list on the current hypervisor. + */ + max_nr_glist_frames = (boot_max_nr_grant_frames * + GREFS_PER_GRANT_FRAME / + (PAGE_SIZE / sizeof(grant_ref_t))); + + gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), + M_DEVBUF, M_NOWAIT); + + if (gnttab_list == NULL) + return -ENOMEM; + + for (i = 0; i < nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (gnttab_list[i] == NULL) + goto ini_nomem; + } + + if (gnttab_resume() < 0) + return -ENODEV; + + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; + gnttab_free_head = NR_RESERVED_ENTRIES; + + printk("Grant table initialized\n"); + return 0; + +ini_nomem: + for (i--; i >= 0; i--) + free(gnttab_list[i], M_DEVBUF); + free(gnttab_list, M_DEVBUF); + return -ENOMEM; + +} + +MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); +SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL); diff --git a/sys/xen/gnttab.h b/sys/xen/gnttab.h new file mode 100644 index 0000000..81b2962 --- /dev/null +++ b/sys/xen/gnttab.h @@ -0,0 +1,138 @@ +/****************************************************************************** + * gnttab.h + * + * Two sets of functionality: + * 1. Granting foreign access to our memory reservation. + * 2. Accessing others' memory reservations via grant references. 
+ * (i.e., mechanisms for both sender and recipient of grant references) + * + * Copyright (c) 2004-2005, K A Fraser + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __ASM_GNTTAB_H__ + +#include <machine/xen/hypervisor.h> +#include <xen/interface/grant_table.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/hypervisor.h> +#include <machine/xen/features.h> + +struct gnttab_free_callback { + struct gnttab_free_callback *next; + void (*fn)(void *); + void *arg; + uint16_t count; +}; + +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int readonly); + +/* + * End access through the given grant reference, iff the grant entry is no + * longer in use. Return 1 if the grant entry was freed, 0 if it is still in + * use. + */ +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); + +/* + * Eventually end access through the given grant reference, and once that + * access has been ended, free the given page too. Access will be ended + * immediately iff the grant entry is not in use, otherwise it will happen + * some time later. page may be 0, in which case no freeing will occur. + */ +void gnttab_end_foreign_access(grant_ref_t ref, int readonly, + void *page); + +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); + +int gnttab_query_foreign_access(grant_ref_t ref); + +/* + * operations on reserved batches of grant references + */ +int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *pprivate_head); + +void gnttab_free_grant_reference(grant_ref_t ref); + +void gnttab_free_grant_references(grant_ref_t head); + +int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); + +int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); + +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release); + +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, uint16_t count); +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); + 
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); + +int gnttab_suspend(void); +int gnttab_resume(void); + +static inline void +gnttab_set_map_op(struct gnttab_map_grant_ref *map, vm_paddr_t addr, + uint32_t flags, grant_ref_t ref, domid_t domid) +{ + if (flags & GNTMAP_contains_pte) + map->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + map->host_addr = vtophys(addr); + else + map->host_addr = addr; + + map->flags = flags; + map->ref = ref; + map->dom = domid; +} + +static inline void +gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, vm_paddr_t addr, + uint32_t flags, grant_handle_t handle) +{ + if (flags & GNTMAP_contains_pte) + unmap->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + unmap->host_addr = vtophys(addr); + else + unmap->host_addr = addr; + + unmap->handle = handle; + unmap->dev_bus_addr = 0; +} + +#endif /* __ASM_GNTTAB_H__ */ diff --git a/sys/xen/interface/COPYING b/sys/xen/interface/COPYING new file mode 100644 index 0000000..ffc6d61 --- /dev/null +++ b/sys/xen/interface/COPYING @@ -0,0 +1,38 @@ +XEN NOTICE +========== + +This copyright applies to all files within this subdirectory and its +subdirectories: + include/public/*.h + include/public/hvm/*.h + include/public/io/*.h + +The intention is that these files can be freely copied into the source +tree of an operating system when porting that OS to run on Xen. Doing +so does *not* cause the OS to become subject to the terms of the GPL. + +All other files in the Xen source distribution are covered by version +2 of the GNU General Public License except where explicitly stated +otherwise within individual source files. 
+ + -- Keir Fraser (on behalf of the Xen team) + +===================================================================== + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/sys/xen/interface/acm.h b/sys/xen/interface/acm.h new file mode 100644 index 0000000..ef62da0 --- /dev/null +++ b/sys/xen/interface/acm.h @@ -0,0 +1,228 @@ +/* + * acm.h: Xen access control module interface definitions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Reiner Sailer <sailer@watson.ibm.com> + * Copyright (c) 2005, International Business Machines Corporation. + */ + +#ifndef _XEN_PUBLIC_ACM_H +#define _XEN_PUBLIC_ACM_H + +#include "xen.h" + +/* if ACM_DEBUG defined, all hooks should + * print a short trace message (comment it out + * when not in testing mode ) + */ +/* #define ACM_DEBUG */ + +#ifdef ACM_DEBUG +# define printkd(fmt, args...) printk(fmt,## args) +#else +# define printkd(fmt, args...) +#endif + +/* default ssid reference value if not supplied */ +#define ACM_DEFAULT_SSID 0x0 +#define ACM_DEFAULT_LOCAL_SSID 0x0 + +/* Internal ACM ERROR types */ +#define ACM_OK 0 +#define ACM_UNDEF -1 +#define ACM_INIT_SSID_ERROR -2 +#define ACM_INIT_SOID_ERROR -3 +#define ACM_ERROR -4 + +/* External ACCESS DECISIONS */ +#define ACM_ACCESS_PERMITTED 0 +#define ACM_ACCESS_DENIED -111 +#define ACM_NULL_POINTER_ERROR -200 + +/* + Error codes reported in when trying to test for a new policy + These error codes are reported in an array of tuples where + each error code is followed by a parameter describing the error + more closely, such as a domain id. 
+*/ +#define ACM_EVTCHN_SHARING_VIOLATION 0x100 +#define ACM_GNTTAB_SHARING_VIOLATION 0x101 +#define ACM_DOMAIN_LOOKUP 0x102 +#define ACM_CHWALL_CONFLICT 0x103 +#define ACM_SSIDREF_IN_USE 0x104 + + +/* primary policy in lower 4 bits */ +#define ACM_NULL_POLICY 0 +#define ACM_CHINESE_WALL_POLICY 1 +#define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2 +#define ACM_POLICY_UNDEFINED 15 + +/* combinations have secondary policy component in higher 4bit */ +#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \ + ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY) + +/* policy: */ +#define ACM_POLICY_NAME(X) \ + ((X) == (ACM_NULL_POLICY)) ? "NULL" : \ + ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL" : \ + ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT" : \ + ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT" : \ + "UNDEFINED" + +/* the following policy versions must be increased + * whenever the interpretation of the related + * policy's data structure changes + */ +#define ACM_POLICY_VERSION 3 +#define ACM_CHWALL_VERSION 1 +#define ACM_STE_VERSION 1 + +/* defines a ssid reference used by xen */ +typedef uint32_t ssidref_t; + +/* hooks that are known to domains */ +#define ACMHOOK_none 0 +#define ACMHOOK_sharing 1 + +/* -------security policy relevant type definitions-------- */ + +/* type identifier; compares to "equal" or "not equal" */ +typedef uint16_t domaintype_t; + +/* CHINESE WALL POLICY DATA STRUCTURES + * + * current accumulated conflict type set: + * When a domain is started and has a type that is in + * a conflict set, the conflicting types are incremented in + * the aggregate set. When a domain is destroyed, the + * conflicting types to its type are decremented. + * If a domain has multiple types, this procedure works over + * all those types. + * + * conflict_aggregate_set[i] holds the number of + * running domains that have a conflict with type i. 
+ * + * running_types[i] holds the number of running domains + * that include type i in their ssidref-referenced type set + * + * conflict_sets[i][j] is "0" if type j has no conflict + * with type i and is "1" otherwise. + */ +/* high-16 = version, low-16 = check magic */ +#define ACM_MAGIC 0x0001debc + +/* each offset in bytes from start of the struct they + * are part of */ + +/* V3 of the policy buffer aded a version structure */ +struct acm_policy_version +{ + uint32_t major; + uint32_t minor; +}; + + +/* each buffer consists of all policy information for + * the respective policy given in the policy code + * + * acm_policy_buffer, acm_chwall_policy_buffer, + * and acm_ste_policy_buffer need to stay 32-bit aligned + * because we create binary policies also with external + * tools that assume packed representations (e.g. the java tool) + */ +struct acm_policy_buffer { + uint32_t policy_version; /* ACM_POLICY_VERSION */ + uint32_t magic; + uint32_t len; + uint32_t policy_reference_offset; + uint32_t primary_policy_code; + uint32_t primary_buffer_offset; + uint32_t secondary_policy_code; + uint32_t secondary_buffer_offset; + struct acm_policy_version xml_pol_version; /* add in V3 */ +}; + + +struct acm_policy_reference_buffer { + uint32_t len; +}; + +struct acm_chwall_policy_buffer { + uint32_t policy_version; /* ACM_CHWALL_VERSION */ + uint32_t policy_code; + uint32_t chwall_max_types; + uint32_t chwall_max_ssidrefs; + uint32_t chwall_max_conflictsets; + uint32_t chwall_ssid_offset; + uint32_t chwall_conflict_sets_offset; + uint32_t chwall_running_types_offset; + uint32_t chwall_conflict_aggregate_offset; +}; + +struct acm_ste_policy_buffer { + uint32_t policy_version; /* ACM_STE_VERSION */ + uint32_t policy_code; + uint32_t ste_max_types; + uint32_t ste_max_ssidrefs; + uint32_t ste_ssid_offset; +}; + +struct acm_stats_buffer { + uint32_t magic; + uint32_t len; + uint32_t primary_policy_code; + uint32_t primary_stats_offset; + uint32_t secondary_policy_code; + 
uint32_t secondary_stats_offset; +}; + +struct acm_ste_stats_buffer { + uint32_t ec_eval_count; + uint32_t gt_eval_count; + uint32_t ec_denied_count; + uint32_t gt_denied_count; + uint32_t ec_cachehit_count; + uint32_t gt_cachehit_count; +}; + +struct acm_ssid_buffer { + uint32_t len; + ssidref_t ssidref; + uint32_t policy_reference_offset; + uint32_t primary_policy_code; + uint32_t primary_max_types; + uint32_t primary_types_offset; + uint32_t secondary_policy_code; + uint32_t secondary_max_types; + uint32_t secondary_types_offset; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/acm_ops.h b/sys/xen/interface/acm_ops.h new file mode 100644 index 0000000..27a8872 --- /dev/null +++ b/sys/xen/interface/acm_ops.h @@ -0,0 +1,159 @@ +/* + * acm_ops.h: Xen access control module hypervisor commands + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Reiner Sailer <sailer@watson.ibm.com> + * Copyright (c) 2005,2006 International Business Machines Corporation. + */ + +#ifndef __XEN_PUBLIC_ACM_OPS_H__ +#define __XEN_PUBLIC_ACM_OPS_H__ + +#include "xen.h" +#include "acm.h" + +/* + * Make sure you increment the interface version whenever you modify this file! + * This makes sure that old versions of acm tools will stop working in a + * well-defined way (rather than crashing the machine, for instance). + */ +#define ACM_INTERFACE_VERSION 0xAAAA000A + +/************************************************************************/ + +/* + * Prototype for this hypercall is: + * int acm_op(int cmd, void *args) + * @cmd == ACMOP_??? (access control module operation). + * @args == Operation-specific extra arguments (NULL if none). 
+ */ + + +#define ACMOP_setpolicy 1 +struct acm_setpolicy { + /* IN */ + XEN_GUEST_HANDLE_64(void) pushcache; + uint32_t pushcache_size; +}; + + +#define ACMOP_getpolicy 2 +struct acm_getpolicy { + /* IN */ + XEN_GUEST_HANDLE_64(void) pullcache; + uint32_t pullcache_size; +}; + + +#define ACMOP_dumpstats 3 +struct acm_dumpstats { + /* IN */ + XEN_GUEST_HANDLE_64(void) pullcache; + uint32_t pullcache_size; +}; + + +#define ACMOP_getssid 4 +#define ACM_GETBY_ssidref 1 +#define ACM_GETBY_domainid 2 +struct acm_getssid { + /* IN */ + uint32_t get_ssid_by; /* ACM_GETBY_* */ + union { + domaintype_t domainid; + ssidref_t ssidref; + } id; + XEN_GUEST_HANDLE_64(void) ssidbuf; + uint32_t ssidbuf_size; +}; + +#define ACMOP_getdecision 5 +struct acm_getdecision { + /* IN */ + uint32_t get_decision_by1; /* ACM_GETBY_* */ + uint32_t get_decision_by2; /* ACM_GETBY_* */ + union { + domaintype_t domainid; + ssidref_t ssidref; + } id1; + union { + domaintype_t domainid; + ssidref_t ssidref; + } id2; + uint32_t hook; + /* OUT */ + uint32_t acm_decision; +}; + + +#define ACMOP_chgpolicy 6 +struct acm_change_policy { + /* IN */ + XEN_GUEST_HANDLE_64(void) policy_pushcache; + uint32_t policy_pushcache_size; + XEN_GUEST_HANDLE_64(void) del_array; + uint32_t delarray_size; + XEN_GUEST_HANDLE_64(void) chg_array; + uint32_t chgarray_size; + /* OUT */ + /* array with error code */ + XEN_GUEST_HANDLE_64(void) err_array; + uint32_t errarray_size; +}; + +#define ACMOP_relabeldoms 7 +struct acm_relabel_doms { + /* IN */ + XEN_GUEST_HANDLE_64(void) relabel_map; + uint32_t relabel_map_size; + /* OUT */ + XEN_GUEST_HANDLE_64(void) err_array; + uint32_t errarray_size; +}; + +/* future interface to Xen */ +struct xen_acmctl { + uint32_t cmd; + uint32_t interface_version; + union { + struct acm_setpolicy setpolicy; + struct acm_getpolicy getpolicy; + struct acm_dumpstats dumpstats; + struct acm_getssid getssid; + struct acm_getdecision getdecision; + struct acm_change_policy change_policy; + struct 
acm_relabel_doms relabel_doms; + } u; +}; + +typedef struct xen_acmctl xen_acmctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t); + +#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/arch-ia64.h b/sys/xen/interface/arch-ia64.h new file mode 100644 index 0000000..a9afa3b --- /dev/null +++ b/sys/xen/interface/arch-ia64.h @@ -0,0 +1,522 @@ +/****************************************************************************** + * arch-ia64/hypervisor-if.h + * + * Guest OS interface to IA64 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __HYPERVISOR_IF_IA64_H__ +#define __HYPERVISOR_IF_IA64_H__ + +/* Structural guest handles introduced in 0x00030201. 
*/ +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#define uint64_aligned_t uint64_t +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +__DEFINE_XEN_GUEST_HANDLE(u64, unsigned long); +DEFINE_XEN_GUEST_HANDLE(char); +DEFINE_XEN_GUEST_HANDLE(int); +DEFINE_XEN_GUEST_HANDLE(long); +DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +#define PRI_xen_pfn "lx" +#endif + +/* Arch specific VIRQs definition */ +#define VIRQ_ITC VIRQ_ARCH_0 /* V. Virtual itc timer */ +#define VIRQ_MCA_CMC VIRQ_ARCH_1 /* MCA cmc interrupt */ +#define VIRQ_MCA_CPE VIRQ_ARCH_2 /* MCA cpe interrupt */ + +/* Maximum number of virtual CPUs in multi-processor guests. 
*/ +/* WARNING: before changing this, check that shared_info fits on a page */ +#define MAX_VIRT_CPUS 64 + +#ifndef __ASSEMBLY__ + +typedef unsigned long xen_ulong_t; + +#define INVALID_MFN (~0UL) + +#define MEM_G (1UL << 30) +#define MEM_M (1UL << 20) +#define MEM_K (1UL << 10) + +#define MMIO_START (3 * MEM_G) +#define MMIO_SIZE (512 * MEM_M) + +#define VGA_IO_START 0xA0000UL +#define VGA_IO_SIZE 0x20000 + +#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) +#define LEGACY_IO_SIZE (64*MEM_M) + +#define IO_PAGE_START (LEGACY_IO_START + LEGACY_IO_SIZE) +#define IO_PAGE_SIZE PAGE_SIZE + +#define STORE_PAGE_START (IO_PAGE_START + IO_PAGE_SIZE) +#define STORE_PAGE_SIZE PAGE_SIZE + +#define BUFFER_IO_PAGE_START (STORE_PAGE_START+STORE_PAGE_SIZE) +#define BUFFER_IO_PAGE_SIZE PAGE_SIZE + +#define BUFFER_PIO_PAGE_START (BUFFER_IO_PAGE_START+BUFFER_IO_PAGE_SIZE) +#define BUFFER_PIO_PAGE_SIZE PAGE_SIZE + +#define IO_SAPIC_START 0xfec00000UL +#define IO_SAPIC_SIZE 0x100000 + +#define PIB_START 0xfee00000UL +#define PIB_SIZE 0x200000 + +#define GFW_START (4*MEM_G -16*MEM_M) +#define GFW_SIZE (16*MEM_M) + +/* Nvram belongs to GFW memory space */ +#define NVRAM_SIZE (MEM_K * 64) +#define NVRAM_START (GFW_START + 10 * MEM_M) + +#define NVRAM_VALID_SIG 0x4650494e45584948 // "HIXENIPF" +struct nvram_save_addr { + unsigned long addr; + unsigned long signature; +}; + +struct pt_fpreg { + union { + unsigned long bits[2]; + long double __dummy; /* force 16-byte alignment */ + } u; +}; + +struct cpu_user_regs { + /* The following registers are saved by SAVE_MIN: */ + unsigned long b6; /* scratch */ + unsigned long b7; /* scratch */ + + unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ + unsigned long ar_ssd; /* reserved for future use (scratch) */ + + unsigned long r8; /* scratch (return value register 0) */ + unsigned long r9; /* scratch (return value register 1) */ + unsigned long r10; /* scratch (return value register 2) */ + unsigned long r11; /* scratch (return value register 
3) */ + + unsigned long cr_ipsr; /* interrupted task's psr */ + unsigned long cr_iip; /* interrupted task's instruction pointer */ + unsigned long cr_ifs; /* interrupted task's function state */ + + unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ + unsigned long ar_pfs; /* prev function state */ + unsigned long ar_rsc; /* RSE configuration */ + /* The following two are valid only if cr_ipsr.cpl > 0: */ + unsigned long ar_rnat; /* RSE NaT */ + unsigned long ar_bspstore; /* RSE bspstore */ + + unsigned long pr; /* 64 predicate registers (1 bit each) */ + unsigned long b0; /* return pointer (bp) */ + unsigned long loadrs; /* size of dirty partition << 16 */ + + unsigned long r1; /* the gp pointer */ + unsigned long r12; /* interrupted task's memory stack pointer */ + unsigned long r13; /* thread pointer */ + + unsigned long ar_fpsr; /* floating point status (preserved) */ + unsigned long r15; /* scratch */ + + /* The remaining registers are NOT saved for system calls. 
*/ + + unsigned long r14; /* scratch */ + unsigned long r2; /* scratch */ + unsigned long r3; /* scratch */ + unsigned long r16; /* scratch */ + unsigned long r17; /* scratch */ + unsigned long r18; /* scratch */ + unsigned long r19; /* scratch */ + unsigned long r20; /* scratch */ + unsigned long r21; /* scratch */ + unsigned long r22; /* scratch */ + unsigned long r23; /* scratch */ + unsigned long r24; /* scratch */ + unsigned long r25; /* scratch */ + unsigned long r26; /* scratch */ + unsigned long r27; /* scratch */ + unsigned long r28; /* scratch */ + unsigned long r29; /* scratch */ + unsigned long r30; /* scratch */ + unsigned long r31; /* scratch */ + unsigned long ar_ccv; /* compare/exchange value (scratch) */ + + /* + * Floating point registers that the kernel considers scratch: + */ + struct pt_fpreg f6; /* scratch */ + struct pt_fpreg f7; /* scratch */ + struct pt_fpreg f8; /* scratch */ + struct pt_fpreg f9; /* scratch */ + struct pt_fpreg f10; /* scratch */ + struct pt_fpreg f11; /* scratch */ + unsigned long r4; /* preserved */ + unsigned long r5; /* preserved */ + unsigned long r6; /* preserved */ + unsigned long r7; /* preserved */ + unsigned long eml_unat; /* used for emulating instruction */ + unsigned long pad0; /* alignment pad */ + +}; +typedef struct cpu_user_regs cpu_user_regs_t; + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; +typedef union vac vac_t; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; +typedef union vdc vdc_t; + +struct mapped_regs { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + union { + unsigned long vgr[16]; + unsigned long 
bank1_regs[16]; // bank1 regs (r16-r31) when bank0 active + }; + union { + unsigned long vbgr[16]; + unsigned long bank0_regs[16]; // bank0 regs (r16-r31) when bank1 active + }; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; // CR0 + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; // CR8 + unsigned long rsv2[7]; + unsigned long ipsr; // CR16 + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; // CR24 + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; // CR64 + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; // CR72 + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; // CR80 + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + union { + unsigned long reserved5[128]; + struct { + unsigned long precover_ifs; + unsigned long unat; // not sure if this is needed until NaT arch is done + int interrupt_collection_enabled; // virtual psr.ic + /* virtual interrupt deliverable flag is evtchn_upcall_mask in + * shared info area now. 
interrupt_mask_addr is the address + * of evtchn_upcall_mask for current vcpu + */ + unsigned char *interrupt_mask_addr; + int pending_interruption; + unsigned char vpsr_pp; + unsigned char vpsr_dfh; + unsigned char hpsr_dfh; + unsigned char hpsr_mfh; + unsigned long reserved5_1[4]; + int metaphysical_mode; // 1 = use metaphys mapping, 0 = use virtual + int banknum; // 0 or 1, which virtual register bank is active + unsigned long rrs[8]; // region registers + unsigned long krs[8]; // kernel registers + unsigned long pkrs[8]; // protection key registers + unsigned long tmp[8]; // temp registers (e.g. for hyperprivops) + }; + }; +}; +typedef struct mapped_regs mapped_regs_t; + +struct vpd { + struct mapped_regs vpd_low; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +}; +typedef struct vpd vpd_t; + +struct arch_vcpu_info { +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct arch_shared_info { + /* PFN of the start_info page. */ + unsigned long start_info_pfn; + + /* Interrupt vector for event channel. */ + int evtchn_vector; + + uint64_t pad[32]; +}; +typedef struct arch_shared_info arch_shared_info_t; + +typedef unsigned long xen_callback_t; + +struct ia64_tr_entry { + unsigned long pte; + unsigned long itir; + unsigned long vadr; + unsigned long rid; +}; + +struct vcpu_extra_regs { + struct ia64_tr_entry itrs[8]; + struct ia64_tr_entry dtrs[8]; + unsigned long iva; + unsigned long dcr; + unsigned long event_callback_ip; +}; + +struct vcpu_guest_context { +#define VGCF_EXTRA_REGS (1<<1) /* Get/Set extra regs. */ + unsigned long flags; /* VGCF_* flags */ + + struct cpu_user_regs user_regs; + struct vcpu_extra_regs extra_regs; + unsigned long privregs_pfn; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +/* dom0 vp op */ +#define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 +/* Map io space in machine address to dom0 physical address space. 
+ Currently physical assigned address equals to machine address. */ +#define IA64_DOM0VP_ioremap 0 + +/* Convert a pseudo physical page frame number to the corresponding + machine page frame number. If no page is assigned, INVALID_MFN or + GPFN_INV_MASK is returned depending on domain's non-vti/vti mode. */ +#define IA64_DOM0VP_phystomach 1 + +/* Convert a machine page frame number to the corresponding pseudo physical + page frame number of the caller domain. */ +#define IA64_DOM0VP_machtophys 3 + +/* Reserved for future use. */ +#define IA64_DOM0VP_iounmap 4 + +/* Unmap and free pages contained in the specified pseudo physical region. */ +#define IA64_DOM0VP_zap_physmap 5 + +/* Assign machine page frame to dom0's pseudo physical address space. */ +#define IA64_DOM0VP_add_physmap 6 + +/* expose the p2m table into domain */ +#define IA64_DOM0VP_expose_p2m 7 + +/* xen perfmon */ +#define IA64_DOM0VP_perfmon 8 + +/* gmfn version of IA64_DOM0VP_add_physmap */ +#define IA64_DOM0VP_add_physmap_with_gmfn 9 + +/* Add an I/O port space range */ +#define IA64_DOM0VP_add_io_space 11 + +// flags for page assignement to pseudo physical address space +#define _ASSIGN_readonly 0 +#define ASSIGN_readonly (1UL << _ASSIGN_readonly) +#define ASSIGN_writable (0UL << _ASSIGN_readonly) // dummy flag +/* Internal only: memory attribute must be WC/UC/UCE. */ +#define _ASSIGN_nocache 1 +#define ASSIGN_nocache (1UL << _ASSIGN_nocache) +// tlb tracking +#define _ASSIGN_tlb_track 2 +#define ASSIGN_tlb_track (1UL << _ASSIGN_tlb_track) +/* Internal only: associated with PGC_allocated bit */ +#define _ASSIGN_pgc_allocated 3 +#define ASSIGN_pgc_allocated (1UL << _ASSIGN_pgc_allocated) + +/* This structure has the same layout of struct ia64_boot_param, defined in + <asm/system.h>. It is redefined here to ease use. 
*/ +struct xen_ia64_boot_param { + unsigned long command_line; /* physical address of cmd line args */ + unsigned long efi_systab; /* physical address of EFI system table */ + unsigned long efi_memmap; /* physical address of EFI memory map */ + unsigned long efi_memmap_size; /* size of EFI memory map */ + unsigned long efi_memdesc_size; /* size of an EFI memory map descriptor */ + unsigned int efi_memdesc_version; /* memory descriptor version */ + struct { + unsigned short num_cols; /* number of columns on console. */ + unsigned short num_rows; /* number of rows on console. */ + unsigned short orig_x; /* cursor's x position */ + unsigned short orig_y; /* cursor's y position */ + } console_info; + unsigned long fpswa; /* physical address of the fpswa interface */ + unsigned long initrd_start; + unsigned long initrd_size; + unsigned long domain_start; /* va where the boot time domain begins */ + unsigned long domain_size; /* how big is the boot domain */ +}; + +#endif /* !__ASSEMBLY__ */ + +/* Size of the shared_info area (this is not related to page size). */ +#define XSI_SHIFT 14 +#define XSI_SIZE (1 << XSI_SHIFT) +/* Log size of mapped_regs area (64 KB - only 4KB is used). */ +#define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) +/* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ +#define XMAPPEDREGS_OFS XSI_SIZE + +/* Hyperprivops. 
*/ +#define HYPERPRIVOP_START 0x1 +#define HYPERPRIVOP_RFI (HYPERPRIVOP_START + 0x0) +#define HYPERPRIVOP_RSM_DT (HYPERPRIVOP_START + 0x1) +#define HYPERPRIVOP_SSM_DT (HYPERPRIVOP_START + 0x2) +#define HYPERPRIVOP_COVER (HYPERPRIVOP_START + 0x3) +#define HYPERPRIVOP_ITC_D (HYPERPRIVOP_START + 0x4) +#define HYPERPRIVOP_ITC_I (HYPERPRIVOP_START + 0x5) +#define HYPERPRIVOP_SSM_I (HYPERPRIVOP_START + 0x6) +#define HYPERPRIVOP_GET_IVR (HYPERPRIVOP_START + 0x7) +#define HYPERPRIVOP_GET_TPR (HYPERPRIVOP_START + 0x8) +#define HYPERPRIVOP_SET_TPR (HYPERPRIVOP_START + 0x9) +#define HYPERPRIVOP_EOI (HYPERPRIVOP_START + 0xa) +#define HYPERPRIVOP_SET_ITM (HYPERPRIVOP_START + 0xb) +#define HYPERPRIVOP_THASH (HYPERPRIVOP_START + 0xc) +#define HYPERPRIVOP_PTC_GA (HYPERPRIVOP_START + 0xd) +#define HYPERPRIVOP_ITR_D (HYPERPRIVOP_START + 0xe) +#define HYPERPRIVOP_GET_RR (HYPERPRIVOP_START + 0xf) +#define HYPERPRIVOP_SET_RR (HYPERPRIVOP_START + 0x10) +#define HYPERPRIVOP_SET_KR (HYPERPRIVOP_START + 0x11) +#define HYPERPRIVOP_FC (HYPERPRIVOP_START + 0x12) +#define HYPERPRIVOP_GET_CPUID (HYPERPRIVOP_START + 0x13) +#define HYPERPRIVOP_GET_PMD (HYPERPRIVOP_START + 0x14) +#define HYPERPRIVOP_GET_EFLAG (HYPERPRIVOP_START + 0x15) +#define HYPERPRIVOP_SET_EFLAG (HYPERPRIVOP_START + 0x16) +#define HYPERPRIVOP_RSM_BE (HYPERPRIVOP_START + 0x17) +#define HYPERPRIVOP_GET_PSR (HYPERPRIVOP_START + 0x18) +#define HYPERPRIVOP_MAX (0x19) + +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi __HYPERVISOR_arch_1 + +/* Xencomm macros. 
*/ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#define XENCOMM_IS_INLINE(addr) \ + (((unsigned long)(addr) & XENCOMM_INLINE_MASK) == XENCOMM_INLINE_FLAG) +#define XENCOMM_INLINE_ADDR(addr) \ + ((unsigned long)(addr) & ~XENCOMM_INLINE_MASK) + +/* xen perfmon */ +#ifdef XEN +#ifndef __ASSEMBLY__ +#ifndef _ASM_IA64_PERFMON_H + +#include <xen/list.h> // asm/perfmon.h requires struct list_head +#include <asm/perfmon.h> +// for PFM_xxx and pfarg_features_t, pfarg_context_t, pfarg_reg_t, pfarg_load_t + +#endif /* _ASM_IA64_PERFMON_H */ + +DEFINE_XEN_GUEST_HANDLE(pfarg_features_t); +DEFINE_XEN_GUEST_HANDLE(pfarg_context_t); +DEFINE_XEN_GUEST_HANDLE(pfarg_reg_t); +DEFINE_XEN_GUEST_HANDLE(pfarg_load_t); +#endif /* __ASSEMBLY__ */ +#endif /* XEN */ + +#endif /* __HYPERVISOR_IF_IA64_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/arch-powerpc.h b/sys/xen/interface/arch-powerpc.h new file mode 100644 index 0000000..82854d7 --- /dev/null +++ b/sys/xen/interface/arch-powerpc.h @@ -0,0 +1,125 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2005, 2006 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __XEN_PUBLIC_ARCH_PPC_64_H__ +#define __XEN_PUBLIC_ARCH_PPC_64_H__ + +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { \ + int __pad[(sizeof (long long) - sizeof (void *)) / sizeof (int)]; \ + type *p; \ + } __attribute__((__aligned__(8))) __guest_handle_ ## name + +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof ((hnd).__pad)) \ + (hnd).__pad[0] = 0; \ + (hnd).p = val; \ + } while (0) + +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +DEFINE_XEN_GUEST_HANDLE(char); +DEFINE_XEN_GUEST_HANDLE(int); +DEFINE_XEN_GUEST_HANDLE(long); +DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +#define PRI_xen_pfn "llx" +#endif + +/* + * Pointers and other address fields inside interface structures are padded to + * 64 bits. This means that field alignments aren't different between 32- and + * 64-bit architectures. + */ +/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. 
*/ +#define __MEMORY_PADDING(_X) +#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X) +#define MEMORY_PADDING _MEMORY_PADDING(__LINE__) + +/* And the trap vector is... */ +#define TRAP_INSTR "li 0,-1; sc" /* XXX just "sc"? */ + +#ifndef __ASSEMBLY__ + +#define XENCOMM_INLINE_FLAG (1UL << 63) + +typedef uint64_t xen_ulong_t; + +/* User-accessible registers: nost of these need to be saved/restored + * for every nested Xen invocation. */ +struct cpu_user_regs +{ + uint64_t gprs[32]; + uint64_t lr; + uint64_t ctr; + uint64_t srr0; + uint64_t srr1; + uint64_t pc; + uint64_t msr; + uint64_t fpscr; /* XXX Is this necessary */ + uint64_t xer; + uint64_t hid4; /* debug only */ + uint64_t dar; /* debug only */ + uint32_t dsisr; /* debug only */ + uint32_t cr; + uint32_t __pad; /* good spot for another 32bit reg */ + uint32_t entry_vector; +}; +typedef struct cpu_user_regs cpu_user_regs_t; + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ + +/* ONLY used to communicate with dom0! See also struct exec_domain. */ +struct vcpu_guest_context { + cpu_user_regs_t user_regs; /* User-level CPU registers */ + uint64_t sdr1; /* Pagetable base */ + /* XXX etc */ +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + uint64_t boot_timebase; +}; + +struct arch_vcpu_info { +}; + +/* Support for multi-processor guests. */ +#define MAX_VIRT_CPUS 32 +#endif + +#endif diff --git a/sys/xen/interface/arch-x86/xen-x86_32.h b/sys/xen/interface/arch-x86/xen-x86_32.h new file mode 100644 index 0000000..d4bc0ac --- /dev/null +++ b/sys/xen/interface/arch-x86/xen-x86_32.h @@ -0,0 +1,200 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2007, K A Fraser + */ + +#include <sys/param.h> +#include <sys/types.h> + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* Structural guest handles introduced in 0x00030201. 
*/ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct name { type *p; } __guest_handle_ ## name + +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi (arguments 1-5) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +/* + * Legacy hypercall interface: + * As above, except the entry sequence to the hypervisor is: + * mov $hypercall-number*32,%eax ; int $0x82 + */ +#define TRAP_INSTR "int $0x82" +#endif + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + +#ifdef CONFIG_X86_PAE +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE +#else +#warning "not using PAE!!!" 
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_NONPAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_NONPAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_NONPAE +#endif + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) +#endif + +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#undef __DEFINE_XEN_GUEST_HANDLE +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#undef set_xen_guest_handle +#define set_xen_guest_handle(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#define XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +__DEFINE_XEN_GUEST_HANDLE(foobarbaz, cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. 
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/arch-x86/xen-x86_64.h b/sys/xen/interface/arch-x86/xen-x86_64.h new file mode 100644 index 0000000..80135d2 --- /dev/null +++ b/sys/xen/interface/arch-x86/xen-x86_64.h @@ -0,0 +1,211 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +/* + * Legacy hypercall interface: + * As above, except the entry sequence to the hypervisor is: + * mov $hypercall-number*32,%eax ; syscall + * Clobbered: %rcx, %r11, argument registers (as above) + */ +#define TRAP_INSTR "syscall" +#endif + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */
+/*
+ * Every non-null selector below has its low two bits set (RPL 3).
+ * NOTE(review): the descriptor index is selector >> 3, which gives
+ * 0xe023 -> 7172, 0xe02b -> 7173 and 0xe033 -> 7174. That does not match the
+ * "GDT index 26x" annotations, so they look stale -- confirm before relying
+ * on them.
+ */
+#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000 /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */
+/* Kernel and user names both alias the RING3 selectors; the unsuffixed names pick the 64-bit variant. */
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+/*
+ * The machine-to-physical table is addressed through HYPERVISOR_VIRT_START
+ * here; with the constants above that is the same address as
+ * MACH2PHYS_VIRT_START. The entry count is the byte span divided by 8.
+ */
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+#ifndef __ASSEMBLY__
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ * @which == SEGBASE_* ; @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS 0
+#define SEGBASE_GS_USER 1
+#define SEGBASE_GS_KERNEL 2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
+ * If flags contains VGCF_in_syscall:
+ *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Discard R11, RCX, CS, SS.
+ * Otherwise:
+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL VGCF_in_syscall
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; /* nine 64-bit words; `flags` carries VGCF_* bits and is distinct from `rflags` */
+    /* Bottom of iret stack frame. */
+};
+
+#ifdef __GNUC__
+/*
+ * Anonymous union includes both 32- and 64-bit names (e.g., eax/rax).
+ * Both r<name> and e<name> are declared uint64_t, so they are two names for
+ * the same 64-bit value; _e<name> is the uint32_t view of the same storage.
+ */
+#define __DECL_REG(name) union { \
+    uint64_t r ## name, e ## name; \
+    uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax).
*/
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+/*
+ * Register frame for 64-bit x86. Members declared through __DECL_REG are
+ * 64 bits wide. Each 16-bit selector is followed by enough uint16_t padding
+ * to fill an 8-byte slot; cs shares its slot with saved_upcall_mask.
+ */
+struct cpu_user_regs {
+    uint64_t r15;
+    uint64_t r14;
+    uint64_t r13;
+    uint64_t r12;
+    __DECL_REG(bp);
+    __DECL_REG(bx);
+    uint64_t r11;
+    uint64_t r10;
+    uint64_t r9;
+    uint64_t r8;
+    __DECL_REG(ax);
+    __DECL_REG(cx);
+    __DECL_REG(dx);
+    __DECL_REG(si);
+    __DECL_REG(di);
+    uint32_t error_code; /* private */
+    uint32_t entry_vector; /* private */
+    __DECL_REG(ip);
+    uint16_t cs, _pad0[1];
+    uint8_t saved_upcall_mask;
+    uint8_t _pad1[3];
+    __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */
+    __DECL_REG(sp);
+    uint16_t ss, _pad2[3];
+    uint16_t es, _pad3[3];
+    uint16_t ds, _pad4[3];
+    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */
+    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG
+/* On 64-bit the cr3 <-> pfn conversion is a plain 12-bit shift. */
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+typedef unsigned long xen_callback_t; /* a single address here; the 32-bit header uses a cs/eip struct */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/sys/xen/interface/arch-x86/xen.h b/sys/xen/interface/arch-x86/xen.h
new file mode 100644
index 0000000..158947f
--- /dev/null
+++ b/sys/xen/interface/arch-x86/xen.h
@@ -0,0 +1,189 @@
+/******************************************************************************
+ * arch-x86/xen.h
+ *
+ * Guest OS interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+/*
+ * Pull in the word-size specific definitions.
+ * NOTE(review): the i386 branch uses a system-style <xen/interface/...>
+ * path while the x86_64 branch uses a file-relative "..." include -- confirm
+ * that the asymmetry is intended.
+ */
+#if defined(__i386__)
+#include <xen/interface/arch-x86/xen-x86_32.h>
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+#if 0 /* NOTE(review): char/int/long/void handles are compiled out; the reason is not visible in this file */
+DEFINE_XEN_GUEST_HANDLE(char);
+DEFINE_XEN_GUEST_HANDLE(int);
+DEFINE_XEN_GUEST_HANDLE(long);
+DEFINE_XEN_GUEST_HANDLE(void);
+#endif
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+#define PRI_xen_pfn "lx" /* printf conversion matching xen_pfn_t (unsigned long, hex) */
+#endif
+
+/*
+ * SEGMENT DESCRIPTOR TABLES
+ */
+/*
+ * A number of GDT entries are reserved by Xen.
These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ */
+#define FIRST_RESERVED_GDT_PAGE 14
+#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) /* 14 * 4096 / 8 = 7168 */
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
+ *
+ * In the flags byte the privilege level is the value masked by 3 and the
+ * "IF" flag is the bit with value 4. The TI_SET_* macros only OR bits in:
+ * they never clear anything and TI_SET_DPL does not mask _dpl, so they
+ * assume a flags field that starts out as zero and a _dpl in the range 0..3.
+ */
+#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
+#define TI_GET_IF(_ti) ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+    uint8_t vector; /* exception vector */
+    uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
+    uint16_t cs; /* code selector */
+    unsigned long address; /* code offset */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+struct vcpu_guest_context {
+    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR.
*/
+    struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
+#define VGCF_I387_VALID (1<<0) /* same value as VGCF_i387_valid below */
+#define VGCF_IN_KERNEL (1<<2) /* same value as VGCF_in_kernel below */
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events 4
+#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online 5
+#define VGCF_online (1<<_VGCF_online)
+    unsigned long flags; /* VGCF_* flags */
+    struct cpu_user_regs user_regs; /* User-level CPU registers */
+    struct trap_info trap_ctxt[256]; /* Virtual IDT */
+    unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
+    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+    unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
+    unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
+    unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
+#ifdef __i386__
+    unsigned long event_callback_cs; /* CS:EIP of event callback */
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
+    unsigned long failsafe_callback_eip;
+#else /* not i386: callbacks are bare entry addresses, no selector member */
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_eip;
+#ifdef __XEN__ /* hypervisor build: the slot is a union that overlays two compat CS values */
+    union {
+        unsigned long syscall_callback_eip;
+        struct {
+            unsigned int event_callback_cs; /* compat CS of event cb */
+            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
+        };
+    } u;
+#else
+    unsigned long syscall_callback_eip;
+#endif
+#endif
+    unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+    /* Segment base addresses.
*/
+    uint64_t fs_base;
+    uint64_t gs_base_kernel;
+    uint64_t gs_base_user;
+#endif
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+/* Architecture-specific part of the shared-info area; ends with 32 reserved 64-bit words. */
+struct arch_shared_info {
+    unsigned long max_pfn; /* max pfn that appears in table */
+    /* Frame containing list of mfns containing list of mfns containing p2m. */
+    xen_pfn_t pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
+    uint64_t pad[32];
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Prefix forces emulation of some non-trapping instructions.
+ * Currently only CPUID.
+ *
+ * The prefix is five bytes: 0x0f,0x0b (the UD2 opcode) followed by
+ * 0x78,0x65,0x6e, which is ASCII "xen". The assembler variant expands to a
+ * bare .byte directive; the C variant is a string literal intended to be
+ * concatenated into an asm template, as XEN_CPUID does with "cpuid".
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
+#endif
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/sys/xen/interface/arch-x86_32.h b/sys/xen/interface/arch-x86_32.h
new file mode 100644
index 0000000..1572fe3
--- /dev/null
+++ b/sys/xen/interface/arch-x86_32.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include <xen/interface/arch-x86/xen.h> diff --git a/sys/xen/interface/arch-x86_64.h b/sys/xen/interface/arch-x86_64.h new file mode 100644 index 0000000..fbb2639 --- /dev/null +++ b/sys/xen/interface/arch-x86_64.h @@ -0,0 +1,27 @@ +/****************************************************************************** + * arch-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" diff --git a/sys/xen/interface/callback.h b/sys/xen/interface/callback.h new file mode 100644 index 0000000..cac5389 --- /dev/null +++ b/sys/xen/interface/callback.h @@ -0,0 +1,92 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *   long callback_op(int cmd, void *extra_args)
+ * @cmd        == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+/*
+ * Callback kinds. Presumably these are the values carried in the `type`
+ * member of struct callback_register / struct callback_unregister further
+ * down in this header -- confirm.
+ */
+#define CALLBACKTYPE_event 0
+#define CALLBACKTYPE_failsafe 1
+#define CALLBACKTYPE_syscall 2 /* x86_64 only */
+/*
+ * sysenter is only available on x86_32 with the
+ * supervisor_mode_kernel option enabled.
+ */
+#define CALLBACKTYPE_sysenter 3
+#define CALLBACKTYPE_nmi 4
+
+/*
+ * Disable event deliver during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events 0
+#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register 0
+struct callback_register {
+    uint16_t type; /* presumably a CALLBACKTYPE_* value -- confirm */
+    uint16_t flags; /* presumably CALLBACKF_* bits -- confirm */
+    xen_callback_t address; /* entry point; xen_callback_t is defined per architecture */
+};
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister 1
+struct callback_unregister {
+    uint16_t type; /* presumably a CALLBACKTYPE_* value -- confirm */
+    uint16_t _unused;
+};
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/sys/xen/interface/dom0_ops.h b/sys/xen/interface/dom0_ops.h
new file mode 100644
index 0000000..5d2b324
--- /dev/null
+++ b/sys/xen/interface/dom0_ops.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ * dom0_ops.h
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOM0_OPS_H__
+#define __XEN_PUBLIC_DOM0_OPS_H__
+
+#include "xen.h"
+#include "platform.h"
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030204
+#error "dom0_ops.h is a compatibility interface only"
+#endif
+
+#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION
+/*
+ * Each legacy DOM0_* / dom0_* name below is a one-to-one alias for the
+ * matching XENPF_* / xenpf_* name (presumably declared by platform.h,
+ * included above -- confirm).
+ */
+#define DOM0_SETTIME XENPF_settime
+#define dom0_settime xenpf_settime
+#define dom0_settime_t xenpf_settime_t
+
+#define DOM0_ADD_MEMTYPE XENPF_add_memtype
+#define dom0_add_memtype xenpf_add_memtype
+#define dom0_add_memtype_t xenpf_add_memtype_t
+
+#define DOM0_DEL_MEMTYPE XENPF_del_memtype
+#define dom0_del_memtype xenpf_del_memtype
+#define dom0_del_memtype_t xenpf_del_memtype_t
+
+#define DOM0_READ_MEMTYPE XENPF_read_memtype
+#define dom0_read_memtype xenpf_read_memtype
+#define dom0_read_memtype_t xenpf_read_memtype_t
+
+#define DOM0_MICROCODE XENPF_microcode_update
+#define dom0_microcode xenpf_microcode_update
+#define dom0_microcode_t xenpf_microcode_update_t
+
+#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk
+#define dom0_platform_quirk xenpf_platform_quirk
+#define dom0_platform_quirk_t xenpf_platform_quirk_t
+
+typedef uint64_t cpumap_t; /* 64-bit integer; presumably one bit per CPU -- confirm */
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_MSR 15
+struct dom0_msr {
+    /* IN variables. */
+    uint32_t write;
+    cpumap_t cpu_mask;
+    uint32_t msr;
+    uint32_t in1;
+    uint32_t in2;
+    /* OUT variables.
*/
+    uint32_t out1;
+    uint32_t out2;
+};
+typedef struct dom0_msr dom0_msr_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_PHYSICAL_MEMORY_MAP 40
+struct dom0_memory_map_entry {
+    uint64_t start, end;
+    uint32_t flags; /* reserved */
+    uint8_t is_ram;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
+/*
+ * Top-level request: an operation code, the interface version, and a union
+ * of the per-operation payloads. The pad[128] member means the union is
+ * never smaller than 128 bytes, whatever the payload sizes are.
+ */
+struct dom0_op {
+    uint32_t cmd; /* presumably one of the DOM0_* codes -- confirm */
+    uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
+    union {
+        struct dom0_msr msr;
+        struct dom0_settime settime;
+        struct dom0_add_memtype add_memtype;
+        struct dom0_del_memtype del_memtype;
+        struct dom0_read_memtype read_memtype;
+        struct dom0_microcode microcode;
+        struct dom0_platform_quirk platform_quirk;
+        struct dom0_memory_map_entry physical_memory_map;
+        uint8_t pad[128];
+    } u;
+};
+typedef struct dom0_op dom0_op_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
+
+#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/sys/xen/interface/domctl.h b/sys/xen/interface/domctl.h
new file mode 100644
index 0000000..fa1dbd9
--- /dev/null
+++ b/sys/xen/interface/domctl.h
@@ -0,0 +1,481 @@
+/******************************************************************************
+ * domctl.h
+ *
+ * Domain management operations. For use by node control stack.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOMCTL_H__
+#define __XEN_PUBLIC_DOMCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "domctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005
+/*
+ * CPU set passed by reference: a 64-bit guest handle to a byte array plus a
+ * count. Presumably nr_cpus is the number of valid bits in the bitmap --
+ * confirm against the hypervisor side.
+ */
+struct xenctl_cpumap {
+    XEN_GUEST_HANDLE_64(uint8_t) bitmap;
+    uint32_t nr_cpus;
+};
+
+/*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+ * If it is specified as zero, an id is auto-allocated and returned.
+ */
+#define XEN_DOMCTL_createdomain 1
+struct xen_domctl_createdomain {
+    /* IN parameters */
+    uint32_t ssidref;
+    xen_domain_handle_t handle;
+    /* Is this an HVM guest (as opposed to a PV guest)?
*/
+#define _XEN_DOMCTL_CDF_hvm_guest 0
+#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+    uint32_t flags;
+};
+typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
+/* Operation numbers for which this header declares no argument structure. */
+#define XEN_DOMCTL_destroydomain 2
+#define XEN_DOMCTL_pausedomain 3
+#define XEN_DOMCTL_unpausedomain 4
+#define XEN_DOMCTL_resumedomain 27
+
+#define XEN_DOMCTL_getdomaininfo 5
+struct xen_domctl_getdomaininfo {
+    /* OUT variables. */
+    domid_t domain; /* Also echoed in domctl.domain */
+    /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying 0
+#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying)
+    /* Domain is an HVM guest (as opposed to a PV guest). */
+#define _XEN_DOMINF_hvm_guest 1
+#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest)
+    /* The guest OS has shut down. */
+#define _XEN_DOMINF_shutdown 2
+#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown)
+    /* Currently paused by control software. */
+#define _XEN_DOMINF_paused 3
+#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused)
+    /* Currently blocked pending an event. */
+#define _XEN_DOMINF_blocked 4
+#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked)
+    /* Domain is currently running. */
+#define _XEN_DOMINF_running 5
+#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running)
+    /* Being debugged. */
+#define _XEN_DOMINF_debugged 6
+#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged)
+    /* CPU to which this domain is bound. */
+#define XEN_DOMINF_cpumask 255
+#define XEN_DOMINF_cpushift 8 /* presumably read as (flags >> cpushift) & cpumask -- confirm */
+    /* XEN_DOMINF_shutdown guest-supplied code. */
+#define XEN_DOMINF_shutdownmask 255
+#define XEN_DOMINF_shutdownshift 16 /* presumably read as (flags >> shutdownshift) & shutdownmask -- confirm */
+    uint32_t flags; /* XEN_DOMINF_* */
+    uint64_aligned_t tot_pages;
+    uint64_aligned_t max_pages;
+    uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
+    uint64_aligned_t cpu_time;
+    uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
+    uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain.
*/
+    uint32_t ssidref;
+    xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
+
+
+#define XEN_DOMCTL_getmemlist 6
+struct xen_domctl_getmemlist {
+    /* IN variables. */
+    /* Max entries to write to output buffer. */
+    uint64_aligned_t max_pfns;
+    /* Start index in guest's page list. */
+    uint64_aligned_t start_pfn;
+    XEN_GUEST_HANDLE_64(uint64_t) buffer;
+    /* OUT variables. */
+    uint64_aligned_t num_pfns;
+};
+typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
+
+
+#define XEN_DOMCTL_getpageframeinfo 7
+/*
+ * Page-type encoding, all within bits 28..31 of a 32-bit word:
+ *  - LTABTYPE_MASK (bits 28..30) selects NOTAB or L1TAB..L4TAB;
+ *  - LPINTAB is bit 31 on its own;
+ *  - XTAB is all four bits set and marks an invalid page;
+ *  - LTAB_MASK covers the whole four-bit field.
+ */
+#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
+#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28)
+#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28)
+#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28)
+#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28)
+#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28)
+#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
+#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
+#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */
+#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
+
+struct xen_domctl_getpageframeinfo {
+    /* IN variables. */
+    uint64_aligned_t gmfn; /* GMFN to query */
+    /* OUT variables. */
+    /* Is the page PINNED to a type? */
+    uint32_t type; /* see above type defs */
+};
+typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
+
+
+#define XEN_DOMCTL_getpageframeinfo2 8
+struct xen_domctl_getpageframeinfo2 {
+    /* IN variables. */
+    uint64_aligned_t num;
+    /* IN/OUT variables. */
+    XEN_GUEST_HANDLE_64(uint32_t) array;
+};
+typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
+
+
+/*
+ * Control shadow pagetables operation
+ */
+#define XEN_DOMCTL_shadow_op 10
+
+/* Disable shadow mode.
*/
+#define XEN_DOMCTL_SHADOW_OP_OFF 0
+
+/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE 32
+
+/* Log-dirty bitmap operations. */
+ /* Return the bitmap and clean internal copy for next round. */
+#define XEN_DOMCTL_SHADOW_OP_CLEAN 11
+ /* Return the bitmap but do not modify internal copy. */
+#define XEN_DOMCTL_SHADOW_OP_PEEK 12
+
+/* Memory allocation accessors. */
+#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30
+#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31
+
+/* Legacy enable operations. */
+ /* Equiv. to ENABLE with no mode flags. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1
+ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2
+ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3
+
+/*
+ * Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE.
+ * The flags defined here occupy bits 1..4; bit 0 is not assigned in this
+ * header.
+ */
+ /*
+  * Shadow pagetables are refcounted: guest does not use explicit mmu
+  * operations nor write-protect its pagetables.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1)
+ /*
+  * Log pages in a bitmap as they are dirtied.
+  * Used for live relocation to determine which pages must be re-sent.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2)
+ /*
+  * Automatically translate GPFNs into MFNs.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3)
+ /*
+  * Xen does not steal virtual address space from the guest.
+  * Requires HVM support.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4)
+/* Two 32-bit counters; embedded by value as the `stats` member of struct xen_domctl_shadow_op. */
+struct xen_domctl_shadow_op_stats {
+    uint32_t fault_count;
+    uint32_t dirty_count;
+};
+typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
+
+struct xen_domctl_shadow_op {
+    /* IN variables.
*/
+    uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */
+
+    /* OP_ENABLE */
+    uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */
+
+    /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
+    uint32_t mb; /* Shadow memory allocation in MB */
+
+    /* OP_PEEK / OP_CLEAN */
+    XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap;
+    uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
+    struct xen_domctl_shadow_op_stats stats;
+};
+typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
+
+/* Memory ceiling for a domain; max_memkb is presumably in kilobytes (from the field name) -- confirm. */
+#define XEN_DOMCTL_max_mem 11
+struct xen_domctl_max_mem {
+    /* IN variables. */
+    uint64_aligned_t max_memkb;
+};
+typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
+
+/* One structure serves both the set and the get operation; ctxt is a 64-bit guest handle to the context block. */
+#define XEN_DOMCTL_setvcpucontext 12
+#define XEN_DOMCTL_getvcpucontext 13
+struct xen_domctl_vcpucontext {
+    uint32_t vcpu; /* IN */
+    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
+
+
+#define XEN_DOMCTL_getvcpuinfo 14
+struct xen_domctl_getvcpuinfo {
+    /* IN variables. */
+    uint32_t vcpu;
+    /* OUT variables. */
+    uint8_t online; /* currently online (not hotplugged)? */
+    uint8_t blocked; /* blocked waiting for an event? */
+    uint8_t running; /* currently scheduled on its CPU? */
+    uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */
+    uint32_t cpu; /* current mapping */
+};
+typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+
+
+/* Get/set which physical cpus a vcpu can execute on.
*/ +#define XEN_DOMCTL_setvcpuaffinity 9 +#define XEN_DOMCTL_getvcpuaffinity 25 +struct xen_domctl_vcpuaffinity { + uint32_t vcpu; /* IN */ + struct xenctl_cpumap cpumap; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); + + +#define XEN_DOMCTL_max_vcpus 15 +struct xen_domctl_max_vcpus { + uint32_t max; /* maximum number of vcpus */ +}; +typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); + + +#define XEN_DOMCTL_scheduler_op 16 +/* Scheduler types. */ +#define XEN_SCHEDULER_SEDF 4 +#define XEN_SCHEDULER_CREDIT 5 +/* Set or get info? */ +#define XEN_DOMCTL_SCHEDOP_putinfo 0 +#define XEN_DOMCTL_SCHEDOP_getinfo 1 +struct xen_domctl_scheduler_op { + uint32_t sched_id; /* XEN_SCHEDULER_* */ + uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ + union { + struct xen_domctl_sched_sedf { + uint64_aligned_t period; + uint64_aligned_t slice; + uint64_aligned_t latency; + uint32_t extratime; + uint32_t weight; + } sedf; + struct xen_domctl_sched_credit { + uint16_t weight; + uint16_t cap; + } credit; + } u; +}; +typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); + + +#define XEN_DOMCTL_setdomainhandle 17 +struct xen_domctl_setdomainhandle { + xen_domain_handle_t handle; +}; +typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); + + +#define XEN_DOMCTL_setdebugging 18 +struct xen_domctl_setdebugging { + uint8_t enable; +}; +typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); + + +#define XEN_DOMCTL_irq_permission 19 +struct xen_domctl_irq_permission { + uint8_t pirq; + uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ +}; +typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t; 
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); + + +#define XEN_DOMCTL_iomem_permission 20 +struct xen_domctl_iomem_permission { + uint64_aligned_t first_mfn;/* first page (physical page number) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ +}; +typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); + + +#define XEN_DOMCTL_ioport_permission 21 +struct xen_domctl_ioport_permission { + uint32_t first_port; /* first port in range */ + uint32_t nr_ports; /* size of port range */ + uint8_t allow_access; /* allow or deny access to range? */ +}; +typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); + + +#define XEN_DOMCTL_hypercall_init 22 +struct xen_domctl_hypercall_init { + uint64_aligned_t gmfn; /* GMFN to be initialised */ +}; +typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); + + +#define XEN_DOMCTL_arch_setup 23 +#define _XEN_DOMAINSETUP_hvm_guest 0 +#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) +#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ +#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) +typedef struct xen_domctl_arch_setup { + uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */ +#ifdef __ia64__ + uint64_aligned_t bp; /* mpaddr of boot param area */ + uint64_aligned_t maxmem; /* Highest memory address for MDT. */ + uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */ + uint32_t hypercall_imm; /* Break imm for Xen hypercalls.
*/ +#endif +} xen_domctl_arch_setup_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); + + +#define XEN_DOMCTL_settimeoffset 24 +struct xen_domctl_settimeoffset { + int32_t time_offset_seconds; /* applied to domain wallclock time */ +}; +typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); + + +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +typedef struct xen_domctl_hvmcontext { + uint32_t size; /* IN/OUT: size of buffer / bytes filled */ + XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size + * req'd */ +} xen_domctl_hvmcontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); + + +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +typedef struct xen_domctl_address_size { + uint32_t size; +} xen_domctl_address_size_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); + + +#define XEN_DOMCTL_real_mode_area 26 +struct xen_domctl_real_mode_area { + uint32_t log; /* log2 of Real Mode Area size */ +}; +typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + + +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_SENDTRIGGER_NMI 0 +#define XEN_DOMCTL_SENDTRIGGER_RESET 1 +#define XEN_DOMCTL_SENDTRIGGER_INIT 2 +struct xen_domctl_sendtrigger { + uint32_t trigger; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); + + +struct xen_domctl { + uint32_t cmd; + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ + domid_t domain; + union { + struct xen_domctl_createdomain createdomain; + struct xen_domctl_getdomaininfo getdomaininfo; + struct xen_domctl_getmemlist getmemlist; + struct xen_domctl_getpageframeinfo getpageframeinfo; + struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct 
xen_domctl_vcpuaffinity vcpuaffinity; + struct xen_domctl_shadow_op shadow_op; + struct xen_domctl_max_mem max_mem; + struct xen_domctl_vcpucontext vcpucontext; + struct xen_domctl_getvcpuinfo getvcpuinfo; + struct xen_domctl_max_vcpus max_vcpus; + struct xen_domctl_scheduler_op scheduler_op; + struct xen_domctl_setdomainhandle setdomainhandle; + struct xen_domctl_setdebugging setdebugging; + struct xen_domctl_irq_permission irq_permission; + struct xen_domctl_iomem_permission iomem_permission; + struct xen_domctl_ioport_permission ioport_permission; + struct xen_domctl_hypercall_init hypercall_init; + struct xen_domctl_arch_setup arch_setup; + struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_address_size address_size; + struct xen_domctl_sendtrigger sendtrigger; + uint8_t pad[128]; + } u; +}; +typedef struct xen_domctl xen_domctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); + +#endif /* __XEN_PUBLIC_DOMCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/elfnote.h b/sys/xen/interface/elfnote.h new file mode 100644 index 0000000..77be41b --- /dev/null +++ b/sys/xen/interface/elfnote.h @@ -0,0 +1,233 @@ +/****************************************************************************** + * elfnote.h + * + * Definitions used for the Xen ELF notes. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell, XenSource Ltd. + */ + +#ifndef __XEN_PUBLIC_ELFNOTE_H__ +#define __XEN_PUBLIC_ELFNOTE_H__ + +/* + * The notes should live in a PT_NOTE segment and have "Xen" in the + * name field. + * + * Numeric types are either 4 or 8 bytes depending on the content of + * the desc field. + * + * LEGACY indicates the fields in the legacy __xen_guest string which + * this note type replaces. + */ + +/* + * NAME=VALUE pair (string). + */ +#define XEN_ELFNOTE_INFO 0 + +/* + * The virtual address of the entry point (numeric). + * + * LEGACY: VIRT_ENTRY + */ +#define XEN_ELFNOTE_ENTRY 1 + +/* The virtual address of the hypercall transfer page (numeric). + * + * LEGACY: HYPERCALL_PAGE. (n.b.
legacy value is a physical page + * number not a virtual address) + */ +#define XEN_ELFNOTE_HYPERCALL_PAGE 2 + +/* The virtual address where the kernel image should be mapped (numeric). + * + * Defaults to 0. + * + * LEGACY: VIRT_BASE + */ +#define XEN_ELFNOTE_VIRT_BASE 3 + +/* + * The offset of the ELF paddr field from the actual required + * pseudo-physical address (numeric). + * + * This is used to maintain backwards compatibility with older kernels + * which wrote __PAGE_OFFSET into that field. This field defaults to 0 + * if not present. + * + * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) + */ +#define XEN_ELFNOTE_PADDR_OFFSET 4 + +/* + * The version of Xen that we work with (string). + * + * LEGACY: XEN_VER + */ +#define XEN_ELFNOTE_XEN_VERSION 5 + +/* + * The name of the guest operating system (string). + * + * LEGACY: GUEST_OS + */ +#define XEN_ELFNOTE_GUEST_OS 6 + +/* + * The version of the guest operating system (string). + * + * LEGACY: GUEST_VER + */ +#define XEN_ELFNOTE_GUEST_VERSION 7 + +/* + * The loader type (string). + * + * LEGACY: LOADER + */ +#define XEN_ELFNOTE_LOADER 8 + +/* + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. + * + * LEGACY: PAE (n.b. The legacy interface included a provision to + * indicate 'extended-cr3' support allowing L3 page tables to be + * placed above 4G. It is assumed that any kernel new enough to use + * these ELF notes will include this and therefore "yes" here is + * equivalent to "yes[extended-cr3]" in the __xen_guest interface.) + */ +#define XEN_ELFNOTE_PAE_MODE 9 + +/* + * The features supported/required by this kernel (string). + * + * The string must consist of a list of feature names (as given in + * features.h, without the "XENFEAT_" prefix) separated by '|' + * characters.
If a feature is required for the kernel to function + * then the feature name must be preceded by a '!' character. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_FEATURES 10 + +/* + * The kernel requires the symbol table to be loaded (string = "yes" or "no") + * LEGACY: BSD_SYMTAB (n.b. The legacy interface treated the presence or absence + * of this string as a boolean flag rather than requiring "yes" or + * "no".) + */ +#define XEN_ELFNOTE_BSD_SYMTAB 11 + +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + +#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/elfstructs.h b/sys/xen/interface/elfstructs.h new file mode 100644 index 0000000..77362f3 --- /dev/null +++ b/sys/xen/interface/elfstructs.h @@ -0,0 +1,527 @@ +#ifndef __XEN_PUBLIC_ELFSTRUCTS_H__ +#define __XEN_PUBLIC_ELFSTRUCTS_H__ 1 +/* + * Copyright (c) 1995, 1996 Erik Theisen. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +typedef uint8_t Elf_Byte; + +typedef uint32_t Elf32_Addr; /* Unsigned program address */ +typedef uint32_t Elf32_Off; /* Unsigned file offset */ +typedef int32_t Elf32_Sword; /* Signed large integer */ +typedef uint32_t Elf32_Word; /* Unsigned large integer */ +typedef uint16_t Elf32_Half; /* Unsigned medium integer */ + +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; +typedef int32_t Elf64_Shalf; + +typedef int32_t Elf64_Sword; +typedef uint32_t Elf64_Word; + +typedef int64_t Elf64_Sxword; +typedef uint64_t Elf64_Xword; + +typedef uint32_t Elf64_Half; +typedef uint16_t Elf64_Quarter; + +/* + * e_ident[] identification indexes + * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html + */ +#define EI_MAG0 0 /* file ID */ +#define EI_MAG1 1 /* file ID */ +#define EI_MAG2 2 /* file ID */ +#define EI_MAG3 3 /* file ID */ +#define EI_CLASS 4 /* file class */ +#define EI_DATA 5 /* data encoding */ +#define EI_VERSION 6 /* ELF header version */ +#define EI_OSABI 7 /* OS/ABI ID */ +#define EI_ABIVERSION 8 /* ABI version */ +#define EI_PAD 9 /* start of pad bytes */ +#define EI_NIDENT 16 /* Size of e_ident[] */ + +/* e_ident[] magic number */ +#define ELFMAG0 0x7f /* e_ident[EI_MAG0] */ +#define ELFMAG1 'E' /* e_ident[EI_MAG1] */ +#define ELFMAG2 'L' /* e_ident[EI_MAG2] */ +#define ELFMAG3 'F' /* e_ident[EI_MAG3] */ +#define ELFMAG "\177ELF" /* magic */ +#define SELFMAG 4 /* size of magic */ + +/* e_ident[] file class */ +#define ELFCLASSNONE 0 /* invalid */ +#define ELFCLASS32 1 /* 32-bit objs */ +#define ELFCLASS64 2 /* 64-bit objs */ +#define ELFCLASSNUM 3 /* number of classes */ + +/* e_ident[] data encoding */ +#define ELFDATANONE 0 /* invalid */ +#define ELFDATA2LSB 1 /* Little-Endian */ +#define ELFDATA2MSB 2 /* Big-Endian */ +#define ELFDATANUM 3 /* number of data encode defines */ + +/* e_ident[] Operating System/ABI */ +#define ELFOSABI_SYSV 0 /* UNIX System V ABI */ +#define ELFOSABI_HPUX 1 /* HP-UX operating system */ 
+#define ELFOSABI_NETBSD 2 /* NetBSD */ +#define ELFOSABI_LINUX 3 /* GNU/Linux */ +#define ELFOSABI_HURD 4 /* GNU/Hurd */ +#define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */ +#define ELFOSABI_SOLARIS 6 /* Solaris */ +#define ELFOSABI_MONTEREY 7 /* Monterey */ +#define ELFOSABI_IRIX 8 /* IRIX */ +#define ELFOSABI_FREEBSD 9 /* FreeBSD */ +#define ELFOSABI_TRU64 10 /* TRU64 UNIX */ +#define ELFOSABI_MODESTO 11 /* Novell Modesto */ +#define ELFOSABI_OPENBSD 12 /* OpenBSD */ +#define ELFOSABI_ARM 97 /* ARM */ +#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ + +/* e_ident */ +#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \ + (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \ + (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \ + (ehdr).e_ident[EI_MAG3] == ELFMAG3) + +/* ELF Header */ +typedef struct elfhdr { + unsigned char e_ident[EI_NIDENT]; /* ELF Identification */ + Elf32_Half e_type; /* object file type */ + Elf32_Half e_machine; /* machine */ + Elf32_Word e_version; /* object file version */ + Elf32_Addr e_entry; /* virtual entry point */ + Elf32_Off e_phoff; /* program header table offset */ + Elf32_Off e_shoff; /* section header table offset */ + Elf32_Word e_flags; /* processor-specific flags */ + Elf32_Half e_ehsize; /* ELF header size */ + Elf32_Half e_phentsize; /* program header entry size */ + Elf32_Half e_phnum; /* number of program header entries */ + Elf32_Half e_shentsize; /* section header entry size */ + Elf32_Half e_shnum; /* number of section header entries */ + Elf32_Half e_shstrndx; /* section header table's "section + header string table" entry offset */ +} Elf32_Ehdr; + +typedef struct { + unsigned char e_ident[EI_NIDENT]; /* Id bytes */ + Elf64_Quarter e_type; /* file type */ + Elf64_Quarter e_machine; /* machine type */ + Elf64_Half e_version; /* version number */ + Elf64_Addr e_entry; /* entry point */ + Elf64_Off e_phoff; /* Program hdr offset */ + Elf64_Off e_shoff; /* Section hdr offset */ + Elf64_Half e_flags; /* 
Processor flags */ + Elf64_Quarter e_ehsize; /* sizeof ehdr */ + Elf64_Quarter e_phentsize; /* Program header entry size */ + Elf64_Quarter e_phnum; /* Number of program headers */ + Elf64_Quarter e_shentsize; /* Section header entry size */ + Elf64_Quarter e_shnum; /* Number of section headers */ + Elf64_Quarter e_shstrndx; /* String table index */ +} Elf64_Ehdr; + +/* e_type */ +#define ET_NONE 0 /* No file type */ +#define ET_REL 1 /* relocatable file */ +#define ET_EXEC 2 /* executable file */ +#define ET_DYN 3 /* shared object file */ +#define ET_CORE 4 /* core file */ +#define ET_NUM 5 /* number of types */ +#define ET_LOPROC 0xff00 /* reserved range for processor */ +#define ET_HIPROC 0xffff /* specific e_type */ + +/* e_machine */ +#define EM_NONE 0 /* No Machine */ +#define EM_M32 1 /* AT&T WE 32100 */ +#define EM_SPARC 2 /* SPARC */ +#define EM_386 3 /* Intel 80386 */ +#define EM_68K 4 /* Motorola 68000 */ +#define EM_88K 5 /* Motorola 88000 */ +#define EM_486 6 /* Intel 80486 - unused? 
*/ +#define EM_860 7 /* Intel 80860 */ +#define EM_MIPS 8 /* MIPS R3000 Big-Endian only */ +/* + * Don't know if EM_MIPS_RS4_BE, + * EM_SPARC64, EM_PARISC, + * or EM_PPC are ABI compliant + */ +#define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */ +#define EM_SPARC64 11 /* SPARC v9 64-bit unofficial */ +#define EM_PARISC 15 /* HPPA */ +#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */ +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC 64-bit */ +#define EM_ARM 40 /* Advanced RISC Machines ARM */ +#define EM_ALPHA 41 /* DEC ALPHA */ +#define EM_SPARCV9 43 /* SPARC version 9 */ +#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */ +#define EM_IA_64 50 /* Intel Merced */ +#define EM_X86_64 62 /* AMD x86-64 architecture */ +#define EM_VAX 75 /* DEC VAX */ + +/* Version */ +#define EV_NONE 0 /* Invalid */ +#define EV_CURRENT 1 /* Current */ +#define EV_NUM 2 /* number of versions */ + +/* Section Header */ +typedef struct { + Elf32_Word sh_name; /* name - index into section header + string table section */ + Elf32_Word sh_type; /* type */ + Elf32_Word sh_flags; /* flags */ + Elf32_Addr sh_addr; /* address */ + Elf32_Off sh_offset; /* file offset */ + Elf32_Word sh_size; /* section size */ + Elf32_Word sh_link; /* section header table index link */ + Elf32_Word sh_info; /* extra information */ + Elf32_Word sh_addralign; /* address alignment */ + Elf32_Word sh_entsize; /* section entry size */ +} Elf32_Shdr; + +typedef struct { + Elf64_Half sh_name; /* section name */ + Elf64_Half sh_type; /* section type */ + Elf64_Xword sh_flags; /* section flags */ + Elf64_Addr sh_addr; /* virtual address */ + Elf64_Off sh_offset; /* file offset */ + Elf64_Xword sh_size; /* section size */ + Elf64_Half sh_link; /* link to another */ + Elf64_Half sh_info; /* misc info */ + Elf64_Xword sh_addralign; /* memory alignment */ + Elf64_Xword sh_entsize; /* table entry size */ +} Elf64_Shdr; + +/* Special Section Indexes */ +#define SHN_UNDEF 0 /* undefined */ +#define
SHN_LORESERVE 0xff00 /* lower bounds of reserved indexes */ +#define SHN_LOPROC 0xff00 /* reserved range for processor */ +#define SHN_HIPROC 0xff1f /* specific section indexes */ +#define SHN_ABS 0xfff1 /* absolute value */ +#define SHN_COMMON 0xfff2 /* common symbol */ +#define SHN_HIRESERVE 0xffff /* upper bounds of reserved indexes */ + +/* sh_type */ +#define SHT_NULL 0 /* inactive */ +#define SHT_PROGBITS 1 /* program defined information */ +#define SHT_SYMTAB 2 /* symbol table section */ +#define SHT_STRTAB 3 /* string table section */ +#define SHT_RELA 4 /* relocation section with addends */ +#define SHT_HASH 5 /* symbol hash table section */ +#define SHT_DYNAMIC 6 /* dynamic section */ +#define SHT_NOTE 7 /* note section */ +#define SHT_NOBITS 8 /* no space section */ +#define SHT_REL 9 /* relocation section without addends */ +#define SHT_SHLIB 10 /* reserved - purpose unknown */ +#define SHT_DYNSYM 11 /* dynamic symbol table section */ +#define SHT_NUM 12 /* number of section types */ +#define SHT_LOPROC 0x70000000 /* reserved range for processor */ +#define SHT_HIPROC 0x7fffffff /* specific section header types */ +#define SHT_LOUSER 0x80000000 /* reserved range for application */ +#define SHT_HIUSER 0xffffffff /* specific indexes */ + +/* Section names */ +#define ELF_BSS ".bss" /* uninitialized data */ +#define ELF_DATA ".data" /* initialized data */ +#define ELF_DEBUG ".debug" /* debug */ +#define ELF_DYNAMIC ".dynamic" /* dynamic linking information */ +#define ELF_DYNSTR ".dynstr" /* dynamic string table */ +#define ELF_DYNSYM ".dynsym" /* dynamic symbol table */ +#define ELF_FINI ".fini" /* termination code */ +#define ELF_GOT ".got" /* global offset table */ +#define ELF_HASH ".hash" /* symbol hash table */ +#define ELF_INIT ".init" /* initialization code */ +#define ELF_REL_DATA ".rel.data" /* relocation data */ +#define ELF_REL_FINI ".rel.fini" /* relocation termination code */ +#define ELF_REL_INIT ".rel.init" /* relocation initialization code */
+#define ELF_REL_DYN ".rel.dyn" /* relocaltion dynamic link info */ +#define ELF_REL_RODATA ".rel.rodata" /* relocation read-only data */ +#define ELF_REL_TEXT ".rel.text" /* relocation code */ +#define ELF_RODATA ".rodata" /* read-only data */ +#define ELF_SHSTRTAB ".shstrtab" /* section header string table */ +#define ELF_STRTAB ".strtab" /* string table */ +#define ELF_SYMTAB ".symtab" /* symbol table */ +#define ELF_TEXT ".text" /* code */ + + +/* Section Attribute Flags - sh_flags */ +#define SHF_WRITE 0x1 /* Writable */ +#define SHF_ALLOC 0x2 /* occupies memory */ +#define SHF_EXECINSTR 0x4 /* executable */ +#define SHF_MASKPROC 0xf0000000 /* reserved bits for processor */ + /* specific section attributes */ + +/* Symbol Table Entry */ +typedef struct elf32_sym { + Elf32_Word st_name; /* name - index into string table */ + Elf32_Addr st_value; /* symbol value */ + Elf32_Word st_size; /* symbol size */ + unsigned char st_info; /* type and binding */ + unsigned char st_other; /* 0 - no defined meaning */ + Elf32_Half st_shndx; /* section header index */ +} Elf32_Sym; + +typedef struct { + Elf64_Half st_name; /* Symbol name index in str table */ + Elf_Byte st_info; /* type / binding attrs */ + Elf_Byte st_other; /* unused */ + Elf64_Quarter st_shndx; /* section index of symbol */ + Elf64_Xword st_value; /* value of symbol */ + Elf64_Xword st_size; /* size of symbol */ +} Elf64_Sym; + +/* Symbol table index */ +#define STN_UNDEF 0 /* undefined */ + +/* Extract symbol info - st_info */ +#define ELF32_ST_BIND(x) ((x) >> 4) +#define ELF32_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF32_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf)) + +#define ELF64_ST_BIND(x) ((x) >> 4) +#define ELF64_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF64_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf)) + +/* Symbol Binding - ELF32_ST_BIND - st_info */ +#define STB_LOCAL 0 /* Local symbol */ +#define STB_GLOBAL 1 /* Global symbol */ +#define STB_WEAK 2 /* like global - lower precedence */ 
+#define STB_NUM 3 /* number of symbol bindings */ +#define STB_LOPROC 13 /* reserved range for processor */ +#define STB_HIPROC 15 /* specific symbol bindings */ + +/* Symbol type - ELF32_ST_TYPE - st_info */ +#define STT_NOTYPE 0 /* not specified */ +#define STT_OBJECT 1 /* data object */ +#define STT_FUNC 2 /* function */ +#define STT_SECTION 3 /* section */ +#define STT_FILE 4 /* file */ +#define STT_NUM 5 /* number of symbol types */ +#define STT_LOPROC 13 /* reserved range for processor */ +#define STT_HIPROC 15 /* specific symbol types */ + +/* Relocation entry with implicit addend */ +typedef struct { + Elf32_Addr r_offset; /* offset of relocation */ + Elf32_Word r_info; /* symbol table index and type */ +} Elf32_Rel; + +/* Relocation entry with explicit addend */ +typedef struct { + Elf32_Addr r_offset; /* offset of relocation */ + Elf32_Word r_info; /* symbol table index and type */ + Elf32_Sword r_addend; +} Elf32_Rela; + +/* Extract relocation info - r_info */ +#define ELF32_R_SYM(i) ((i) >> 8) +#define ELF32_R_TYPE(i) ((unsigned char) (i)) +#define ELF32_R_INFO(s,t) (((s) << 8) + (unsigned char)(t)) + +typedef struct { + Elf64_Xword r_offset; /* where to do it */ + Elf64_Xword r_info; /* index & type of relocation */ +} Elf64_Rel; + +typedef struct { + Elf64_Xword r_offset; /* where to do it */ + Elf64_Xword r_info; /* index & type of relocation */ + Elf64_Sxword r_addend; /* adjustment value */ +} Elf64_Rela; + +#define ELF64_R_SYM(info) ((info) >> 32) +#define ELF64_R_TYPE(info) ((info) & 0xFFFFFFFF) +#define ELF64_R_INFO(s,t) (((s) << 32) + (u_int32_t)(t)) + +/* Program Header */ +typedef struct { + Elf32_Word p_type; /* segment type */ + Elf32_Off p_offset; /* segment offset */ + Elf32_Addr p_vaddr; /* virtual address of segment */ + Elf32_Addr p_paddr; /* physical address - ignored? */ + Elf32_Word p_filesz; /* number of bytes in file for seg. */ + Elf32_Word p_memsz; /* number of bytes in mem. for seg. 
*/ + Elf32_Word p_flags; /* flags */ + Elf32_Word p_align; /* memory alignment */ +} Elf32_Phdr; + +typedef struct { + Elf64_Half p_type; /* entry type */ + Elf64_Half p_flags; /* flags */ + Elf64_Off p_offset; /* offset */ + Elf64_Addr p_vaddr; /* virtual address */ + Elf64_Addr p_paddr; /* physical address */ + Elf64_Xword p_filesz; /* file size */ + Elf64_Xword p_memsz; /* memory size */ + Elf64_Xword p_align; /* memory & file alignment */ +} Elf64_Phdr; + +/* Segment types - p_type */ +#define PT_NULL 0 /* unused */ +#define PT_LOAD 1 /* loadable segment */ +#define PT_DYNAMIC 2 /* dynamic linking section */ +#define PT_INTERP 3 /* the RTLD */ +#define PT_NOTE 4 /* auxiliary information */ +#define PT_SHLIB 5 /* reserved - purpose undefined */ +#define PT_PHDR 6 /* program header */ +#define PT_NUM 7 /* Number of segment types */ +#define PT_LOPROC 0x70000000 /* reserved range for processor */ +#define PT_HIPROC 0x7fffffff /* specific segment types */ + +/* Segment flags - p_flags */ +#define PF_X 0x1 /* Executable */ +#define PF_W 0x2 /* Writable */ +#define PF_R 0x4 /* Readable */ +#define PF_MASKPROC 0xf0000000 /* reserved bits for processor */ + /* specific segment flags */ + +/* Dynamic structure */ +typedef struct { + Elf32_Sword d_tag; /* controls meaning of d_val */ + union { + Elf32_Word d_val; /* Multiple meanings - see d_tag */ + Elf32_Addr d_ptr; /* program virtual address */ + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Xword d_tag; /* controls meaning of d_val */ + union { + Elf64_Addr d_ptr; + Elf64_Xword d_val; + } d_un; +} Elf64_Dyn; + +/* Dynamic Array Tags - d_tag */ +#define DT_NULL 0 /* marks end of _DYNAMIC array */ +#define DT_NEEDED 1 /* string table offset of needed lib */ +#define DT_PLTRELSZ 2 /* size of relocation entries in PLT */ +#define DT_PLTGOT 3 /* address PLT/GOT */ +#define DT_HASH 4 /* address of symbol hash table */ +#define DT_STRTAB 5 /* address of string table */ +#define DT_SYMTAB 6 /* address of symbol table */ 
+#define DT_RELA 7 /* address of relocation table */ +#define DT_RELASZ 8 /* size of relocation table */ +#define DT_RELAENT 9 /* size of relocation entry */ +#define DT_STRSZ 10 /* size of string table */ +#define DT_SYMENT 11 /* size of symbol table entry */ +#define DT_INIT 12 /* address of initialization func. */ +#define DT_FINI 13 /* address of termination function */ +#define DT_SONAME 14 /* string table offset of shared obj */ +#define DT_RPATH 15 /* string table offset of library + search path */ +#define DT_SYMBOLIC 16 /* start sym search in shared obj. */ +#define DT_REL 17 /* address of rel. tbl. w/o explicit addends */ +#define DT_RELSZ 18 /* size of DT_REL relocation table */ +#define DT_RELENT 19 /* size of DT_REL relocation entry */ +#define DT_PLTREL 20 /* PLT referenced relocation entry */ +#define DT_DEBUG 21 /* reserved for debugger use */ +#define DT_TEXTREL 22 /* Allow rel. mod. to unwritable seg */ +#define DT_JMPREL 23 /* addr. of PLT's relocation entries */ +#define DT_BIND_NOW 24 /* Bind now regardless of env setting */ +#define DT_NUM 25 /* Number used.
*/ +#define DT_LOPROC 0x70000000 /* reserved range for processor */ +#define DT_HIPROC 0x7fffffff /* specific dynamic array tags */ + +/* Standard ELF hashing function */ +unsigned int elf_hash(const unsigned char *name); + +/* + * Note Definitions + */ +typedef struct { + Elf32_Word namesz; + Elf32_Word descsz; + Elf32_Word type; +} Elf32_Note; + +typedef struct { + Elf64_Half namesz; + Elf64_Half descsz; + Elf64_Half type; +} Elf64_Note; + + +#if defined(ELFSIZE) +#define CONCAT(x,y) __CONCAT(x,y) +#define ELFNAME(x) CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x))) +#define ELFNAME2(x,y) CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y)))) +#define ELFNAMEEND(x) CONCAT(x,CONCAT(_elf,ELFSIZE)) +#define ELFDEFNNAME(x) CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x))) +#endif + +#if defined(ELFSIZE) && (ELFSIZE == 32) +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Rel Elf32_Rel +#define Elf_RelA Elf32_Rela +#define Elf_Dyn Elf32_Dyn +#define Elf_Word Elf32_Word +#define Elf_Sword Elf32_Sword +#define Elf_Addr Elf32_Addr +#define Elf_Off Elf32_Off +#define Elf_Nhdr Elf32_Nhdr +#define Elf_Note Elf32_Note + +#define ELF_R_SYM ELF32_R_SYM +#define ELF_R_TYPE ELF32_R_TYPE +#define ELF_R_INFO ELF32_R_INFO +#define ELFCLASS ELFCLASS32 + +#define ELF_ST_BIND ELF32_ST_BIND +#define ELF_ST_TYPE ELF32_ST_TYPE +#define ELF_ST_INFO ELF32_ST_INFO + +#define AuxInfo Aux32Info +#elif defined(ELFSIZE) && (ELFSIZE == 64) +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Rel Elf64_Rel +#define Elf_RelA Elf64_Rela +#define Elf_Dyn Elf64_Dyn +#define Elf_Word Elf64_Word +#define Elf_Sword Elf64_Sword +#define Elf_Addr Elf64_Addr +#define Elf_Off Elf64_Off +#define Elf_Nhdr Elf64_Nhdr +#define Elf_Note Elf64_Note + +#define ELF_R_SYM ELF64_R_SYM +#define ELF_R_TYPE ELF64_R_TYPE +#define ELF_R_INFO ELF64_R_INFO +#define ELFCLASS ELFCLASS64 + +#define 
ELF_ST_BIND ELF64_ST_BIND +#define ELF_ST_TYPE ELF64_ST_TYPE +#define ELF_ST_INFO ELF64_ST_INFO + +#define AuxInfo Aux64Info +#endif + +#endif /* __XEN_PUBLIC_ELFSTRUCTS_H__ */ diff --git a/sys/xen/interface/event_channel.h b/sys/xen/interface/event_channel.h new file mode 100644 index 0000000..d35cce5 --- /dev/null +++ b/sys/xen/interface/event_channel.h @@ -0,0 +1,264 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +/* + * Prototype for this hypercall is: + * int event_channel_op(int cmd, void *args) + * @cmd == EVTCHNOP_??? (event-channel operation). + * @args == Operation-specific extra arguments (NULL if none). 
+ */ + +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_alloc_unbound 6 +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * NOTES: + * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_bind_interdomain 0 +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <virq> on specified + * vcpu. + * NOTES: + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. + */ +#define EVTCHNOP_bind_virq 1 +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; + uint32_t vcpu; + /* OUT parameters.
*/ + evtchn_port_t port; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +#define EVTCHNOP_bind_pirq 2 +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_ipi 7 +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +#define EVTCHNOP_close 3 +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +#define EVTCHNOP_send 4 +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <dom, port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. 
+ */ +#define EVTCHNOP_status 5 +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; +typedef struct evtchn_status evtchn_status_t; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +#define EVTCHNOP_bind_vcpu 8 +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +#define EVTCHNOP_unmask 9 +struct evtchn_unmask { + /* IN parameters. 
*/ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify a <dom> other than + * DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * Argument to event_channel_op_compat() hypercall. Superseded by new + * event_channel_op() hypercall since 0x00030202. + */ +struct evtchn_op { + uint32_t cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/features.h b/sys/xen/interface/features.h new file mode 100644 index 0000000..d4b373f --- /dev/null +++ b/sys/xen/interface/features.h @@ -0,0 +1,71 @@ +/****************************************************************************** + * features.h + * + * Feature flags, reported by XENVER_get_features.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_FEATURES_H__ +#define __XEN_PUBLIC_FEATURES_H__ + +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. + */ +#define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ +#define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space are handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ +#define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). 
*/ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. + */ +#define XENFEAT_pae_pgdir_above_4gb 4 + +#define XENFEAT_NR_SUBMAPS 1 + +#endif /* __XEN_PUBLIC_FEATURES_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/foreign/Makefile b/sys/xen/interface/foreign/Makefile new file mode 100644 index 0000000..7bde873 --- /dev/null +++ b/sys/xen/interface/foreign/Makefile @@ -0,0 +1,37 @@ +XEN_ROOT=../../../.. +include $(XEN_ROOT)/Config.mk + +architectures := x86_32 x86_64 ia64 +headers := $(patsubst %, %.h, $(architectures)) +scripts := $(wildcard *.py) + +.PHONY: all clean check-headers +all: $(headers) check-headers + +clean: + rm -f $(headers) + rm -f checker checker.c $(XEN_TARGET_ARCH).size + rm -f *.pyc *.o *~ + +ifeq ($(CROSS_COMPILE)$(XEN_TARGET_ARCH),$(XEN_COMPILE_ARCH)) +check-headers: checker + ./checker > $(XEN_TARGET_ARCH).size + diff -u reference.size $(XEN_TARGET_ARCH).size +checker: checker.c $(headers) + $(HOSTCC) $(HOSTCFLAGS) -o $@ $< +else +check-headers: + @echo "cross build: skipping check" +endif + +x86_32.h: ../arch-x86/xen-x86_32.h ../arch-x86/xen.h ../xen.h $(scripts) + python mkheader.py $* $@ $(filter %.h,$^) + +x86_64.h: ../arch-x86/xen-x86_64.h ../arch-x86/xen.h ../xen.h $(scripts) + python mkheader.py $* $@ $(filter %.h,$^) + +ia64.h: ../arch-ia64.h ../xen.h $(scripts) + python mkheader.py $* $@ $(filter %.h,$^) + +checker.c: $(scripts) + python mkchecker.py $(XEN_TARGET_ARCH) $@ $(architectures) diff --git a/sys/xen/interface/foreign/mkchecker.py b/sys/xen/interface/foreign/mkchecker.py new file mode 100644 index 0000000..b30e82a --- /dev/null +++ b/sys/xen/interface/foreign/mkchecker.py @@ -0,0 +1,58 @@ +#!/usr/bin/python + +import sys; +from structs import structs; + +# 
command line arguments +arch = sys.argv[1]; +outfile = sys.argv[2]; +archs = sys.argv[3:]; + +f = open(outfile, "w"); +f.write(''' +/* + * sanity checks for generated foreign headers: + * - verify struct sizes + * + * generated by %s -- DO NOT EDIT + */ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <inttypes.h> +#include "../xen.h" +'''); + +for a in archs: + f.write('#include "%s.h"\n' % a); + +f.write('int main(int argc, char *argv[])\n{\n'); + +f.write('\tprintf("\\n");'); +f.write('printf("%-20s |", "structs");\n'); +for a in archs: + f.write('\tprintf("%%8s", "%s");\n' % a); +f.write('\tprintf("\\n");'); + +f.write('\tprintf("\\n");'); +for struct in structs: + f.write('\tprintf("%%-20s |", "%s");\n' % struct); + for a in archs: + if a == arch: + s = struct; # native + else: + s = struct + "_" + a; + f.write('#ifdef %s_has_no_%s\n' % (a, struct)); + f.write('\tprintf("%8s", "-");\n'); + f.write("#else\n"); + f.write('\tprintf("%%8zd", sizeof(struct %s));\n' % s); + f.write("#endif\n"); + + f.write('\tprintf("\\n");\n\n'); + +f.write('\tprintf("\\n");\n'); +f.write('\texit(0);\n'); +f.write('}\n'); + +f.close(); + diff --git a/sys/xen/interface/foreign/mkheader.py b/sys/xen/interface/foreign/mkheader.py new file mode 100644 index 0000000..a5310eb --- /dev/null +++ b/sys/xen/interface/foreign/mkheader.py @@ -0,0 +1,153 @@ +#!/usr/bin/python + +import sys, re; +from structs import structs, defines; + +# command line arguments +arch = sys.argv[1]; +outfile = sys.argv[2]; +infiles = sys.argv[3:]; + + +########################################################################### +# configuration #2: architecture information + +inttypes = {}; +header = {}; +footer = {}; + +# x86_32 +inttypes["x86_32"] = { + "unsigned long" : "uint32_t", + "long" : "uint32_t", + "xen_pfn_t" : "uint32_t", +}; +header["x86_32"] = """ +#define __i386___X86_32 1 +#pragma pack(4) +"""; +footer["x86_32"] = """ +#pragma pack() +"""; + +# x86_64 +inttypes["x86_64"] = { 
+ "unsigned long" : "__align8__ uint64_t", + "long" : "__align8__ uint64_t", + "xen_pfn_t" : "__align8__ uint64_t", +}; +header["x86_64"] = """ +#ifdef __GNUC__ +# define __DECL_REG(name) union { uint64_t r ## name, e ## name; } +# define __align8__ __attribute__((aligned (8))) +#else +# define __DECL_REG(name) uint64_t r ## name +# define __align8__ FIXME +#endif +#define __x86_64___X86_64 1 +"""; + +# ia64 +inttypes["ia64"] = { + "unsigned long" : "__align8__ uint64_t", + "long" : "__align8__ uint64_t", + "xen_pfn_t" : "__align8__ uint64_t", + "long double" : "__align16__ ldouble_t", +}; +header["ia64"] = """ +#define __align8__ __attribute__((aligned (8))) +#define __align16__ __attribute__((aligned (16))) +typedef unsigned char ldouble_t[16]; +"""; + + +########################################################################### +# main + +input = ""; +output = ""; +fileid = re.sub("[-.]", "_", "__FOREIGN_%s__" % outfile.upper()); + +# read input header files +for name in infiles: + f = open(name, "r"); + input += f.read(); + f.close(); + +# add header +output += """ +/* + * public xen defines and struct for %s + * generated by %s -- DO NOT EDIT + */ + +#ifndef %s +#define %s 1 + +""" % (arch, sys.argv[0], fileid, fileid) + +if arch in header: + output += header[arch]; + output += "\n"; + +# add defines to output +for line in re.findall("#define[^\n]+", input): + for define in defines: + regex = "#define\s+%s\\b" % define; + match = re.search(regex, line); + if None == match: + continue; + if define.upper()[0] == define[0]: + replace = define + "_" + arch.upper(); + else: + replace = define + "_" + arch; + regex = "\\b%s\\b" % define; + output += re.sub(regex, replace, line) + "\n"; +output += "\n"; + +# delete defines, comments, empty lines +input = re.sub("#define[^\n]+\n", "", input); +input = re.compile("/\*(.*?)\*/", re.S).sub("", input) +input = re.compile("\n\s*\n", re.S).sub("\n", input); + +# add structs to output +for struct in structs: + regex = 
"struct\s+%s\s*\{(.*?)\n\};" % struct; + match = re.search(regex, input, re.S) + if None == match: + output += "#define %s_has_no_%s 1\n" % (arch, struct); + else: + output += "struct %s_%s {%s\n};\n" % (struct, arch, match.group(1)); + output += "typedef struct %s_%s %s_%s_t;\n" % (struct, arch, struct, arch); + output += "\n"; + +# add footer +if arch in footer: + output += footer[arch]; + output += "\n"; +output += "#endif /* %s */\n" % fileid; + +# replace: defines +for define in defines: + if define.upper()[0] == define[0]: + replace = define + "_" + arch.upper(); + else: + replace = define + "_" + arch; + output = re.sub("\\b%s\\b" % define, replace, output); + +# replace: structs + struct typedefs +for struct in structs: + output = re.sub("\\b(struct\s+%s)\\b" % struct, "\\1_%s" % arch, output); + output = re.sub("\\b(%s)_t\\b" % struct, "\\1_%s_t" % arch, output); + +# replace: integer types +integers = inttypes[arch].keys(); +integers.sort(lambda a, b: cmp(len(b),len(a))); +for type in integers: + output = re.sub("\\b%s\\b" % type, inttypes[arch][type], output); + +# print results +f = open(outfile, "w"); +f.write(output); +f.close; + diff --git a/sys/xen/interface/foreign/reference.size b/sys/xen/interface/foreign/reference.size new file mode 100644 index 0000000..f35d281 --- /dev/null +++ b/sys/xen/interface/foreign/reference.size @@ -0,0 +1,17 @@ + +structs | x86_32 x86_64 ia64 + +start_info | 1104 1152 1152 +trap_info | 8 16 - +pt_fpreg | - - 16 +cpu_user_regs | 68 200 496 +xen_ia64_boot_param | - - 96 +ia64_tr_entry | - - 32 +vcpu_extra_regs | - - 536 +vcpu_guest_context | 2800 5168 1056 +arch_vcpu_info | 24 16 0 +vcpu_time_info | 32 32 32 +vcpu_info | 64 64 48 +arch_shared_info | 268 280 272 +shared_info | 2584 3368 4384 + diff --git a/sys/xen/interface/foreign/structs.py b/sys/xen/interface/foreign/structs.py new file mode 100644 index 0000000..6ecd1aa --- /dev/null +++ b/sys/xen/interface/foreign/structs.py @@ -0,0 +1,54 @@ +# configuration: what 
needs translation + +structs = [ "start_info", + "trap_info", + "pt_fpreg", + "cpu_user_regs", + "xen_ia64_boot_param", + "ia64_tr_entry", + "vcpu_extra_regs", + "vcpu_guest_context", + "arch_vcpu_info", + "vcpu_time_info", + "vcpu_info", + "arch_shared_info", + "shared_info" ]; + +defines = [ "__i386__", + "__x86_64__", + + "FLAT_RING1_CS", + "FLAT_RING1_DS", + "FLAT_RING1_SS", + + "FLAT_RING3_CS64", + "FLAT_RING3_DS64", + "FLAT_RING3_SS64", + "FLAT_KERNEL_CS64", + "FLAT_KERNEL_DS64", + "FLAT_KERNEL_SS64", + + "FLAT_KERNEL_CS", + "FLAT_KERNEL_DS", + "FLAT_KERNEL_SS", + + # x86_{32,64} + "_VGCF_i387_valid", + "VGCF_i387_valid", + "_VGCF_in_kernel", + "VGCF_in_kernel", + "_VGCF_failsafe_disables_events", + "VGCF_failsafe_disables_events", + "_VGCF_syscall_disables_events", + "VGCF_syscall_disables_events", + "_VGCF_online", + "VGCF_online", + + # ia64 + "VGCF_EXTRA_REGS", + + # all archs + "xen_pfn_to_cr3", + "MAX_VIRT_CPUS", + "MAX_GUEST_CMDLINE" ]; + diff --git a/sys/xen/interface/grant_table.h b/sys/xen/interface/grant_table.h new file mode 100644 index 0000000..d0ba9e9 --- /dev/null +++ b/sys/xen/interface/grant_table.h @@ -0,0 +1,401 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). 
+ * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ +struct grant_entry { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; +typedef struct grant_entry grant_entry_t; + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. 
+ */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, <handle> + * is a negative status code. + * NOTES: + * 1. 
If GNTMAP_device_map is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in <host_addr>. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +#define GNTTABOP_map_grant_ref 0 +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle> + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_grant_ref 1 +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); + +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. 
The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +#define GNTTABOP_setup_table 2 +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + XEN_GUEST_HANDLE(ulong) frame_list; +}; +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +#define GNTTABOP_dump_table 3 +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); + +/* + * GNTTABOP_transfer: Transfer the frame <mfn> to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * <domid, ref>. + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +#define GNTTABOP_transfer 4 +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); + + +/* + * GNTTABOP_copy: Hypervisor based copy + * Source and destination can be either MFNs or, for foreign domains, + * grant references. The foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type the source and destination are (either MFN + * or grant reference).
+ * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains have previously + * granted appropriate access to their pages to the third party. + * + * source.offset specifies an offset in the source frame, dest.offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +#define GNTTABOP_copy 5 +typedef struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +} gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_query_size 6 +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + + +/* + * Bitfield values for gnttab_map_grant_ref.flags (GNTMAP_*). + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access.
*/ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine address of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +/* + * Values for error status returns. All errors are negative. + */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognised domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. 
*/ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/hvm/e820.h b/sys/xen/interface/hvm/e820.h new file mode 100644 index 0000000..5bdc227 --- /dev/null +++ b/sys/xen/interface/hvm/e820.h @@ -0,0 +1,34 @@ + +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_E820_H__ +#define __XEN_PUBLIC_HVM_E820_H__ + +/* E820 location in HVM virtual address space. 
*/ +#define HVM_E820_PAGE 0x00090000 +#define HVM_E820_NR_OFFSET 0x000001E8 +#define HVM_E820_OFFSET 0x000002D0 + +#define HVM_BELOW_4G_RAM_END 0xF0000000 +#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END +#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) + +#endif /* __XEN_PUBLIC_HVM_E820_H__ */ diff --git a/sys/xen/interface/hvm/hvm_info_table.h b/sys/xen/interface/hvm/hvm_info_table.h new file mode 100644 index 0000000..dfe34db --- /dev/null +++ b/sys/xen/interface/hvm/hvm_info_table.h @@ -0,0 +1,41 @@ +/****************************************************************************** + * hvm/hvm_info_table.h + * + * HVM parameter and information table, written into guest memory map. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ +#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ + +#define HVM_INFO_PFN 0x09F +#define HVM_INFO_OFFSET 0x800 +#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) + +struct hvm_info_table { + char signature[8]; /* "HVM INFO" */ + uint32_t length; + uint8_t checksum; + uint8_t acpi_enabled; + uint8_t apic_mode; + uint32_t nr_vcpus; +}; + +#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ diff --git a/sys/xen/interface/hvm/hvm_op.h b/sys/xen/interface/hvm/hvm_op.h new file mode 100644 index 0000000..b21b0f7 --- /dev/null +++ b/sys/xen/interface/hvm/hvm_op.h @@ -0,0 +1,76 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. 
*/ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +typedef struct xen_hvm_param xen_hvm_param_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); + +/* Set the logical level of one of a domain's PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); + +/* Set the logical level of one of a domain's ISA IRQ wires. */ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); + +/* Flushes all VCPU TLBs: @arg must be NULL. 
*/ +#define HVMOP_flush_tlbs 5 + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/sys/xen/interface/hvm/ioreq.h b/sys/xen/interface/hvm/ioreq.h new file mode 100644 index 0000000..91a4cef --- /dev/null +++ b/sys/xen/interface/hvm/ioreq.h @@ -0,0 +1,122 @@ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _IOREQ_H_ +#define _IOREQ_H_ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_AND 2 +#define IOREQ_TYPE_OR 3 +#define IOREQ_TYPE_XOR 4 +#define IOREQ_TYPE_XCHG 5 +#define IOREQ_TYPE_ADD 6 +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ +#define IOREQ_TYPE_SUB 9 + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t size; /* size in bytes */ + uint64_t count; /* for rep prefixes */ + uint64_t data; /* data (or paddr of data) */ + uint8_t state:4; + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t df:1; + uint8_t type; /* I/O type */ + uint8_t _pad0[6]; + uint64_t io_count; /* How many IO done on a vcpu */ +}; +typedef struct ioreq ioreq_t; + +struct vcpu_iodata { + struct ioreq vp_ioreq; + /* Event channel port, used for notifications to/from the device model. */ + uint32_t vp_eport; + uint32_t _pad0; +}; +typedef struct vcpu_iodata vcpu_iodata_t; + +struct shared_iopage { + struct vcpu_iodata vcpu_iodata[1]; +}; +typedef struct shared_iopage shared_iopage_t; + +#define IOREQ_BUFFER_SLOT_NUM 80 +struct buffered_iopage { + unsigned int read_pointer; + unsigned int write_pointer; + ioreq_t ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* NB. Size of this structure must be no greater than one page. 
*/ +typedef struct buffered_iopage buffered_iopage_t; + +#if defined(__ia64__) +struct pio_buffer { + uint32_t page_offset; + uint32_t pointer; + uint32_t data_end; + uint32_t buf_size; + void *opaque; +}; + +#define PIO_BUFFER_IDE_PRIMARY 0 /* I/O port = 0x1F0 */ +#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */ +#define PIO_BUFFER_ENTRY_NUM 2 +struct buffered_piopage { + struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM]; + uint8_t buffer[1]; +}; +#endif /* defined(__ia64__) */ + +#if defined(__i386__) || defined(__x86_64__) +#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08) +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#endif /* _IOREQ_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/hvm/params.h b/sys/xen/interface/hvm/params.h new file mode 100644 index 0000000..9657654 --- /dev/null +++ b/sys/xen/interface/hvm/params.h @@ -0,0 +1,60 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include "hvm_op.h" + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ +#define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 + +#ifdef __ia64__ +#define HVM_PARAM_NVRAM_FD 7 +#define HVM_NR_PARAMS 8 +#else +#define HVM_NR_PARAMS 7 +#endif + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/sys/xen/interface/hvm/save.h b/sys/xen/interface/hvm/save.h new file mode 100644 index 0000000..e01218a --- /dev/null +++ b/sys/xen/interface/hvm/save.h @@ -0,0 +1,462 @@ +/* + * hvm/save.h + * + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * + * Copyright (c) 2007 XenSource Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_H__ +#define __XEN_PUBLIC_HVM_SAVE_H__ + +/* + * Structures in this header *must* have the same layout in 32bit + * and 64bit environments: this means that all fields must be explicitly + * sized types and aligned to their sizes, and the structs must be + * a multiple of eight bytes long. + * + * Only the state necessary for saving and restoring (i.e. fields + * that are analogous to actual hardware state) should go in this file. + * Internal mechanisms should be kept in Xen-private headers. 
+ */ + +/* + * Each entry is preceded by a descriptor giving its type and length + */ +struct hvm_save_descriptor { + uint16_t typecode; /* Used to demux the various types below */ + uint16_t instance; /* Further demux within a type */ + uint32_t length; /* In bytes, *not* including this descriptor */ +}; + + +/* + * Each entry has a datatype associated with it: for example, the CPU state + * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), + * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). + * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system + * ugliness. + */ + +#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; } + +#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) +#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) +#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) + + +/* + * Save/restore header: general info about the save file. 
+ */ + +#define HVM_FILE_MAGIC 0x54381286 +#define HVM_FILE_VERSION 0x00000001 + +struct hvm_save_header { + uint32_t magic; /* Must be HVM_FILE_MAGIC */ + uint32_t version; /* File format version */ + uint64_t changeset; /* Version of Xen that saved this file */ + uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); + + +/* + * Processor + */ + +struct hvm_hw_cpu { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint32_t sysenter_cs; + uint32_t padding0; + + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. 
*/ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu); + + +/* + * PIC + */ + +struct hvm_hw_vpic { + /* IR line bitmasks. */ + uint8_t irr; + uint8_t imr; + uint8_t isr; + + /* Line IRx maps to IRQ irq_base+x */ + uint8_t irq_base; + + /* + * Where are we in ICW2-4 initialisation (0 means no init in progress)? + * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). + * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) + * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) + */ + uint8_t init_state:4; + + /* IR line with highest priority. */ + uint8_t priority_add:4; + + /* Reads from A=0 obtain ISR or IRR? */ + uint8_t readsel_isr:1; + + /* Reads perform a polling read? */ + uint8_t poll:1; + + /* Automatically clear IRQs from the ISR during INTA? */ + uint8_t auto_eoi:1; + + /* Automatically rotate IRQ priorities during AEOI? */ + uint8_t rotate_on_auto_eoi:1; + + /* Exclude slave inputs when considering in-service IRQs? */ + uint8_t special_fully_nested_mode:1; + + /* Special mask mode excludes masked IRs from AEOI and priority checks. */ + uint8_t special_mask_mode:1; + + /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ + uint8_t is_master:1; + + /* Edge/trigger selection. */ + uint8_t elcr; + + /* Virtual INT output. 
*/ + uint8_t int_output; +}; + +DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); + + +/* + * IO-APIC + */ + +#ifdef __ia64__ +#define VIOAPIC_IS_IOSAPIC 1 +#define VIOAPIC_NUM_PINS 24 +#else +#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ +#endif + +struct hvm_hw_vioapic { + uint64_t base_address; + uint32_t ioregsel; + uint32_t id; + union vioapic_redir_entry + { + uint64_t bits; + struct { + uint8_t vector; + uint8_t delivery_mode:3; + uint8_t dest_mode:1; + uint8_t delivery_status:1; + uint8_t polarity:1; + uint8_t remote_irr:1; + uint8_t trig_mode:1; + uint8_t mask:1; + uint8_t reserve:7; +#if !VIOAPIC_IS_IOSAPIC + uint8_t reserved[4]; + uint8_t dest_id; +#else + uint8_t reserved[3]; + uint16_t dest_id; +#endif + } fields; + } redirtbl[VIOAPIC_NUM_PINS]; +}; + +DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); + + +/* + * LAPIC + */ + +struct hvm_hw_lapic { + uint64_t apic_base_msr; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ + uint32_t timer_divisor; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); + +struct hvm_hw_lapic_regs { + /* Register state. NOTE(review): this comment used to say "a 4k page", but the array below is 0x400 (1024) bytes; confirm the intended size. */ + uint8_t data[0x400]; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); + + +/* + * IRQs + */ + +struct hvm_hw_pci_irqs { + /* + * Virtual interrupt wires for a single PCI bus. + * Indexed by: device*4 + INTx#. + */ + union { + DECLARE_BITMAP(i, 32*4); + uint64_t pad[2]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); + +struct hvm_hw_isa_irqs { + /* + * Virtual interrupt wires for ISA devices. + * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). + */ + union { + DECLARE_BITMAP(i, 16); + uint64_t pad[1]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); + +struct hvm_hw_pci_link { + /* + * PCI-ISA interrupt router. + * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using + * the traditional 'barber's pole' mapping ((device + INTx#) & 3). 
+ * The router provides a programmable mapping from each link to a GSI. + */ + uint8_t route[4]; + uint8_t pad0[4]; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); + +/* + * PIT + */ + +struct hvm_hw_pit { + struct hvm_hw_pit_channel { + uint32_t count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + } channels[3]; /* 3 x 16 bytes */ + uint32_t speaker_data_on; + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); + + +/* + * RTC + */ + +#define RTC_CMOS_SIZE 14 +struct hvm_hw_rtc { + /* CMOS bytes */ + uint8_t cmos_data[RTC_CMOS_SIZE]; + /* Index register for 2-part operations */ + uint8_t cmos_index; + uint8_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); + + +/* + * HPET + */ + +#define HPET_TIMER_NUM 3 /* 3 timers supported now */ +struct hvm_hw_hpet { + /* Memory-mapped, software visible registers */ + uint64_t capability; /* capabilities */ + uint64_t res0; /* reserved */ + uint64_t config; /* configuration */ + uint64_t res1; /* reserved */ + uint64_t isr; /* interrupt status reg */ + uint64_t res2[25]; /* reserved */ + uint64_t mc64; /* main counter */ + uint64_t res3; /* reserved */ + struct { /* timers */ + uint64_t config; /* configuration/cap */ + uint64_t cmp; /* comparator */ + uint64_t fsb; /* FSB route, not supported now */ + uint64_t res4; /* reserved */ + } timers[HPET_TIMER_NUM]; + uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ + + /* Hidden register state */ + uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ +}; + +DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); + + +/* + * PM timer + */ + +struct hvm_hw_pmtimer { + uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ + uint16_t pm1a_sts; 
/* PM1a_EVT_BLK.PM1a_STS: status register */ + uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ +}; + +DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); + +/* + * Largest type-code in use + */ +#define HVM_SAVE_CODE_MAX 13 + + +/* + * The series of save records is terminated by a zero-type, zero-length + * descriptor. + */ + +struct hvm_save_end {}; +DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); + +#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */ diff --git a/sys/xen/interface/hvm/vmx_assist.h b/sys/xen/interface/hvm/vmx_assist.h new file mode 100644 index 0000000..4ef17fe --- /dev/null +++ b/sys/xen/interface/hvm/vmx_assist.h @@ -0,0 +1,122 @@ +/* + * vmx_assist.h: Context definitions for the VMXASSIST world switch. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Leendert van Doorn, leendert@watson.ibm.com + * Copyright (c) 2005, International Business Machines Corporation. 
+ */ + +#ifndef _VMX_ASSIST_H_ +#define _VMX_ASSIST_H_ + +#define VMXASSIST_BASE 0xD0000 +#define VMXASSIST_MAGIC 0x17101966 +#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8) + +#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12) +#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4) + +#ifndef __ASSEMBLY__ + +#define NR_EXCEPTION_HANDLER 32 +#define NR_INTERRUPT_HANDLERS 16 +#define NR_TRAPS (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS) + +union vmcs_arbytes { + struct arbyte_fields { + unsigned int seg_type : 4, + s : 1, + dpl : 2, + p : 1, + reserved0 : 4, + avl : 1, + reserved1 : 1, + default_ops_size: 1, + g : 1, + null_bit : 1, + reserved2 : 15; + } fields; + unsigned int bytes; +}; + +/* + * World switch state + */ +struct vmx_assist_context { + uint32_t eip; /* execution pointer */ + uint32_t esp; /* stack pointer */ + uint32_t eflags; /* flags register */ + uint32_t cr0; + uint32_t cr3; /* page table directory */ + uint32_t cr4; + uint32_t idtr_limit; /* idt */ + uint32_t idtr_base; + uint32_t gdtr_limit; /* gdt */ + uint32_t gdtr_base; + uint32_t cs_sel; /* cs selector */ + uint32_t cs_limit; + uint32_t cs_base; + union vmcs_arbytes cs_arbytes; + uint32_t ds_sel; /* ds selector */ + uint32_t ds_limit; + uint32_t ds_base; + union vmcs_arbytes ds_arbytes; + uint32_t es_sel; /* es selector */ + uint32_t es_limit; + uint32_t es_base; + union vmcs_arbytes es_arbytes; + uint32_t ss_sel; /* ss selector */ + uint32_t ss_limit; + uint32_t ss_base; + union vmcs_arbytes ss_arbytes; + uint32_t fs_sel; /* fs selector */ + uint32_t fs_limit; + uint32_t fs_base; + union vmcs_arbytes fs_arbytes; + uint32_t gs_sel; /* gs selector */ + uint32_t gs_limit; + uint32_t gs_base; + union vmcs_arbytes gs_arbytes; + uint32_t tr_sel; /* task selector */ + uint32_t tr_limit; + uint32_t tr_base; + union vmcs_arbytes tr_arbytes; + uint32_t ldtr_sel; /* ldtr selector */ + uint32_t ldtr_limit; + uint32_t ldtr_base; + union vmcs_arbytes ldtr_arbytes; + + unsigned char rm_irqbase[2]; 
+}; +typedef struct vmx_assist_context vmx_assist_context_t; + +#endif /* __ASSEMBLY__ */ + +#endif /* _VMX_ASSIST_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/blkif.h b/sys/xen/interface/io/blkif.h new file mode 100644 index 0000000..d2bd652 --- /dev/null +++ b/sys/xen/interface/io/blkif.h @@ -0,0 +1,128 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include <xen/interface/io/ring.h> +#include <xen/interface/grant_table.h> + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * Recognised only if "feature-barrier" is present in backend xenbus info. + * The "feature-barrier" node contains a boolean indicating whether barrier + * requests are likely to succeed or fail. Either way, a barrier request + * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by + * the underlying block-device hardware. The boolean simply indicates whether + * or not it is worthwhile for the frontend to attempt barrier requests. + * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not* + * create the "feature-barrier" node! + */ +#define BLKIF_OP_WRITE_BARRIER 2 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). 
*/ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ + +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/console.h b/sys/xen/interface/io/console.h new file mode 100644 index 0000000..4b8c01a --- /dev/null +++ b/sys/xen/interface/io/console.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/fbif.h b/sys/xen/interface/io/fbif.h new file mode 100644 index 0000000..261d756 --- /dev/null +++ b/sys/xen/interface/io/fbif.h @@ -0,0 +1,138 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + * No in events currently defined. 
+ */ + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event +{ + uint8_t type; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4, that's 4 Megs. Two directory + * pages should be enough for a while. + */ + unsigned long pd[2]; +}; + +/* + * Wart: xenkbd needs to know resolution. Put it here until a better + * solution is found, but don't leak it to the backend. 
+ */ +#ifdef __KERNEL__ +#define XENFB_WIDTH 800 +#define XENFB_HEIGHT 600 +#define XENFB_DEPTH 32 +#endif + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/kbdif.h b/sys/xen/interface/io/kbdif.h new file mode 100644 index 0000000..38193c8 --- /dev/null +++ b/sys/xen/interface/io/kbdif.h @@ -0,0 +1,130 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. 
+ */ + +/* Pointer movement event */ +#define XENKBD_TYPE_MOTION 1 +/* Event type 2 currently not used */ +/* Key event (includes pointer buttons) */ +#define XENKBD_TYPE_KEY 3 +/* + * Pointer position event + * Capable backend sets feature-abs-pointer in xenstore. + * Frontend requests it instead of XENKBD_TYPE_MOTION by setting + * request-abs-update in xenstore. + */ +#define XENKBD_TYPE_POS 4 + +struct xenkbd_motion +{ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ +}; + +struct xenkbd_key +{ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ +}; + +struct xenkbd_position +{ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ +}; + +#define XENKBD_IN_EVENT_SIZE 40 + +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; +}; + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + * No out events currently defined. 
+ */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/netif.h b/sys/xen/interface/io/netif.h new file mode 100644 index 0000000..a2e885c --- /dev/null +++ b/sys/xen/interface/io/netif.h @@ -0,0 +1,184 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). 
+ */ + +/* + * This is the 'wire' format for packets: + * Request 1: netif_tx_request -- NETTXF_* (any flags) + * [Request 2: netif_extra_info] (only if request 1 has NETTXF_extra_info) + * [Request 3: netif_extra_info] (only if request 2 has XEN_NETIF_EXTRA_FLAG_MORE) + * Request 4: netif_tx_request -- NETTXF_more_data + * Request 5: netif_tx_request -- NETTXF_more_data + * ... + * Request N: netif_tx_request -- 0 + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. */ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +struct netif_tx_request { + grant_ref_t gref; /* Reference to buffer page */ + uint16_t offset; /* Offset within buffer page */ + uint16_t flags; /* NETTXF_* */ + uint16_t id; /* Echoed in response message. */ + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct netif_tx_request netif_tx_request_t; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MAX (2) + +/* netif_extra_info flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types - only TCPv4 currently supported. */ +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) + +/* + * This structure needs to fit within both netif_tx_request and + * netif_rx_response for compatibility. 
+ */ +struct netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + struct { + /* + * Maximum payload size of each segment. For example, for TCP this + * is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of the packet and any + * extra features required to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + uint16_t pad[3]; + } u; +}; + +struct netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ +}; +typedef struct netif_tx_response netif_tx_response_t; + +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + grant_ref_t gref; /* Reference to incoming granted frame */ +}; +typedef struct netif_rx_request netif_rx_request_t; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +struct netif_rx_response { + uint16_t id; + uint16_t offset; /* Offset in page of start of received packet */ + uint16_t flags; /* NETRXF_* */ + int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ +}; +typedef struct netif_rx_response netif_rx_response_t; + +/* + * Generate netif ring structures and types. 
+ */ + +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ +#define NETIF_RSP_NULL 1 + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/pciif.h b/sys/xen/interface/io/pciif.h new file mode 100644 index 0000000..42dcfdb --- /dev/null +++ b/sys/xen/interface/io/pciif.h @@ -0,0 +1,83 @@ +/* + * PCI Backend/Frontend Common Data Structures & Macros + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#ifndef __XEN_PCI_COMMON_H__ +#define __XEN_PCI_COMMON_H__ + +/* Be sure to bump this number if you change this file */ +#define XEN_PCI_MAGIC "7" + +/* xen_pci_sharedinfo flags */ +#define _XEN_PCIF_active (0) +#define XEN_PCIF_active (1<<_XEN_PCI_active) + +/* xen_pci_op commands */ +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) + +/* xen_pci_op error numbers */ +#define XEN_PCI_ERR_success (0) +#define XEN_PCI_ERR_dev_not_found (-1) +#define XEN_PCI_ERR_invalid_offset (-2) +#define XEN_PCI_ERR_access_denied (-3) +#define XEN_PCI_ERR_not_implemented (-4) +/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ +#define XEN_PCI_ERR_op_failed (-5) + +struct xen_pci_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; + + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; + + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; +}; + +struct xen_pci_sharedinfo { + /* flags - XEN_PCIF_* */ + uint32_t flags; + struct xen_pci_op op; +}; + +#endif /* __XEN_PCI_COMMON_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/protocols.h b/sys/xen/interface/io/protocols.h new file mode 100644 index 0000000..01fc8ae --- /dev/null +++ b/sys/xen/interface/io/protocols.h @@ -0,0 +1,21 @@ +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_IA64 "ia64-abi" +#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE 
XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__ia64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 +#elif defined(__powerpc64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 +#else +# error arch fixup needed here +#endif + +#endif diff --git a/sys/xen/interface/io/ring.h b/sys/xen/interface/io/ring.h new file mode 100644 index 0000000..355eba5 --- /dev/null +++ b/sys/xen/interface/io/ring.h @@ -0,0 +1,299 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. 
*/ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). 
To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. 
+ */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* Initialize to existing shared indexes -- for recovery */ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. 
*/ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + wmb(); /* front sees responses /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. 
True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + wmb(); /* front sees responses /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + mb(); /* front sees new responses /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local 
variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/tpmif.h b/sys/xen/interface/io/tpmif.h new file mode 100644 index 0000000..02ccdab --- /dev/null +++ b/sys/xen/interface/io/tpmif.h @@ -0,0 +1,77 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from tools/libxc/xen/io/netif.h + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_TPMIF_H__ +#define __XEN_PUBLIC_IO_TPMIF_H__ + +#include "../grant_table.h" + +struct tpmif_tx_request { + unsigned long addr; /* Machine address of packet. 
*/ + grant_ref_t ref; /* grant table access reference */ + uint16_t unused; + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct tpmif_tx_request tpmif_tx_request_t; + +/* + * The TPMIF_TX_RING_SIZE defines the number of pages the + * front-end and backend can exchange (= size of array). + */ +typedef uint32_t TPMIF_RING_IDX; + +#define TPMIF_TX_RING_SIZE 1 + +/* This structure must fit in a memory page. */ + +struct tpmif_ring { + struct tpmif_tx_request req; +}; +typedef struct tpmif_ring tpmif_ring_t; + +struct tpmif_tx_interface { + struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; +}; +typedef struct tpmif_tx_interface tpmif_tx_interface_t; + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/xenbus.h b/sys/xen/interface/io/xenbus.h new file mode 100644 index 0000000..28584e5 --- /dev/null +++ b/sys/xen/interface/io/xenbus.h @@ -0,0 +1,73 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/io/xs_wire.h b/sys/xen/interface/io/xs_wire.h new file mode 100644 index 0000000..927ed8c --- /dev/null +++ b/sys/xen/interface/io/xs_wire.h @@ -0,0 +1,121 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. 
*/ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#define XSD_ERROR(x) { x, #x } +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN) +}; + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* Inter-domain shared memory communications. */ +#define XENSTORE_RING_SIZE 1024 +typedef uint32_t XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. 
*/ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; +}; + +#endif /* _XS_WIRE_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/kexec.h b/sys/xen/interface/kexec.h new file mode 100644 index 0000000..ca6e85b --- /dev/null +++ b/sys/xen/interface/kexec.h @@ -0,0 +1,137 @@ +/****************************************************************************** + * kexec.h - Public portion + * + * Xen port written by: + * - Simon 'Horms' Horman <horms@verge.net.au> + * - Magnus Damm <magnus@valinux.co.jp> + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + + +/* This file describes the Kexec / Kdump hypercall interface for Xen. + * + * Kexec under vanilla Linux allows a user to reboot the physical machine + * into a new user-specified kernel. The Xen port extends this idea + * to allow rebooting of the machine from dom0. When kexec for dom0 + * is used to reboot, both the hypervisor and the domains get replaced + * with some other kernel. It is possible to kexec between vanilla + * Linux and Xen and back again. Xen to Xen works well too. + * + * The hypercall interface for kexec can be divided into three main + * types of hypercall operations: + * + * 1) Range information: + * This is used by the dom0 kernel to ask the hypervisor about various + * address information. This information is needed to allow kexec-tools + * to fill in the ELF headers for /proc/vmcore properly. + * + * 2) Load and unload of images: + * There are no big surprises here, the kexec binary from kexec-tools + * runs in userspace in dom0. The tool loads/unloads data into the + * dom0 kernel such as new kernel, initramfs and hypervisor. When + * loaded the dom0 kernel performs a load hypercall operation, and + * before releasing all page references the dom0 kernel calls unload. 
+ * + * 3) Kexec operation: + * This is used to start a previously loaded kernel. + */ + +#include "xen.h" + +#if defined(__i386__) || defined(__x86_64__) +#define KEXEC_XEN_NO_PAGES 17 +#endif + +/* + * Prototype for this hypercall is: + * int kexec_op(int cmd, void *args) + * @cmd == KEXEC_CMD_... + * KEXEC operation to perform + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Kexec supports two types of operation: + * - kexec into a regular kernel, very similar to a standard reboot + * - KEXEC_TYPE_DEFAULT is used to specify this type + * - kexec into a special "crash kernel", aka kexec-on-panic + * - KEXEC_TYPE_CRASH is used to specify this type + * - parts of our system may be broken at kexec-on-panic time + * - the code should be kept as simple and self-contained as possible + */ + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + + +/* The kexec implementation for Xen allows the user to load two + * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. + * All data needed for a kexec reboot is kept in one xen_kexec_image_t + * per "instance". The data mainly consists of machine address lists to pages + * together with destination addresses. The data in xen_kexec_image_t + * is passed to the "code page" which is one page of code that performs + * the final relocations before jumping to the new kernel. + */ + +typedef struct xen_kexec_image { +#if defined(__i386__) || defined(__x86_64__) + unsigned long page_list[KEXEC_XEN_NO_PAGES]; +#endif + unsigned long indirection_page; + unsigned long start_address; +} xen_kexec_image_t; + +/* + * Perform kexec having previously loaded a kexec or kdump kernel + * as appropriate. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + */ +#define KEXEC_CMD_kexec 0 +typedef struct xen_kexec_exec { + int type; +} xen_kexec_exec_t; + +/* + * Load/Unload kernel image for kexec or kdump. 
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * image == relocation information for kexec (ignored for unload) [in] + */ +#define KEXEC_CMD_kexec_load 1 +#define KEXEC_CMD_kexec_unload 2 +typedef struct xen_kexec_load { + int type; + xen_kexec_image_t image; +} xen_kexec_load_t; + +#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ +#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ +#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ + +/* + * Find the address and size of certain memory areas + * range == KEXEC_RANGE_... [in] + * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] + * size == number of bytes reserved in window [out] + * start == address of the first byte in the window [out] + */ +#define KEXEC_CMD_kexec_get_range 3 +typedef struct xen_kexec_range { + int range; + int nr; + unsigned long size; + unsigned long start; +} xen_kexec_range_t; + +#endif /* _XEN_PUBLIC_KEXEC_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/libelf.h b/sys/xen/interface/libelf.h new file mode 100644 index 0000000..4a580e0 --- /dev/null +++ b/sys/xen/interface/libelf.h @@ -0,0 +1,241 @@ +#ifndef __XC_LIBELF__ +#define __XC_LIBELF__ 1 + +#if defined(__i386__) || defined(__x86_64) || defined(__ia64__) +#define XEN_ELF_LITTLE_ENDIAN +#elif defined(__powerpc__) +#define XEN_ELF_BIG_ENDIAN +#else +#error define architectural endianness +#endif + +#undef ELFSIZE +#include "elfnote.h" +#include "elfstructs.h" +#include "features.h" + +/* ------------------------------------------------------------------------ */ + +typedef union { + Elf32_Ehdr e32; + Elf64_Ehdr e64; +} elf_ehdr; + +typedef union { + Elf32_Phdr e32; + Elf64_Phdr e64; +} elf_phdr; + +typedef union { + Elf32_Shdr e32; + Elf64_Shdr e64; +} elf_shdr; + +typedef union { + Elf32_Sym e32; + 
Elf64_Sym e64; +} elf_sym; + +typedef union { + Elf32_Rel e32; + Elf64_Rel e64; +} elf_rel; + +typedef union { + Elf32_Rela e32; + Elf64_Rela e64; +} elf_rela; + +typedef union { + Elf32_Note e32; + Elf64_Note e64; +} elf_note; + +struct elf_binary { + /* elf binary */ + const char *image; + size_t size; + char class; + char data; + + const elf_ehdr *ehdr; + const char *sec_strtab; + const elf_shdr *sym_tab; + const char *sym_strtab; + + /* loaded to */ + char *dest; + uint64_t pstart; + uint64_t pend; + uint64_t reloc_offset; + +#ifndef __XEN__ + /* misc */ + FILE *log; +#endif + int verbose; +}; + +/* ------------------------------------------------------------------------ */ +/* accessing elf header fields */ + +#ifdef XEN_ELF_BIG_ENDIAN +# define NATIVE_ELFDATA ELFDATA2MSB +#else +# define NATIVE_ELFDATA ELFDATA2LSB +#endif + +#define elf_32bit(elf) (ELFCLASS32 == (elf)->class) +#define elf_64bit(elf) (ELFCLASS64 == (elf)->class) +#define elf_msb(elf) (ELFDATA2MSB == (elf)->data) +#define elf_lsb(elf) (ELFDATA2LSB == (elf)->data) +#define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data) + +#define elf_uval(elf, str, elem) \ + ((ELFCLASS64 == (elf)->class) \ + ? elf_access_unsigned((elf), (str), \ + offsetof(typeof(*(str)),e64.elem), \ + sizeof((str)->e64.elem)) \ + : elf_access_unsigned((elf), (str), \ + offsetof(typeof(*(str)),e32.elem), \ + sizeof((str)->e32.elem))) + +#define elf_sval(elf, str, elem) \ + ((ELFCLASS64 == (elf)->class) \ + ? elf_access_signed((elf), (str), \ + offsetof(typeof(*(str)),e64.elem), \ + sizeof((str)->e64.elem)) \ + : elf_access_signed((elf), (str), \ + offsetof(typeof(*(str)),e32.elem), \ + sizeof((str)->e32.elem))) + +#define elf_size(elf, str) \ + ((ELFCLASS64 == (elf)->class) \ + ? 
sizeof((str)->e64) \ + : sizeof((str)->e32)) + +uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr, + uint64_t offset, size_t size); +int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, + uint64_t offset, size_t size); + +uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_tools.c */ + +int elf_shdr_count(struct elf_binary *elf); +int elf_phdr_count(struct elf_binary *elf); + +const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name); +const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index); +const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index); + +const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr); +const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr); +const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr); + +const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr); +const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr); + +const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol); +const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index); + +const char *elf_note_name(struct elf_binary *elf, const elf_note * note); +const void *elf_note_desc(struct elf_binary *elf, const elf_note * note); +uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note); +const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note); + +int elf_is_elfbinary(const void *image); +int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_loader.c */ + +int elf_init(struct elf_binary *elf, const char *image, size_t size); +#ifdef __XEN__ +void elf_set_verbose(struct elf_binary *elf); +#else +void elf_set_logfile(struct elf_binary *elf, 
FILE * log, int verbose); +#endif + +void elf_parse_binary(struct elf_binary *elf); +void elf_load_binary(struct elf_binary *elf); + +void *elf_get_ptr(struct elf_binary *elf, unsigned long addr); +uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_relocate.c */ + +int elf_reloc(struct elf_binary *elf); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_dominfo.c */ + +#define UNSET_ADDR ((uint64_t)-1) + +enum xen_elfnote_type { + XEN_ENT_NONE = 0, + XEN_ENT_LONG = 1, + XEN_ENT_STR = 2 +}; + +struct xen_elfnote { + enum xen_elfnote_type type; + const char *name; + union { + const char *str; + uint64_t num; + } data; +}; + +struct elf_dom_parms { + /* raw */ + const char *guest_info; + const void *elf_note_start; + const void *elf_note_end; + struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1]; + + /* parsed */ + char guest_os[16]; + char guest_ver[16]; + char xen_ver[16]; + char loader[16]; + int pae; + int bsd_symtab; + uint64_t virt_base; + uint64_t virt_entry; + uint64_t virt_hypercall; + uint64_t virt_hv_start_low; + uint64_t elf_paddr_offset; + uint32_t f_supported[XENFEAT_NR_SUBMAPS]; + uint32_t f_required[XENFEAT_NR_SUBMAPS]; + + /* calculated */ + uint64_t virt_offset; + uint64_t virt_kstart; + uint64_t virt_kend; +}; + +static inline void elf_xen_feature_set(int nr, uint32_t * addr) +{ + addr[nr >> 5] |= 1 << (nr & 31); +} +static inline int elf_xen_feature_get(int nr, uint32_t * addr) +{ + return !!(addr[nr >> 5] & (1 << (nr & 31))); +} + +int elf_xen_parse_features(const char *features, + uint32_t *supported, + uint32_t *required); +int elf_xen_parse_note(struct elf_binary *elf, + struct elf_dom_parms *parms, + const elf_note *note); +int elf_xen_parse_guest_info(struct elf_binary *elf, + struct elf_dom_parms *parms); +int elf_xen_parse(struct elf_binary *elf, + struct elf_dom_parms *parms); + 
+#endif /* __XC_LIBELF__ */ diff --git a/sys/xen/interface/memory.h b/sys/xen/interface/memory.h new file mode 100644 index 0000000..0fb4086 --- /dev/null +++ b/sys/xen/interface/memory.h @@ -0,0 +1,281 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +/* + * Increase or decrease the specified domain's memory reservation. Returns the + * number of extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. 
+ */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 +struct xen_memory_reservation { + + /* + * XENMEM_increase_reservation: + * OUT: MFN (*not* GMFN) bases of extents that were allocated + * XENMEM_decrease_reservation: + * IN: GMFN bases of extents to free + * XENMEM_populate_physmap: + * IN: GPFN bases of extents to populate with memory + * OUT: GMFN bases of extents that were allocated + * (NB. This command also updates the mach_to_phys translation table) + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). */ + xen_ulong_t nr_extents; + unsigned int extent_order; + + /* + * Maximum # bits addressable by the user of the allocated region (e.g., + * I/O devices often have a 32-bit limitation even in 64-bit systems). If + * zero then the user has no addressing restriction. + * This field is not used by XENMEM_decrease_reservation. + */ + unsigned int address_bits; + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. + */ + domid_t domid; +}; +typedef struct xen_memory_reservation xen_memory_reservation_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); + +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. 
@in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input extents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + xen_ulong_t nr_exchanged; +}; +typedef struct xen_memory_exchange xen_memory_exchange_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); + +/* + * Returns the maximum machine frame number of mapped RAM in this system. + * This command always succeeds (it never returns an error code). + * arg == NULL. + */ +#define XENMEM_maximum_ram_page 2 + +/* + * Returns the current or maximum memory reservation, in pages, of the + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. + * arg == addr of domid_t. + */ +#define XENMEM_current_reservation 3 +#define XENMEM_maximum_reservation 4 + +/* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys + * mapping table. Architectures which do not have a m2p table do not implement + * this command. + * arg == addr of xen_machphys_mfn_list_t. + */ +#define XENMEM_machphys_mfn_list 5 +struct xen_machphys_mfn_list { + /* + * Size of the 'extent_start' array. Fewer entries will be filled if the + * machphys table is smaller than max_extents * 2MB. 
+ */ + unsigned int max_extents; + + /* + * Pointer to buffer to fill with list of extent starts. If there are + * any large discontiguities in the machine address space, 2MB gaps in + * the machphys table will be represented by an MFN base of zero. + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* + * Number of extents written to the above array. This will be smaller + * than 'max_extents' if the machphys table is smaller than + * max_extents * 2MB. + */ + unsigned int nr_extents; +}; +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); + +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ +}; +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); + +/* + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudo-physical address space. + * arg == addr of xen_add_to_physmap_t. + */ +#define XENMEM_add_to_physmap 7 +struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* Source mapping space. */ +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ + unsigned int space; + + /* Index into source mapping space. */ + xen_ulong_t idx; + + /* GPFN where the source mapping page should appear. */ + xen_pfn_t gpfn; +}; +typedef struct xen_add_to_physmap xen_add_to_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); + +/* + * Translates a list of domain-specific GPFNs into MFNs. 
Returns a -ve error + * code on failure. This call only works for auto-translated guests. + */ +#define XENMEM_translate_gpfn_list 8 +struct xen_translate_gpfn_list { + /* Which domain to translate for? */ + domid_t domid; + + /* Length of list. */ + xen_ulong_t nr_gpfns; + + /* List of GPFNs to translate. */ + XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; + + /* + * Output list to contain MFN translations. May be the same as the input + * list (in which case each input GPFN is overwritten with the output MFN). + */ + XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; +}; +typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + void *buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. 
+ */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; +}; +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/nmi.h b/sys/xen/interface/nmi.h new file mode 100644 index 0000000..b2b8401 --- /dev/null +++ b/sys/xen/interface/nmi.h @@ -0,0 +1,78 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* Parity error reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_parity_error 1 +#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. + */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +typedef struct xennmi_callback xennmi_callback_t; +DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. 
+ */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/physdev.h b/sys/xen/interface/physdev.h new file mode 100644 index 0000000..6f78a09 --- /dev/null +++ b/sys/xen/interface/physdev.h @@ -0,0 +1,169 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. 
+ */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. + */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ + XEN_GUEST_HANDLE_00030205(uint8_t) bitmap; + uint32_t nr_ports; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); + +/* + * Read or write an IO-APIC register. + * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. 
+ * @arg == pointer to physdev_irq structure. + */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +/* + * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op() + * hypercall since 0x00030202. + */ +struct physdev_op { + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +/* + * These all-capitals physdev operation names are superseded by the new names + * (defined above) since interface version 0x00030202. 
+ */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/platform.h b/sys/xen/interface/platform.h new file mode 100644 index 0000000..dd3acb2 --- /dev/null +++ b/sys/xen/interface/platform.h @@ -0,0 +1,183 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include "xen.h" + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, + * 1 January, 1970 if the current system time was <system_time>. + */ +#define XENPF_settime 17 +struct xenpf_settime { + /* IN variables. */ + uint32_t secs; + uint32_t nsecs; + uint64_t system_time; +}; +typedef struct xenpf_settime xenpf_settime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_del_memtype to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype 31 +struct xenpf_add_memtype { + /* IN variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_add_memtype xenpf_add_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype 32 +struct xenpf_del_memtype { + /* IN variables. 
*/ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_del_memtype xenpf_del_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). */ +#define XENPF_read_memtype 33 +struct xenpf_read_memtype { + /* IN variables. */ + uint32_t reg; + /* OUT variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; +}; +typedef struct xenpf_read_memtype xenpf_read_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); + +#define XENPF_microcode_update 35 +struct xenpf_microcode_update { + /* IN variables. */ + void * data; /* Pointer to microcode data */ + uint32_t length; /* Length of microcode data. */ +}; +typedef struct xenpf_microcode_update xenpf_microcode_update_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); + +#define XENPF_platform_quirk 39 +#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ +#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ +struct xenpf_platform_quirk { + /* IN variables. */ + uint32_t quirk_id; +}; +typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); + +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. 
*/ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + void * edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. */ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8_t) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + +struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ + union { + struct xenpf_settime settime; + struct xenpf_add_memtype add_memtype; + struct xenpf_del_memtype del_memtype; + struct xenpf_read_memtype read_memtype; + struct xenpf_microcode_update microcode; + struct xenpf_platform_quirk platform_quirk; + struct xenpf_firmware_info firmware_info; + uint8_t pad[128]; + } u; +}; +typedef struct xen_platform_op xen_platform_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/sched.h b/sys/xen/interface/sched.h new file mode 100644 index 0000000..2227a95 --- /dev/null +++ b/sys/xen/interface/sched.h @@ -0,0 +1,121 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and 
associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include "event_channel.h" + +/* + * The prototype for this hypercall is: + * long sched_op(int cmd, void *arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * This legacy version is available to new guests as sched_op_compat(). + */ + +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. 
+ * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown structure. + */ +#define SCHEDOP_shutdown 2 +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* */ +}; +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll structure. + */ +#define SCHEDOP_poll 3 +struct sched_poll { + XEN_GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_xxx reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. 
*/ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/sysctl.h b/sys/xen/interface/sysctl.h new file mode 100644 index 0000000..d51052a --- /dev/null +++ b/sys/xen/interface/sysctl.h @@ -0,0 +1,198 @@ +/****************************************************************************** + * sysctl.h + * + * System management operations. For use by node control stack. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_SYSCTL_H__ +#define __XEN_PUBLIC_SYSCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "sysctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "domctl.h" + +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000003 + +/* + * Read console content from Xen buffer ring. + */ +#define XEN_SYSCTL_readconsole 1 +struct xen_sysctl_readconsole { + /* IN variables. */ + uint32_t clear; /* Non-zero -> clear after reading. */ + XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */ + /* IN/OUT variables. */ + uint32_t count; /* In: Buffer size; Out: Used buffer size */ +}; +typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); + +/* Get trace buffers machine base address */ +#define XEN_SYSCTL_tbuf_op 2 +struct xen_sysctl_tbuf_op { + /* IN variables */ +#define XEN_SYSCTL_TBUFOP_get_info 0 +#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 +#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 +#define XEN_SYSCTL_TBUFOP_set_size 3 +#define XEN_SYSCTL_TBUFOP_enable 4 +#define XEN_SYSCTL_TBUFOP_disable 5 + uint32_t cmd; + /* IN/OUT variables */ + struct xenctl_cpumap cpu_mask; + uint32_t evt_mask; + /* OUT variables */ + uint64_aligned_t buffer_mfn; + uint32_t size; +}; +typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); + +/* + * Get physical information about the host machine + */ +#define XEN_SYSCTL_physinfo 3 +struct xen_sysctl_physinfo { + uint32_t threads_per_core; + uint32_t cores_per_socket; + uint32_t sockets_per_node; + uint32_t nr_nodes; + uint32_t cpu_khz; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; + uint32_t hw_cap[8]; +}; +typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); + +/* + * Get the ID of the current scheduler. 
+ */ +#define XEN_SYSCTL_sched_id 4 +struct xen_sysctl_sched_id { + /* OUT variable */ + uint32_t sched_id; +}; +typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); + +/* Interface for controlling Xen software performance counters. */ +#define XEN_SYSCTL_perfc_op 5 +/* Sub-operations: */ +#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ +#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ +struct xen_sysctl_perfc_desc { + char name[80]; /* name of perf counter */ + uint32_t nr_vals; /* number of values for this counter */ +}; +typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); +typedef uint32_t xen_sysctl_perfc_val_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); + +struct xen_sysctl_perfc_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ + /* OUT variables. */ + uint32_t nr_counters; /* number of counters description */ + uint32_t nr_vals; /* number of values */ + /* counter information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + /* counter values (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; +}; +typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); + +#define XEN_SYSCTL_getdomaininfolist 6 +struct xen_sysctl_getdomaininfolist { + /* IN variables. */ + domid_t first_domain; + uint32_t max_domains; + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + /* OUT variables. */ + uint32_t num_domains; +}; +typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); + +/* Inject debug keys into Xen. */ +#define XEN_SYSCTL_debug_keys 7 +struct xen_sysctl_debug_keys { + /* IN variables. 
*/ + XEN_GUEST_HANDLE_64(char) keys; + uint32_t nr_keys; +}; +typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. */ +#define XEN_SYSCTL_getcpuinfo 8 +struct xen_sysctl_cpuinfo { + uint64_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); + +struct xen_sysctl { + uint32_t cmd; + uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ + union { + struct xen_sysctl_readconsole readconsole; + struct xen_sysctl_tbuf_op tbuf_op; + struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_sched_id sched_id; + struct xen_sysctl_perfc_op perfc_op; + struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; + uint8_t pad[128]; + } u; +}; +typedef struct xen_sysctl xen_sysctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); + +#endif /* __XEN_PUBLIC_SYSCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/trace.h b/sys/xen/interface/trace.h new file mode 100644 index 0000000..33654e4 --- /dev/null +++ b/sys/xen/interface/trace.h @@ -0,0 +1,120 @@ +/****************************************************************************** + * include/public/trace.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, 
merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Mark Williamson, (C) 2004 Intel Research Cambridge + * Copyright (C) 2005 Bin Ren + */ + +#ifndef __XEN_PUBLIC_TRACE_H__ +#define __XEN_PUBLIC_TRACE_H__ + +/* Trace classes */ +#define TRC_CLS_SHIFT 16 +#define TRC_GEN 0x0001f000 /* General trace */ +#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ +#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ +#define TRC_HVM 0x0008f000 /* Xen HVM trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ +#define TRC_ALL 0xfffff000 + +/* Trace subclasses */ +#define TRC_SUBCLS_SHIFT 12 + +/* trace subclasses for SVM */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ + +/* Trace events per class */ +#define TRC_LOST_RECORDS (TRC_GEN + 1) + +#define TRC_SCHED_DOM_ADD (TRC_SCHED + 1) +#define TRC_SCHED_DOM_REM (TRC_SCHED + 2) +#define TRC_SCHED_SLEEP (TRC_SCHED + 3) +#define TRC_SCHED_WAKE (TRC_SCHED + 4) +#define TRC_SCHED_YIELD (TRC_SCHED + 5) +#define TRC_SCHED_BLOCK (TRC_SCHED + 6) +#define TRC_SCHED_SHUTDOWN (TRC_SCHED + 7) +#define TRC_SCHED_CTL (TRC_SCHED + 8) +#define TRC_SCHED_ADJDOM (TRC_SCHED + 9) +#define 
TRC_SCHED_SWITCH (TRC_SCHED + 10) +#define TRC_SCHED_S_TIMER_FN (TRC_SCHED + 11) +#define TRC_SCHED_T_TIMER_FN (TRC_SCHED + 12) +#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED + 13) +#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED + 14) +#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED + 15) + +#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) +#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) +#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) + +/* trace events per subclass */ +#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) +#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) +#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01) +#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02) +#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03) +#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04) +#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05) +#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06) +#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07) +#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08) +#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09) +#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A) +#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B) +#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C) +#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D) +#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E) +#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F) +#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10) +#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11) +#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12) +#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13) +#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) +#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) + +/* This structure represents a single trace buffer record. */ +struct t_rec { + uint64_t cycles; /* cycle counter timestamp */ + uint32_t event; /* event ID */ + unsigned long data[5]; /* event data items */ +}; + +/* + * This structure contains the metadata for a single trace buffer. The cons + * and prod fields index into an array of struct t_rec's. 
+ */ +struct t_buf { + uint32_t cons; /* Next item to be consumed by control tools. */ + uint32_t prod; /* Next item to be produced by Xen. */ + /* 'nr_recs' records follow immediately after the meta-data header. */ +}; + +#endif /* __XEN_PUBLIC_TRACE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/vcpu.h b/sys/xen/interface/vcpu.h new file mode 100644 index 0000000..a84eb51 --- /dev/null +++ b/sys/xen/interface/vcpu.h @@ -0,0 +1,192 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +/* + * Prototype for this hypercall is: + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). + */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). + * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPUOP_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practice to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +/* + * Return information about the state and running time of a VCPU. + * @extra_arg == pointer to vcpu_runstate_info structure. + */ +#define VCPUOP_get_runstate_info 4 +struct vcpu_runstate_info { + /* VCPU's current state (RUNSTATE_*). 
*/ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; +}; +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); + +/* VCPU is currently running on a physical CPU. */ +#define RUNSTATE_running 0 + +/* VCPU is runnable, but not currently scheduled on any physical CPU. */ +#define RUNSTATE_runnable 1 + +/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ +#define RUNSTATE_blocked 2 + +/* + * VCPU is not runnable, but it is not blocked. + * This is a 'catch all' state for things like hotplug and pauses by the + * system administrator (or for critical sections in the hypervisor). + * RUNSTATE_blocked dominates this state (it is the preferred state). + */ +#define RUNSTATE_offline 3 + +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. + * Notes: + * 1. The registered address may be virtual or physical or guest handle, + * depending on the platform. Virtual address or guest handle should be + * registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. 
+ */ +#define VCPUOP_register_runstate_memory_area 5 +struct vcpu_register_runstate_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. + */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). */ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + * + * This may be called only once per vcpu. 
+ */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ +struct vcpu_register_vcpu_info { + uint64_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ + uint32_t rsvd; /* unused */ +}; +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +#endif /* __XEN_PUBLIC_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/version.h b/sys/xen/interface/version.h new file mode 100644 index 0000000..944ca62 --- /dev/null +++ b/sys/xen/interface/version.h @@ -0,0 +1,91 @@ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com> + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) + +/* arg == xen_compile_info_t. */ +#define XENVER_compile_info 2 +struct xen_compile_info { + char compiler[64]; + char compile_by[16]; + char compile_domain[32]; + char compile_date[32]; +}; +typedef struct xen_compile_info xen_compile_info_t; + +#define XENVER_capabilities 3 +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) + +#define XENVER_changeset 4 +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) + +#define XENVER_platform_parameters 5 +struct xen_platform_parameters { + unsigned long virt_start; +}; +typedef struct xen_platform_parameters xen_platform_parameters_t; + +#define XENVER_get_features 6 +struct xen_feature_info { + unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap; /* OUT: 32-bit submap */ +}; +typedef struct xen_feature_info xen_feature_info_t; + +/* Declares the features reported by XENVER_get_features. */ +#include "features.h" + +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + +/* arg == xen_domain_handle_t. 
*/ +#define XENVER_guest_handle 8 + +#endif /* __XEN_PUBLIC_VERSION_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/xen-compat.h b/sys/xen/interface/xen-compat.h new file mode 100644 index 0000000..19b0a2c --- /dev/null +++ b/sys/xen/interface/xen-compat.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Xen is built with matching headers and implements the latest interface. 
*/ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +/* Fields defined as a Xen guest handle since 0x00030205. */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 +#define XEN_GUEST_HANDLE_00030205(type) XEN_GUEST_HANDLE(type) +#else +#define XEN_GUEST_HANDLE_00030205(type) type * +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff --git a/sys/xen/interface/xen.h b/sys/xen/interface/xen.h new file mode 100644 index 0000000..0606dbe --- /dev/null +++ b/sys/xen/interface/xen.h @@ -0,0 +1,613 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include <xen/interface/xen-compat.h> + +#if defined(__i386__) || defined(__x86_64__) +#include <xen/interface/arch-x86/xen.h> +#elif defined(__ia64__) +#include "arch-ia64.h" +#elif defined(__powerpc__) +#include "arch-powerpc.h" +#else +#error "Unsupported architecture" +#endif + +/* + * HYPERCALLS + */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define 
__HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#warning using compat ops +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. 
(DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 + +#define NR_VIRQS 24 + +/* + * MMU-UPDATE REQUESTS + * + * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * ptr[1:0] specifies the appropriate MMU_* command. + * + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table. If updating an L1 table, and the new + * table entry is valid/present, the mapped frame must belong to the FD, if + * an FD has been specified. If attempting to map an I/O page then the + * caller assumes the privilege of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ + +/* + * MMU EXTENDED OPERATIONS + * + * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). 
+ * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. 
+ */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ + XEN_GUEST_HANDLE_00030205(void) vcpumask; + } arg2; +}; +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); +#endif + +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. 
*/ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); + +/* + * Send an array of these to HYPERVISOR_multicall(). + * NB. The fields are natural register size for this architecture. 
+ */ +struct multicall_entry { + unsigned long op, result; + unsigned long args[6]; +}; +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); + +/* + * Event channel endpoints per domain: + * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + */ +#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). 
+ * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif + +/* + * Xen/kernel shared data -- pointer provided in start_info. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. + */ +struct shared_info { + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. 
Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and advertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + unsigned long evtchn_pending[sizeof(unsigned long) * 8]; + unsigned long evtchn_mask[sizeof(unsigned long) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. 
*/ + + struct arch_shared_info arch; + +}; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif + +/* + * Start-of-day memory layout: + * 1. The domain is started within a contiguous virtual-memory region. + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This is the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base, CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain. In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 8. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + */ + +#define MAX_GUEST_CMDLINE 1024 +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen-<version>-<platform>". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. 
*/ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; +}; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ + +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). 
*/ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +DEFINE_XEN_GUEST_HANDLE(uint8_t); +DEFINE_XEN_GUEST_HANDLE(uint16_t); +DEFINE_XEN_GUEST_HANDLE(uint32_t); +DEFINE_XEN_GUEST_HANDLE(uint64_t); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif +#endif + +#endif /* __XEN_PUBLIC_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/interface/xencomm.h b/sys/xen/interface/xencomm.h new file mode 100644 index 0000000..ac45e07 --- /dev/null +++ b/sys/xen/interface/xencomm.h @@ -0,0 +1,41 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2006 + */ + +#ifndef _XEN_XENCOMM_H_ +#define _XEN_XENCOMM_H_ + +/* A xencomm descriptor is a scatter/gather list containing physical + * addresses corresponding to a virtually contiguous memory area. The + * hypervisor translates these physical addresses to machine addresses to copy + * to and from the virtually contiguous area. + */ + +#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ +#define XENCOMM_INVALID (~0UL) + +struct xencomm_desc { + uint32_t magic; + uint32_t nr_addrs; /* the number of entries in address[] */ + uint64_t address[0]; +}; + +#endif /* _XEN_XENCOMM_H_ */ diff --git a/sys/xen/interface/xenoprof.h b/sys/xen/interface/xenoprof.h new file mode 100644 index 0000000..183078d --- /dev/null +++ b/sys/xen/interface/xenoprof.h @@ -0,0 +1,138 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system wide profiling based on hardware performance + * counters + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the 
/*
 * PC value that indicates a special code.  The expansion is parenthesised so
 * that it behaves as a single operand wherever the macro is used.
 * NOTE(review): struct event_log.eip is uint64_t while this constant has type
 * unsigned long; on a 32-bit build the two widths differ -- confirm that the
 * producer and consumer agree on the value.
 */
#define XENOPROF_ESCAPE_CODE (~0UL)
/* Transient events for the xenoprof->oprofile cpu buf */
#define XENOPROF_TRACE_BEGIN 1
event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +}; +#ifndef __XEN__ +typedef struct xenoprof_buf xenoprof_buf_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); +#endif + +struct xenoprof_init { + int32_t num_events; + int32_t is_primary; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +}; +typedef struct xenoprof_init xenoprof_init_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); + +struct xenoprof_get_buffer { + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +}; +typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); + +struct xenoprof_counter { + uint32_t ind; + uint64_t count; + uint32_t enabled; + uint32_t event; + uint32_t hypervisor; + uint32_t kernel; + uint32_t user; + uint64_t unit_mask; +}; +typedef struct xenoprof_counter xenoprof_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); + +typedef struct xenoprof_passive { + uint16_t domain_id; + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +} xenoprof_passive_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); + + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/xen/xenbus/init.txt b/sys/xen/xenbus/init.txt new file mode 100644 index 0000000..4249549 --- /dev/null +++ b/sys/xen/xenbus/init.txt @@ -0,0 +1,14 @@ + + +- frontend driver initializes static xenbus_driver with _ids, _probe, _remove, +_resume, _otherend_changed + + - initialization calls xenbus_register_frontend(xenbus_driver) + + - xenbus_register_frontend sets read_otherend details to read_backend_details + then calls xenbus_register_driver_common(xenbus_driver, xenbus_frontend) + + - xenbus_register_driver_common sets underlying driver 
name to xenbus_driver name + underlying driver bus to xenbus_frontend's bus, driver's probe to xenbus_dev_probe + driver's remove to xenbus_dev_remove then calls driver_register + diff --git a/sys/xen/xenbus/xenbus_client.c b/sys/xen/xenbus/xenbus_client.c new file mode 100644 index 0000000..45a5eb6 --- /dev/null +++ b/sys/xen/xenbus/xenbus_client.c @@ -0,0 +1,301 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. + * + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#if 0 +#define DPRINTK(fmt, args...) 
\ + printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <sys/malloc.h> +#include <sys/libkern.h> +#include <machine/xen/evtchn.h> +#include <xen/gnttab.h> +#include <machine/xen/xenbus.h> +#include <machine/stdarg.h> + + +#define EXPORT_SYMBOL(x) +#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK) +#define kfree(ptr) free(ptr, M_DEVBUF) +#define BUG_ON PANIC_IF + +int +xenbus_watch_path(struct xenbus_device *dev, char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + + watch->node = path; + watch->callback = callback; + + err = register_xenbus_watch(watch); + + if (err) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, err, "adding watch on %s", path); + } + + return err; +} +EXPORT_SYMBOL(xenbus_watch_path); + + +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + char *state = + kmalloc(strlen(path) + 1 + strlen(path2) + 1, GFP_KERNEL); + if (!state) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + strcpy(state, path); + strcat(state, "/"); + strcat(state, path2); + + err = xenbus_watch_path(dev, state, watch, callback); + + if (err) { + kfree(state); + } + return err; +} +EXPORT_SYMBOL(xenbus_watch_path2); + + +int xenbus_switch_state(struct xenbus_device *dev, + XenbusState state) +{ + /* We check whether the state is currently set to the given value, and + if not, then the state is set. We don't want to unconditionally + write the given state, because we don't want to fire watches + unnecessarily. 
Furthermore, if the node has gone, we don't write + to it, as the device will be tearing down, and we don't want to + resurrect that directory. + */ + + int current_state; + int err; + + if (state == dev->state) + return (0); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", + ¤t_state); + if (err != 1) + return 0; + + err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); + if (err) { + if (state != XenbusStateClosing) /* Avoid looping */ + xenbus_dev_fatal(dev, err, "writing new state"); + return err; + } + + dev->state = state; + return 0; + +} + +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); +#if 0 + complete(&dev->down); +#endif + return 0; +} + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. + */ +static char *error_path(struct xenbus_device *dev) +{ + char *path_buffer = kmalloc(strlen("error/") + strlen(dev->nodename) + + 1, GFP_KERNEL); + if (path_buffer == NULL) { + return NULL; + } + + strcpy(path_buffer, "error/"); + strcpy(path_buffer + strlen("error/"), dev->nodename); + + return path_buffer; +} + + +static void _dev_error(struct xenbus_device *dev, int err, const char *fmt, + va_list ap) +{ + int ret; + unsigned int len; + char *printf_buffer = NULL, *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + goto fail; + + len = sprintf(printf_buffer, "%i ", -err); + ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1); +#if 0 + dev_err(&dev->dev, "%s\n", printf_buffer); +#endif + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + printk("xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + + if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) 
!= 0) { + printk("xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + + fail: + if (printf_buffer) + kfree(printf_buffer); + if (path_buffer) + kfree(path_buffer); +} + + +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...) +{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL(xenbus_dev_error); + + +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...) +{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_switch_state(dev, XenbusStateClosing); +} +EXPORT_SYMBOL(xenbus_dev_fatal); + + +int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +{ + int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); + if (err < 0) + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; +} +EXPORT_SYMBOL(xenbus_grant_ring); + + +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = dev->otherend_id; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + if (err) + xenbus_dev_fatal(dev, err, "allocating event channel"); + else + *port = alloc_unbound.port; + return err; +} +EXPORT_SYMBOL(xenbus_alloc_evtchn); + + +int xenbus_free_evtchn(struct xenbus_device *dev, int port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) + xenbus_dev_error(dev, err, "freeing event channel %d", port); + return err; +} +EXPORT_SYMBOL(xenbus_free_evtchn); + + +XenbusState xenbus_read_driver_state(const char *path) +{ + XenbusState result; + + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (err) + result = XenbusStateClosed; + + return result; +} 
+EXPORT_SYMBOL(xenbus_read_driver_state); + + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff --git a/sys/xen/xenbus/xenbus_comms.c b/sys/xen/xenbus/xenbus_comms.c new file mode 100644 index 0000000..50acef3 --- /dev/null +++ b/sys/xen/xenbus/xenbus_comms.c @@ -0,0 +1,249 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talks to Xen Store: ringbuffer and event channel. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
/*
 * Copy len bytes from tdata into the xenstore request ring.
 *
 * The data is written in as many chunks as the ring geometry requires; after
 * each chunk the producer index is advanced and the other end is notified
 * through the store event channel.
 *
 * Returns 0 once everything has been queued, or -EIO if the ring indexes are
 * found to be inconsistent (in which case both request indexes are reset to
 * zero).
 */
int xb_write(const void *tdata, unsigned len)
{
	struct xenstore_domain_interface *intf = xenstore_domain_interface();
	XENSTORE_RING_IDX cons, prod;
	const char *data = (const char *)tdata;

	while (len != 0) {
		void *dst;
		unsigned int avail;
		/*
		 * Wait until the ring is not completely full.  The result of
		 * the wait is ignored: the indexes are re-read below and an
		 * empty chunk simply restarts the loop.
		 */
		wait_event_interruptible(&xb_waitq,
					 (intf->req_prod - intf->req_cons) !=
					 XENSTORE_RING_SIZE);

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		mb();
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}

		/* avail is the contiguous free space starting at prod. */
		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;
		mb();

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/* Other side must not see new header until data is there. */
		wmb();
		intf->req_prod += avail;

		/* This implies mb() before other side sees interrupt. */
		notify_remote_via_evtchn(xen_start_info->store_evtchn);
	}

	return 0;
}
*/ + rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + + /* Other side must not see free space until we've copied out */ + mb(); + intf->rsp_cons += avail; + + pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); + + /* Implies mb(): they will see new header. */ + notify_remote_via_evtchn(xen_start_info->store_evtchn); + } + + return 0; +} + +/* Set up interrupt handler off store event channel. */ +int xb_init_comms(void) +{ + struct xenstore_domain_interface *intf = xenstore_domain_interface(); + int err; + + if (intf->rsp_prod != intf->rsp_cons) { + log(LOG_WARNING, "XENBUS response ring is not quiescent " + "(%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + intf->rsp_cons = intf->rsp_prod; + } + + if (xenbus_irq) + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + + err = bind_caller_port_to_irqhandler( + xen_start_info->store_evtchn, + "xenbus", wake_waiting, NULL, INTR_TYPE_NET, NULL); + if (err <= 0) { + log(LOG_WARNING, "XENBUS request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + + return 0; +} + +/* + * Local variables: + * c-file-style: "bsd" + * indent-tabs-mode: t + * c-indent-level: 4 + * c-basic-offset: 8 + * tab-width: 4 + * End: + */ diff --git a/sys/xen/xenbus/xenbus_comms.h b/sys/xen/xenbus/xenbus_comms.h new file mode 100644 index 0000000..4ec46f6 --- /dev/null +++ b/sys/xen/xenbus/xenbus_comms.h @@ -0,0 +1,156 @@ +/* + * Private include for xenbus communications. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +int xs_init(void); +int xb_init_comms(void); + +/* Low level routines. 
/*
 * Sleep on wchan until condition becomes true or a signal arrives.
 *
 * ret is set to 0 once condition has been observed true.  A timeout of the
 * hz/10 poll interval (EWOULDBLOCK) or a plain wakeup merely re-tests the
 * condition; any other tsleep() error (EINTR/ERESTART, because of PCATCH)
 * ends the wait and is left in ret.
 *
 * The previous form stored !tsleep() in ret, which reported a normal wakeup
 * as non-zero and silently swallowed signals.
 */
#define __wait_event_interruptible(wchan, condition, ret)		\
do {									\
	for (;;) {							\
		if (condition) {					\
			(ret) = 0;					\
			break;						\
		}							\
		(ret) = tsleep((wchan), PWAIT | PCATCH, "waitev", hz/10); \
		if ((ret) == EWOULDBLOCK)				\
			(ret) = 0;	/* poll interval expired */	\
		if ((ret) != 0)						\
			break;		/* interrupted by a signal */	\
	}								\
} while (0)


/*
 * Expression form of the above: evaluates to 0 when condition holds and to
 * the tsleep() error when the wait was interrupted.  No sleep takes place if
 * condition is already true.
 */
#define wait_event_interruptible(wchan, condition)			\
({									\
	int __ret = 0;							\
	if (!(condition))						\
		__wait_event_interruptible(wchan, condition, __ret);	\
	__ret;								\
})
/*
 * Linux compatibility shims.
 *
 * The xenbus code below is written against Linux kernel names; these macros
 * map them onto FreeBSD primitives (malloc/free with M_DEVBUF, struct mtx,
 * struct sema, TAILQ, wakeup).
 *
 * NOTE(review): kfree() and ARRAY_SIZE() do not parenthesise their argument,
 * so pass only simple expressions.  xenbus_dev.c repeats several of these
 * definitions (kmalloc, BUG_ON, semaphore, rw_semaphore, spin_lock,
 * spin_unlock, DEFINE_SPINLOCK, DECLARE_MUTEX, u32, simple_strtoul) after
 * including this header; a macro may only be redefined with an identical
 * replacement list, so the expansions here and there must be changed
 * together.
 */

/* Allocation flag accepted for source compatibility; kmalloc() ignores it. */
#define GFP_KERNEL 1
#define EXPORT_SYMBOL(x)
#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK)
#define kfree(ptr) free((void *)(uintptr_t)ptr, M_DEVBUF)
#define BUG_ON PANIC_IF
#define semaphore sema
#define rw_semaphore sema
typedef struct mtx spinlock_t;
#define spin_lock mtx_lock
#define spin_unlock mtx_unlock
#define DEFINE_SPINLOCK(lock) struct mtx lock
#define DECLARE_MUTEX(lock) struct sema lock
#define u32 uint32_t
/* Entries are linked through a TAILQ field named "list". */
#define list_del(head, ent) TAILQ_REMOVE(head, ent, list)
#define simple_strtoul strtoul
/* Only meaningful for true arrays, not for pointers. */
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
#define list_empty TAILQ_EMPTY
#define wake_up wakeup
/* Size of the bus_id buffers passed to xen_bus_type.get_bus_id. */
#define BUS_ID_SIZE 128
b/sys/xen/xenbus/xenbus_dev.c @@ -0,0 +1,260 @@ +/* + * xenbus_dev.c + * + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/cdefs.h> +#include <sys/errno.h> +#include <sys/uio.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/conf.h> + + +#include <machine/xen/hypervisor.h> +#include <machine/xen/xenbus.h> +#include <machine/xen/hypervisor.h> +#include <xen/xenbus/xenbus_comms.h> + + + + +#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK) +#define BUG_ON PANIC_IF +#define semaphore sema +#define rw_semaphore sema +#define spin_lock mtx_lock +#define spin_unlock mtx_unlock +#define DEFINE_SPINLOCK(lock) struct mtx lock +#define DECLARE_MUTEX(lock) struct sema lock +#define u32 uint32_t +#define simple_strtoul strtoul + +struct xenbus_dev_transaction { + LIST_ENTRY(xenbus_dev_transaction) list; + struct xenbus_transaction handle; +}; + +struct xenbus_dev_data { + /* In-progress transaction. */ + LIST_HEAD(xdd_list_head, xenbus_dev_transaction) transactions; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[PAGE_SIZE]; + } u; + + /* Response queue. 
*/ +#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1)) + char read_buffer[PAGE_SIZE]; + unsigned int read_cons, read_prod; + int read_waitq; +}; +#if 0 +static struct proc_dir_entry *xenbus_dev_intf; +#endif +static int +xenbus_dev_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int i = 0; + struct xenbus_dev_data *u = dev->si_drv1; + + if (wait_event_interruptible(&u->read_waitq, + u->read_prod != u->read_cons)) + return EINTR; + + for (i = 0; i < uio->uio_iov[0].iov_len; i++) { + if (u->read_cons == u->read_prod) + break; + copyout(&u->read_buffer[MASK_READ_IDX(u->read_cons)], (char *)uio->uio_iov[0].iov_base+i, 1); + u->read_cons++; + uio->uio_resid--; + } + return 0; +} + +static void queue_reply(struct xenbus_dev_data *u, + char *data, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++, u->read_prod++) + u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i]; + + BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer)); + + wakeup(&u->read_waitq); +} + +static int +xenbus_dev_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int err = 0; + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans; + void *reply; + int len = uio->uio_iov[0].iov_len; + + if ((len + u->len) > sizeof(u->u.buffer)) + return EINVAL; + + if (copyin(u->u.buffer + u->len, uio->uio_iov[0].iov_base, len) != 0) + return EFAULT; + + u->len += len; + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + return len; + + switch (u->u.msg.type) { + case XS_TRANSACTION_START: + case XS_TRANSACTION_END: + case XS_DIRECTORY: + case XS_READ: + case XS_GET_PERMS: + case XS_RELEASE: + case XS_GET_DOMAIN_PATH: + case XS_WRITE: + case XS_MKDIR: + case XS_RM: + case XS_SET_PERMS: + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + } else { + if (u->u.msg.type == XS_TRANSACTION_START) { + trans = kmalloc(sizeof(*trans), GFP_KERNEL); + trans->handle.id = simple_strtoul(reply, NULL, 0); + 
LIST_INSERT_HEAD(&u->transactions, trans, list); + } else if (u->u.msg.type == XS_TRANSACTION_END) { + LIST_FOREACH(trans, &u->transactions, + list) + if (trans->handle.id == + u->u.msg.tx_id) + break; +#if 0 /* XXX does this mean the list is empty? */ + BUG_ON(&trans->list == &u->transactions); +#endif + LIST_REMOVE(trans, list); + kfree(trans); + } + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); + queue_reply(u, (char *)reply, u->u.msg.len); + kfree(reply); + } + break; + + default: + err = EINVAL; + break; + } + + if (err == 0) { + u->len = 0; + err = len; + } + + return err; +} + +static int xenbus_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u; + + if (xen_start_info->store_evtchn == 0) + return ENOENT; +#if 0 /* XXX figure out if equiv needed */ + nonseekable_open(inode, filp); +#endif + u = kmalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return ENOMEM; + + memset(u, 0, sizeof(*u)); + LIST_INIT(&u->transactions); + + dev->si_drv1 = u; + + return 0; +} + +static int xenbus_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans, *tmp; + + LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { + xenbus_transaction_end(trans->handle, 1); + LIST_REMOVE(trans, list); + kfree(trans); + } + + kfree(u); + return 0; +} + +static struct cdevsw xenbus_dev_cdevsw = { + .d_version = D_VERSION, + .d_read = xenbus_dev_read, + .d_write = xenbus_dev_write, + .d_open = xenbus_dev_open, + .d_close = xenbus_dev_close, + .d_name = "xenbus_dev", +}; + +static int +xenbus_dev_sysinit(void) +{ + make_dev(&xenbus_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, "xenbus"); + + return 0; +} +SYSINIT(xenbus_dev_sysinit, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, xenbus_dev_sysinit, NULL); +/* SYSINIT NEEDED XXX */ + + + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + 
* tab-width: 8 + * End: + */ diff --git a/sys/xen/xenbus/xenbus_probe.c b/sys/xen/xenbus/xenbus_probe.c new file mode 100644 index 0000000..1b718c5 --- /dev/null +++ b/sys/xen/xenbus/xenbus_probe.c @@ -0,0 +1,1122 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#if 0 +#define DPRINTK(fmt, args...) \ + printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) 
((void)0) +#endif + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/cdefs.h> +#include <sys/time.h> +#include <sys/sema.h> +#include <sys/eventhandler.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/conf.h> +#include <sys/systm.h> +#include <sys/syslog.h> +#include <sys/proc.h> +#include <sys/bus.h> +#include <sys/sx.h> + +#include <machine/xen/hypervisor.h> +#include <machine/xen/xenbus.h> +#include <machine/xen/evtchn.h> +#include <machine/stdarg.h> + +#include <xen/xenbus/xenbus_comms.h> + +struct xendev_list_head xenbus_device_frontend_list; +struct xendev_list_head xenbus_device_backend_list; +static LIST_HEAD(, xenbus_driver) xendrv_list; + +extern struct sx xenwatch_mutex; + +EVENTHANDLER_DECLARE(xenstore_event, xenstore_event_handler_t); +static struct eventhandler_list *xenstore_chain; +device_t xenbus_dev; +device_t xenbus_backend_dev; +static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device"); + +#define streq(a, b) (strcmp((a), (b)) == 0) + +static int watch_otherend(struct xenbus_device *dev); + + +/* If something in array of ids matches this device, return it. 
*/ +static const struct xenbus_device_id * +match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) +{ + for (; !streq(arr->devicetype, ""); arr++) { + if (streq(arr->devicetype, dev->devicetype)) + return arr; + } + return NULL; +} + +#if 0 +static int xenbus_match(device_t _dev) +{ + struct xenbus_driver *drv; + struct xenbus_device *dev; + + dev = device_get_softc(_dev); + drv = dev->driver; + + if (!drv->ids) + return 0; + + return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; +} +#endif + + +/* device/<type>/<id> => <type>-<id> */ +static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) { + log(LOG_WARNING, "XENBUS: bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + log(LOG_WARNING, "XENBUS: bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + + +static void free_otherend_details(struct xenbus_device *dev) +{ + kfree((void*)(uintptr_t)dev->otherend); + dev->otherend = NULL; +} + + +static void free_otherend_watch(struct xenbus_device *dev) +{ + if (dev->otherend_watch.node) { + unregister_xenbus_watch(&dev->otherend_watch); + kfree(dev->otherend_watch.node); + dev->otherend_watch.node = NULL; + } +} + +int +read_otherend_details(struct xenbus_device *xendev, char *id_node, + char *path_node) +{ + int err = xenbus_gather(XBT_NIL, xendev->nodename, + id_node, "%i", &xendev->otherend_id, + path_node, NULL, &xendev->otherend, + NULL); + if (err) { + xenbus_dev_fatal(xendev, err, + "reading other end details from %s", + xendev->nodename); + return err; + } + if (strlen(xendev->otherend) == 0 || + !xenbus_exists(XBT_NIL, xendev->otherend, "")) { + xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s", + xendev->nodename); + kfree((void *)(uintptr_t)xendev->otherend); + xendev->otherend = 
NULL; + return -ENOENT; + } + + return 0; +} + + +static int read_backend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "backend-id", "backend"); +} + +#ifdef notyet +/* XXX - move to probe backend */ +static int read_frontend_details(struct xenbus_device *xendev) +{ + if (strncmp(xendev->nodename, "backend", 7)) + return -ENOENT; + return read_otherend_details(xendev, "frontend-id", "frontend"); +} +#endif + +/* Bus type for frontend drivers. */ +static int xenbus_probe_frontend(const char *type, const char *name); +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/<id> */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .bus = &xenbus_device_frontend_list, +#if 0 + /* this initialization needs to happen dynamically */ + .bus = { + .name = "xen", + .match = xenbus_match, + }, + .dev = { + .bus_id = "xen", + }, +#endif +}; + +#if 0 +static int xenbus_hotplug_backend(device_t dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + panic("implement me"); +#if 0 + struct xenbus_device *xdev; + struct xenbus_driver *drv = NULL; + int i = 0; + int length = 0; + char *basepath_end; + char *frontend_id; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + if (dev->driver) + drv = to_xenbus_driver(dev->driver); + + /* stuff we want to pass to /sbin/hotplug */ + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); + + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "XENBUS_PATH=%s", xdev->nodename); + + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "XENBUS_BASE_PATH=%s", xdev->nodename); + + basepath_end = strrchr(envp[i - 1], '/'); + length -= strlen(basepath_end); + *basepath_end = '\0'; + basepath_end = strrchr(envp[i - 1], '/'); + length -= strlen(basepath_end); + 
*basepath_end = '\0'; + + basepath_end++; + frontend_id = kmalloc(strlen(basepath_end) + 1, GFP_KERNEL); + strcpy(frontend_id, basepath_end); + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "XENBUS_FRONTEND_ID=%s", frontend_id); + kfree(frontend_id); + + /* terminate, set to next free slot, shrink available space */ + envp[i] = NULL; + envp = &envp[i]; + num_envp -= i; + buffer = &buffer[length]; + buffer_size -= length; + + if (drv && drv->hotplug) + return drv->hotplug(xdev, envp, num_envp, buffer, buffer_size); + +#endif + return 0; +} +#endif + +#if 0 +static int xenbus_probe_backend(const char *type, const char *domid, int unit); +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + /* at init time */ + .bus = &xenbus_device_backend_list, +#if 0 + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .hotplug = xenbus_hotplug_backend, + }, + .dev = { + .bus_id = "xen-backend", + }, +#endif +}; +#endif + +static void otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + + struct xenbus_device *dev = (struct xenbus_device *)watch; + struct xenbus_driver *drv = dev->driver; + XenbusState state; + + /* Protect us against watches firing on old details when the otherend + details change, say immediately after a resume. */ + if (!dev->otherend || strncmp(dev->otherend, vec[XS_WATCH_PATH], + strlen(dev->otherend))) { + DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]); + return; + } + + state = xenbus_read_driver_state(dev->otherend); + + DPRINTK("state is %d, %s, %s", state, dev->otherend_watch.node, + vec[XS_WATCH_PATH]); + + /* + * Ignore xenbus transitions during shutdown. This prevents us doing + * work that can fail e.g., when the rootfs is gone. 
+ */ +#if 0 + if (system_state > SYSTEM_RUNNING) { + struct xen_bus_type *bus = bus; + bus = container_of(dev->dev.bus, struct xen_bus_type, bus); + /* If we're frontend, drive the state machine to Closed. */ + /* This should cause the backend to release our resources. */ + if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) + xenbus_frontend_closed(dev); + return; + } +#endif + if (drv->otherend_changed) + drv->otherend_changed(dev, state); + +} + + +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv; + + drv = dev->driver; + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + +static int watch_otherend(struct xenbus_device *dev) +{ + return xenbus_watch_path2(dev, dev->otherend, "state", + &dev->otherend_watch, otherend_changed); +} + +static int +xenbus_dev_probe(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = dev->driver; + const struct xenbus_device_id *id; + int err; + + DPRINTK(""); + + if (!drv->probe) { + err = -ENODEV; + goto fail; + } + + id = match_device(drv->ids, dev); + if (!id) { + err = -ENODEV; + goto fail; + } + + err = talk_to_otherend(dev); + if (err) { + log(LOG_WARNING, + "xenbus_probe: talk_to_otherend on %s failed.\n", + dev->nodename); + return err; + } + + err = drv->probe(dev, id); + if (err) + goto fail; + + err = watch_otherend(dev); + if (err) { + log(LOG_WARNING, + "xenbus_probe: watch_otherend on %s failed.\n", + dev->nodename); + return err; + } + + return 0; + fail: + xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); + xenbus_switch_state(dev, XenbusStateClosed); + return -ENODEV; +} + +static void xenbus_dev_free(struct xenbus_device *xendev) +{ + LIST_REMOVE(xendev, list); + kfree(xendev); +} + +int +xenbus_remove_device(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = dev->driver; + + DPRINTK(""); + + free_otherend_watch(dev); + free_otherend_details(dev); + + if (drv->remove) + 
drv->remove(dev); + + xenbus_switch_state(dev, XenbusStateClosed); + + if (drv->cleanup_device) + return drv->cleanup_device(dev); + + xenbus_dev_free(dev); + + return 0; +} + +#if 0 +static int +xenbus_dev_remove(device_t _dev) +{ + return xenbus_remove_device(to_xenbus_device(_dev)); +} +#endif + +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus) +{ + struct xenbus_device *xdev; + +#if 0 + int ret; + /* this all happens in the driver itself + * doing this here simple serves to obfuscate + */ + + drv->driver.name = drv->name; + drv->driver.bus = &bus->bus; + drv->driver.owner = drv->owner; + drv->driver.probe = xenbus_dev_probe; + drv->driver.remove = xenbus_dev_remove; + + return ret; +#endif + sx_xlock(&xenwatch_mutex); + LIST_INSERT_HEAD(&xendrv_list, drv, list); + sx_xunlock(&xenwatch_mutex); + LIST_FOREACH(xdev, bus->bus, list) { + if (match_device(drv->ids, xdev)) { + xdev->driver = drv; + xenbus_dev_probe(xdev); + } + } + return 0; +} + +int xenbus_register_frontend(struct xenbus_driver *drv) +{ + drv->read_otherend_details = read_backend_details; + + return xenbus_register_driver_common(drv, &xenbus_frontend); +} +EXPORT_SYMBOL(xenbus_register_frontend); + + +void xenbus_unregister_driver(struct xenbus_driver *drv) +{ +#if 0 + driver_unregister(&drv->driver); +#endif +} +EXPORT_SYMBOL(xenbus_unregister_driver); + +struct xb_find_info +{ + struct xenbus_device *dev; + const char *nodename; +}; + +static struct xenbus_device * +xenbus_device_find(const char *nodename, struct xendev_list_head *bus) +{ + struct xenbus_device *xdev; + LIST_FOREACH(xdev, bus, list) { + if (streq(xdev->nodename, nodename)) { + return xdev; +#if 0 + get_device(dev); +#endif + } + } + return NULL; +} +#if 0 +static int cleanup_dev(device_t dev, void *data) +{ + struct xenbus_device *xendev = device_get_softc(dev); + struct xb_find_info *info = data; + int len = strlen(info->nodename); + + DPRINTK("%s", info->nodename); + + if 
(!strncmp(xendev->nodename, info->nodename, len)) { + info->dev = xendev; +#if 0 + get_device(dev); +#endif + return 1; + } + return 0; +} + +#endif +static void xenbus_cleanup_devices(const char *path, struct xendev_list_head * bus) +{ +#if 0 + struct xb_find_info info = { .nodename = path }; + + do { + info.dev = NULL; + bus_for_each_dev(bus, NULL, &info, cleanup_dev); + if (info.dev) { + device_unregister(&info.dev->dev); + put_device(&info.dev->dev); + } + } while (info.dev); +#endif +} + +#if 0 +void xenbus_dev_release(device_t dev) +{ + /* + * nothing to do softc gets freed with the device + */ + +} +#endif +/* Simplified asprintf. */ +char *kasprintf(const char *fmt, ...) +{ + va_list ap; + unsigned int len; + char *p, dummy[1]; + + va_start(ap, fmt); + /* FIXME: vsnprintf has a bug, NULL should work */ + len = vsnprintf(dummy, 0, fmt, ap); + va_end(ap); + + p = kmalloc(len + 1, GFP_KERNEL); + if (!p) + return NULL; + va_start(ap, fmt); + vsprintf(p, fmt, ap); + va_end(ap); + return p; +} + +#if 0 +static ssize_t xendev_show_nodename(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); +} +DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); + +static ssize_t xendev_show_devtype(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); +} +DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); +#endif + +int xenbus_probe_node(struct xen_bus_type *bus, const char *type, + const char *nodename) +{ +#define CHECK_FAIL \ + do { \ + if (err) \ + goto fail; \ + } while (0) \ + + + + int err; + struct xenbus_device *xendev; + struct xenbus_driver *xdrv; + size_t stringlen; + char *tmpstring; + + XenbusState state = xenbus_read_driver_state(nodename); + + if (bus->error) + return (bus->error); + + + if (state != XenbusStateInitialising) { + /* Device is not new, so ignore it. 
This can happen if a + device is going away after switching to Closed. */ + return 0; + } + + stringlen = strlen(nodename) + 1 + strlen(type) + 1; + xendev = kmalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); + if (!xendev) + return -ENOMEM; + memset(xendev, 0, sizeof(*xendev)); + xendev->state = XenbusStateInitialising; + + /* Copy the strings into the extra space. */ + + tmpstring = (char *)(xendev + 1); + strcpy(tmpstring, nodename); + xendev->nodename = tmpstring; + + tmpstring += strlen(tmpstring) + 1; + strcpy(tmpstring, type); + xendev->devicetype = tmpstring; + /* + * equivalent to device registration + * events + */ + LIST_INSERT_HEAD(bus->bus, xendev, list); + LIST_FOREACH(xdrv, &xendrv_list, list) { + if (match_device(xdrv->ids, xendev)) { + xendev->driver = xdrv; + if (!xenbus_dev_probe(xendev)) + break; + } + } + +#if 0 + xendev->dev.parent = &bus->dev; + xendev->dev.bus = &bus->bus; + xendev->dev.release = xenbus_dev_release; + + err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); + CHECK_FAIL; + + /* Register with generic device framework. 
*/ + err = device_register(&xendev->dev); + CHECK_FAIL; + + device_create_file(&xendev->dev, &dev_attr_nodename); + device_create_file(&xendev->dev, &dev_attr_devtype); +#endif + return 0; + +#undef CHECK_FAIL +#if 0 + fail: + xenbus_dev_free(xendev); +#endif + return err; +} + +/* device/<typename>/<name> */ +static int xenbus_probe_frontend(const char *type, const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(&xenbus_frontend, type, nodename); + kfree(nodename); + return err; +} + +static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) +{ + int err = 0; + char **dir; + unsigned int dir_n = 0; + int i; + + dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = bus->probe(type, dir[i]); + if (err) + break; + } + kfree(dir); + return err; +} + +int xenbus_probe_devices(struct xen_bus_type *bus) +{ + int err = 0; + char **dir; + unsigned int i, dir_n; + + dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_device_type(bus, dir[i]); + if (err) + break; + } + kfree(dir); + + return err; +} + +static unsigned int char_count(const char *str, char c) +{ + unsigned int i, ret = 0; + + for (i = 0; str[i]; i++) + if (str[i] == c) + ret++; + return ret; +} + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? 
i : -ERANGE; +} + +void dev_changed(const char *node, struct xen_bus_type *bus) +{ + int exists, rootlen; + struct xenbus_device *dev; + char type[BUS_ID_SIZE]; + const char *p; + char *root; + + DPRINTK(""); + if (char_count(node, '/') < 2) + return; + + exists = xenbus_exists(XBT_NIL, node, ""); + if (!exists) { + xenbus_cleanup_devices(node, bus->bus); + return; + } + + /* backend/<type>/... or device/<type>/... */ + p = strchr(node, '/') + 1; + snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); + type[BUS_ID_SIZE-1] = '\0'; + + rootlen = strsep_len(node, '/', bus->levels); + if (rootlen < 0) + return; + root = kasprintf("%.*s", rootlen, node); + if (!root) + return; + + dev = xenbus_device_find(root, bus->bus); + if (!dev) + xenbus_probe_node(bus, type, root); +#if 0 + else + put_device(&dev->dev); +#endif + kfree(root); +} + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +#ifdef notyet + +static int suspend_dev(device_t dev, void *data) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + xdev = device_get_softc(dev); + + drv = xdev->driver; + + if (device_get_driver(dev) == NULL) + return 0; + + if (drv->suspend) + err = drv->suspend(xdev); +#if 0 + /* bus_id ? 
*/ + if (err) + log(LOG_WARNING, "xenbus: suspend %s failed: %i\n", + dev->bus_id, err); +#endif + return 0; +} + + + +static int resume_dev(device_t dev, void *data) +{ + int err; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (device_get_driver(dev) == NULL) + return 0; + xdev = device_get_softc(dev); + drv = xdev->driver; + + err = talk_to_otherend(xdev); +#if 0 + if (err) { + log(LOG_WARNING, + "xenbus: resume (talk_to_otherend) %s failed: %i\n", + dev->bus_id, err); + return err; + } +#endif + if (drv->resume) + err = drv->resume(xdev); + + err = watch_otherend(xdev); +#if 0 + /* bus_id? */ + if (err) + log(LOG_WARNING, + "xenbus: resume %s failed: %i\n", dev->bus_id, err); +#endif + return err; +} + +#endif +void xenbus_suspend(void) +{ + DPRINTK(""); + panic("implement me"); +#if 0 + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev); +#endif + xs_suspend(); +} +EXPORT_SYMBOL(xenbus_suspend); + +void xenbus_resume(void) +{ + xb_init_comms(); + xs_resume(); + panic("implement me"); +#if 0 + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev); +#endif +} +EXPORT_SYMBOL(xenbus_resume); + +#if 0 +static device_t +xenbus_add_child(device_t bus, int order, const char *name, int unit) +{ + device_t child; + + child = device_add_child_ordered(bus, order, name, unit); + + return(child); +} +#endif + +/* A flag to determine if xenstored is 'ready' (i.e. 
has started) */ +int xenstored_ready = 0; + + +int register_xenstore_notifier(xenstore_event_handler_t func, void *arg, int priority) +{ + int ret = 0; + + if (xenstored_ready > 0) + ret = func(NULL); + else + eventhandler_register(xenstore_chain, "xenstore", func, arg, priority); + + return ret; +} +EXPORT_SYMBOL(register_xenstore_notifier); +#if 0 +void unregister_xenstore_notifier(struct notifier_block *nb) +{ + notifier_chain_unregister(&xenstore_chain, nb); +} +EXPORT_SYMBOL(unregister_xenstore_notifier); +#endif + + + +#ifdef DOM0 +static struct proc_dir_entry *xsd_mfn_intf; +static struct proc_dir_entry *xsd_port_intf; + + +static int xsd_mfn_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + len = sprintf(page, "%ld", xen_start_info->store_mfn); + *eof = 1; + return len; +} + +static int xsd_port_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d", xen_start_info->store_evtchn); + *eof = 1; + return len; +} + +#endif +static int +xenbus_probe_sysinit(void *unused) +{ + int err = 0, dom0; + + DPRINTK(""); + + LIST_INIT(&xenbus_device_frontend_list); + LIST_INIT(&xenbus_device_backend_list); + LIST_INIT(&xendrv_list); +#if 0 + if (xen_init() < 0) { + DPRINTK("failed"); + return -ENODEV; + } + + + /* Register ourselves with the kernel bus & device subsystems */ + bus_register(&xenbus_frontend.bus); + bus_register(&xenbus_backend.bus); + device_register(&xenbus_frontend.dev); + device_register(&xenbus_backend.dev); +#endif + + /* + ** Domain0 doesn't have a store_evtchn or store_mfn yet. + */ + dom0 = (xen_start_info->store_evtchn == 0); + + +#ifdef DOM0 + if (dom0) { + + unsigned long page; + evtchn_op_t op = { 0 }; + int ret; + + + /* Allocate page. */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + /* We don't refcnt properly, so set reserved on page. 
+ * (this allocation is permanent) */ + SetPageReserved(virt_to_page(page)); + + xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + op.cmd = EVTCHNOP_alloc_unbound; + op.u.alloc_unbound.dom = DOMID_SELF; + op.u.alloc_unbound.remote_dom = 0; + + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); + xen_start_info->store_evtchn = op.u.alloc_unbound.port; + + /* And finally publish the above info in /proc/xen */ + if((xsd_mfn_intf = create_xen_proc_entry("xsd_mfn", 0400))) + xsd_mfn_intf->read_proc = xsd_mfn_read; + if((xsd_port_intf = create_xen_proc_entry("xsd_port", 0400))) + xsd_port_intf->read_proc = xsd_port_read; + } +#endif + /* Initialize the interface to xenstore. */ + err = xs_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing xenstore comms: %i\n", err); + return err; + } + + if (!dom0) { + xenstored_ready = 1; +#if 0 + xenbus_dev = BUS_ADD_CHILD(parent, 0, "xenbus", 0); + if (xenbus_dev == NULL) + panic("xenbus: could not attach"); + xenbus_backend_dev = BUS_ADD_CHILD(parent, 0, "xb_be", 0); + if (xenbus_backend_dev == NULL) + panic("xenbus: could not attach"); +#endif + BUG_ON((xenstored_ready <= 0)); + + + + /* Enumerate devices in xenstore. 
*/ + xenbus_probe_devices(&xenbus_frontend); + register_xenbus_watch(&fe_watch); + xenbus_backend_probe_and_watch(); + + + /* Notify others that xenstore is up */ + EVENTHANDLER_INVOKE(xenstore_event); + } + + return 0; +} + +SYSINIT(xenbus_probe_sysinit, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_probe_sysinit, NULL); + +#if 0 +static device_method_t xenbus_methods[] = { + /* Device interface */ +#if 0 + DEVMETHOD(device_identify, xenbus_identify), + DEVMETHOD(device_probe, xenbus_probe), + DEVMETHOD(device_attach, xenbus_attach), + + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), +#endif + DEVMETHOD(device_suspend, xenbus_suspend), + DEVMETHOD(device_resume, xenbus_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_add_child, xenbus_add_child), + DEVMETHOD(bus_read_ivar, bus_generic_read_ivar), + DEVMETHOD(bus_write_ivar, bus_generic_write_ivar), +#if 0 + DEVMETHOD(bus_set_resource, bus_generic_set_resource), + DEVMETHOD(bus_get_resource, bus_generic_get_resource), +#endif + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), +#if 0 + DEVMETHOD(bus_delete_resource, bus_generic_delete_resource), +#endif + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + + { 0, 0 } +}; + +static char driver_name[] = "xenbus"; +static driver_t xenbus_driver = { + driver_name, + xenbus_methods, + sizeof(struct xenbus_device), +}; +devclass_t xenbus_devclass; + +DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); + +#endif + + + + + +/* + * Local variables: + * c-file-style: "bsd" + * indent-tabs-mode: t + * c-indent-level: 4 + * c-basic-offset: 8 + * tab-width: 4 + * End: + */ diff --git 
a/sys/xen/xenbus/xenbus_probe_backend.c b/sys/xen/xenbus/xenbus_probe_backend.c new file mode 100644 index 0000000..5629c53 --- /dev/null +++ b/sys/xen/xenbus/xenbus_probe_backend.c @@ -0,0 +1,310 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#if 0 +#define DPRINTK(fmt, args...) 
\ + printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/cdefs.h> +#include <sys/time.h> +#include <sys/sema.h> +#include <sys/eventhandler.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/conf.h> +#include <sys/systm.h> +#include <sys/syslog.h> +#include <sys/proc.h> +#include <sys/bus.h> +#include <sys/sx.h> + +#include <machine/xen/hypervisor.h> +#include <machine/xen/xenbus.h> +#include <machine/xen/evtchn.h> +#include <machine/stdarg.h> + +#include <xen/xenbus/xenbus_comms.h> + +#define BUG_ON PANIC_IF +#define semaphore sema +#define rw_semaphore sema +#define spin_lock mtx_lock +#define spin_unlock mtx_unlock +#define DEFINE_SPINLOCK(lock) struct mtx lock +#define DECLARE_MUTEX(lock) struct sema lock +#define u32 uint32_t +#define list_del(head, ent) TAILQ_REMOVE(head, ent, list) +#define simple_strtoul strtoul +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define list_empty TAILQ_EMPTY +#define wake_up wakeup + +extern struct xendev_list_head xenbus_device_backend_list; +#if 0 +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); +#endif +static int xenbus_probe_backend(const char *type, const char *domid); + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "frontend-id", "frontend"); +} + +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ +static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = 
strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, BUS_ID_SIZE, + "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .bus = &xenbus_device_backend_list, + +#if 0 + .error = -ENODEV, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, +// .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_backend, + }, + .dev = { + .bus_id = "xen-backend", + }, +#endif +}; + +#if 0 +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + int i = 0; + int length = 0; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; +2 + /* stuff we want to pass to /sbin/hotplug */ + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); + + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_PATH=%s", xdev->nodename); + + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_BASE_PATH=%s", xenbus_backend.root); + + /* terminate, set to next free slot, shrink available space */ + envp[i] = NULL; + envp = &envp[i]; + num_envp -= i; + buffer = &buffer[length]; + buffer_size -= length; + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, envp, num_envp, buffer, 
+ buffer_size); + } + + return 0; +} +#endif + +int xenbus_register_backend(struct xenbus_driver *drv) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend); +} + +/* backend/<typename>/<frontend-uuid>/<name> */ +static int xenbus_probe_backend_unit(const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf("%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(&xenbus_backend, type, nodename); + kfree(nodename); + return err; +} + +/* backend/<typename>/<frontend-domid> */ +static int xenbus_probe_backend(const char *type, const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; +#if 0 +void xenbus_backend_suspend(int (*fn)(struct device *, void *)) +{ + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); +} + +void xenbus_backend_resume(int (*fn)(struct device *, void *)) +{ + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); +} +#endif +void xenbus_backend_probe_and_watch(void) +{ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); +} + +#if 0 +void 
xenbus_backend_bus_register(void) +{ + xenbus_backend.error = bus_register(&xenbus_backend.bus); + if (xenbus_backend.error) + log(LOG_WARNING, + "XENBUS: Error registering backend bus: %i\n", + xenbus_backend.error); +} + +void xenbus_backend_device_register(void) +{ + if (xenbus_backend.error) + return; + + xenbus_backend.error = device_register(&xenbus_backend.dev); + if (xenbus_backend.error) { + bus_unregister(&xenbus_backend.bus); + log(LOG_WARNING, + "XENBUS: Error registering backend device: %i\n", + xenbus_backend.error); + } +} +#endif diff --git a/sys/xen/xenbus/xenbus_xs.c b/sys/xen/xenbus/xenbus_xs.c new file mode 100644 index 0000000..67a7fe8 --- /dev/null +++ b/sys/xen/xenbus/xenbus_xs.c @@ -0,0 +1,876 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/cdefs.h> +#include <sys/unistd.h> +#include <sys/errno.h> +#include <sys/uio.h> +#include <sys/kernel.h> +#include <sys/time.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/sema.h> +#include <sys/syslog.h> +#include <sys/malloc.h> +#include <sys/libkern.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/kthread.h> + +#include <machine/xen/hypervisor.h> +#include <machine/xen/xenbus.h> +#include <machine/stdarg.h> + +#include <xen/xenbus/xenbus_comms.h> +int xs_process_msg(void); + +#define kmalloc(size, unused) malloc(size, M_DEVBUF, M_WAITOK) +#define BUG_ON PANIC_IF +#define DEFINE_SPINLOCK(lock) struct mtx lock +#define spin_lock mtx_lock +#define spin_unlock mtx_unlock +#define u32 uint32_t +#define list_del(head, ent) TAILQ_REMOVE(head, ent, list) +#define simple_strtoul strtoul +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define list_empty TAILQ_EMPTY + +#define streq(a, b) (strcmp((a), (b)) == 0) + +struct kvec { + const void *iov_base; + size_t iov_len; +}; + +struct xs_stored_msg { + TAILQ_ENTRY(xs_stored_msg) list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list; + spinlock_t reply_lock; + int reply_waitq; + + /* One request at a time. 
*/ + struct sx request_mutex; + + /* Protect transactions against save/restore. */ + struct rw_semaphore suspend_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watch_list_head, xenbus_watch) watches; +static DEFINE_SPINLOCK(watches_lock); +/* List of pending watch callback events, and a lock to protect it. */ +static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events; +static DEFINE_SPINLOCK(watch_events_lock); +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +static pid_t xenwatch_pid; +struct sx xenwatch_mutex; +static int watch_events_waitq; + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + log(LOG_WARNING, "XENBUS xen store gave: unknown error %s", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +extern int scheduler_running; + +static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xs_stored_msg *msg; + char *body; + int i; + + if (scheduler_running == 0) { + /* + * Give other domain time to run :-/ + */ + for (i = 0; i < 10000; i++) + HYPERVISOR_yield(); + xs_process_msg(); + } + + spin_lock(&xs_state.reply_lock); + + while (list_empty(&xs_state.reply_list)) { + spin_unlock(&xs_state.reply_lock); + wait_event_interruptible(&xs_state.reply_waitq, + !list_empty(&xs_state.reply_list)); + spin_lock(&xs_state.reply_lock); + } + + msg = TAILQ_FIRST(&xs_state.reply_list); + list_del(&xs_state.reply_list, msg); + + spin_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + kfree(msg); + + return body; +} + +#if 0 +/* 
Emergency write. UNUSED*/ +void xenbus_debug_write(const char *str, unsigned int count) +{ + struct xsd_sockmsg msg = { 0 }; + + msg.type = XS_DEBUG; + msg.len = sizeof("print") + count + 1; + + sx_xlock(&xs_state.request_mutex); + xb_write(&msg, sizeof(msg)); + xb_write("print", sizeof("print")); + xb_write(str, count); + xb_write("", 1); + sx_xunlock(&xs_state.request_mutex); +} + +#endif +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) +{ + void *ret; + struct xsd_sockmsg req_msg = *msg; + int err; + + if (req_msg.type == XS_TRANSACTION_START) + down_read(&xs_state.suspend_mutex); + + sx_xlock(&xs_state.request_mutex); + + err = xb_write(msg, sizeof(*msg) + msg->len); + if (err) { + msg->type = XS_ERROR; + ret = ERR_PTR(err); + } else { + ret = read_reply(&msg->type, &msg->len); + } + + sx_xunlock(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + up_read(&xs_state.suspend_mutex); + + return ret; +} + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. 
*/ +static void *xs_talkv(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int err; + + msg.tx_id = t.id; + msg.req_id = 0; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + sx_xlock(&xs_state.request_mutex); + + err = xb_write(&msg, sizeof(msg)); + if (err) { + sx_xunlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + + for (i = 0; i < num_vecs; i++) { + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; + if (err) { + sx_xunlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + } + + ret = read_reply(&msg.type, len); + + sx_xunlock(&xs_state.request_mutex); + + if (IS_ERR(ret)) + return ret; + + if (msg.type == XS_ERROR) { + err = get_error(ret); + kfree(ret); + return ERR_PTR(-err); + } + + BUG_ON(msg.type != type); + return ret; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct kvec iovec; + + iovec.iov_base = (const void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(t, type, &iovec, 1, len); +} + +/* Many commands only need an ack, don't care what it says. */ +static int xs_error(char *reply) +{ + if (IS_ERR(reply)) + return PTR_ERR(reply); + kfree(reply); + return 0; +} + +static unsigned int count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. 
*/ +static char *join(const char *dir, const char *name) +{ + char *buffer; + + buffer = kmalloc(strlen(dir) + strlen("/") + strlen(name) + 1, + GFP_KERNEL); + if (buffer == NULL) + return ERR_PTR(-ENOMEM); + + strcpy(buffer, dir); + if (!streq(name, "")) { + strcat(buffer, "/"); + strcat(buffer, name); + } + + return buffer; +} + +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL); + if (!ret) { + kfree(strings); + return ERR_PTR(-ENOMEM); + } + memcpy(&ret[*num], strings, len); + kfree(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + return ret; +} + +char **xenbus_directory(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *num) +{ + char *strings, *path; + unsigned int len; + + path = join(dir, node); + if (IS_ERR(path)) + return (char **)path; + + strings = xs_single(t, XS_DIRECTORY, path, &len); + kfree(path); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); +} +EXPORT_SYMBOL(xenbus_directory); + +/* Check if a path exists. Return 1 if it does. */ +int xenbus_exists(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char **d; + int dir_n; + + d = xenbus_directory(t, dir, node, &dir_n); + if (IS_ERR(d)) + return 0; + kfree(d); + return 1; +} +EXPORT_SYMBOL(xenbus_exists); + +/* Get the value of a single file. + * Returns a kmalloced value: call free() on it after use. + * len indicates length in bytes. 
+ */ +void *xenbus_read(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *len) +{ + char *path; + void *ret; + + path = join(dir, node); + if (IS_ERR(path)) + return (void *)path; + + ret = xs_single(t, XS_READ, path, len); + kfree(path); + return ret; +} +EXPORT_SYMBOL(xenbus_read); + +/* Write the value of a single file. + * Returns -err on failure. + */ +int xenbus_write(struct xenbus_transaction t, + const char *dir, const char *node, const char *string) +{ + char *path; + struct kvec iovec[2]; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + iovec[0].iov_base = path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = string; + iovec[1].iov_len = strlen(string); + + ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL(xenbus_write); + +/* Create a new directory. */ +int xenbus_mkdir(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL(xenbus_mkdir); + +/* Destroy a file or directory (directories must be empty). */ +int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_RM, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL(xenbus_rm); + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. 
+ */ +int xenbus_transaction_start(struct xenbus_transaction *t) +{ + char *id_str; + + down_read(&xs_state.suspend_mutex); + + id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); + if (IS_ERR(id_str)) { + up_read(&xs_state.suspend_mutex); + return PTR_ERR(id_str); + } + + t->id = simple_strtoul(id_str, NULL, 0); + kfree(id_str); + + return 0; +} +EXPORT_SYMBOL(xenbus_transaction_start); + +/* End a transaction. + * If abandon is true, transaction is discarded instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + int err; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); + + up_read(&xs_state.suspend_mutex); + + return err; +} +EXPORT_SYMBOL(xenbus_transaction_end); + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(t, dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return -ERANGE; + return ret; +} +EXPORT_SYMBOL(xenbus_scanf); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) 
+{ + va_list ap; + int ret; +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + return -ENOMEM; + + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + va_end(ap); + + BUG_ON(ret > PRINTF_BUFFER_SIZE-1); + ret = xenbus_write(t, dir, node, printf_buffer); + + kfree(printf_buffer); + + return ret; +} +EXPORT_SYMBOL(xenbus_printf); + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(t, dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + kfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} +EXPORT_SYMBOL(xenbus_gather); + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + LIST_FOREACH(i, &watches, list) + if (i == cmp) + return i; + + return NULL; +} + +/* Register callback to watch this node. 
*/ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.suspend_mutex); + + spin_lock(&watches_lock); + BUG_ON(find_watch(token) != NULL); + LIST_INSERT_HEAD(&watches, watch, list); + spin_unlock(&watches_lock); + + err = xs_watch(watch->node, token); + + /* Ignore errors due to multiple registration. */ + if ((err != 0) && (err != -EEXIST)) { + spin_lock(&watches_lock); + LIST_REMOVE(watch, list); + spin_unlock(&watches_lock); + } + + up_read(&xs_state.suspend_mutex); + + return err; +} +EXPORT_SYMBOL(register_xenbus_watch); + +void unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_stored_msg *msg, *tmp; + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.suspend_mutex); + + spin_lock(&watches_lock); + BUG_ON(!find_watch(token)); + LIST_REMOVE(watch, list); + spin_unlock(&watches_lock); + + err = xs_unwatch(watch->node, token); + if (err) + log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n", + watch->node, err); + + up_read(&xs_state.suspend_mutex); + + /* Cancel pending watch events. */ + spin_lock(&watch_events_lock); + TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) { + if (msg->u.watch.handle != watch) + continue; + list_del(&watch_events, msg); + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watch_events_lock); + + /* Flush any currently-executing callback, unless we are it. 
:-) */ + if (curproc->p_pid != xenwatch_pid) { + sx_xlock(&xenwatch_mutex); + sx_xunlock(&xenwatch_mutex); + } +} +EXPORT_SYMBOL(unregister_xenbus_watch); + +void xs_suspend(void) +{ + down_write(&xs_state.suspend_mutex); + sx_xlock(&xs_state.request_mutex); +} + +void xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + sx_xunlock(&xs_state.request_mutex); + + /* No need for watches_lock: the suspend_mutex is sufficient. */ + LIST_FOREACH(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + up_write(&xs_state.suspend_mutex); +} + +static void xenwatch_thread(void *unused) +{ + struct xs_stored_msg *msg; + + for (;;) { + wait_event_interruptible(&watch_events_waitq, + !list_empty(&watch_events)); + + sx_xlock(&xenwatch_mutex); + + spin_lock(&watch_events_lock); + msg = TAILQ_FIRST(&watch_events); + if (msg) + list_del(&watch_events, msg); + spin_unlock(&watch_events_lock); + + if (msg != NULL) { + + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + kfree(msg->u.watch.vec); + kfree(msg); + } + + sx_xunlock(&xenwatch_mutex); + } +} + +int xs_process_msg(void) +{ + struct xs_stored_msg *msg; + char *body; + int err; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (msg == NULL) + return -ENOMEM; + + err = xb_read(&msg->hdr, sizeof(msg->hdr)); + if (err) { + kfree(msg); + return err; + } + + body = kmalloc(msg->hdr.len + 1, GFP_KERNEL); + if (body == NULL) { + kfree(msg); + return -ENOMEM; + } + + err = xb_read(body, msg->hdr.len); + if (err) { + kfree(body); + kfree(msg); + return err; + } + body[msg->hdr.len] = '\0'; + + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + if (IS_ERR(msg->u.watch.vec)) { + kfree(msg); + return PTR_ERR(msg->u.watch.vec); + } + + spin_lock(&watches_lock); + msg->u.watch.handle = find_watch( + 
msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + spin_lock(&watch_events_lock); + TAILQ_INSERT_TAIL(&watch_events, msg, list); + wakeup(&watch_events_waitq); + spin_unlock(&watch_events_lock); + } else { + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + spin_lock(&xs_state.reply_lock); + TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list); + spin_unlock(&xs_state.reply_lock); + wakeup(&xs_state.reply_waitq); + } + + return 0; +} + +static void xenbus_thread(void *unused) +{ + int err; + + for (;;) { + err = xs_process_msg(); + if (err) + printf("XENBUS error %d while reading " + "message\n", err); + } +} + +int xs_init(void) +{ + int err; + struct proc *p; + + TAILQ_INIT(&xs_state.reply_list); + TAILQ_INIT(&watch_events); + mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF); + sema_init(&xs_state.suspend_mutex, 1, "xenstore suspend"); + sx_init(&xenwatch_mutex, "xenwatch"); + sx_init(&xs_state.request_mutex, "xenstore request"); + +#if 0 + mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF); + sema_init(&xs_state.request_mutex, 1, "xenstore request"); + sema_init(&xenwatch_mutex, 1, "xenwatch"); +#endif + mtx_init(&watches_lock, "watches", NULL, MTX_DEF); + mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF); + + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + return err; + + err = kproc_create(xenwatch_thread, NULL, &p, + RFHIGHPID, 0, "xenwatch"); + if (err) + return err; + xenwatch_pid = p->p_pid; + + err = kproc_create(xenbus_thread, NULL, NULL, + RFHIGHPID, 0, "xenbus"); + + return err; +} + + +/* + * Local variables: + * c-file-style: "bsd" + * indent-tabs-mode: t + * c-indent-level: 4 + * c-basic-offset: 8 + * tab-width: 4 + * End: + */ |