Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/Makefile  39
-rw-r--r--  sys/amd64/acpica/acpi_machdep.c  385
-rw-r--r--  sys/amd64/acpica/acpi_wakecode.S  282
-rw-r--r--  sys/amd64/amd64/amd64_mem.c  755
-rw-r--r--  sys/amd64/amd64/apic_vector.S  326
-rw-r--r--  sys/amd64/amd64/atomic.c  49
-rw-r--r--  sys/amd64/amd64/atpic_vector.S  73
-rw-r--r--  sys/amd64/amd64/autoconf.c  132
-rw-r--r--  sys/amd64/amd64/bios.c  95
-rw-r--r--  sys/amd64/amd64/bpf_jit_machdep.c  653
-rw-r--r--  sys/amd64/amd64/bpf_jit_machdep.h  482
-rw-r--r--  sys/amd64/amd64/cpu_switch.S  541
-rw-r--r--  sys/amd64/amd64/db_disasm.c  1637
-rw-r--r--  sys/amd64/amd64/db_interface.c  149
-rw-r--r--  sys/amd64/amd64/db_trace.c  701
-rw-r--r--  sys/amd64/amd64/elf_machdep.c  278
-rw-r--r--  sys/amd64/amd64/exception.S  878
-rw-r--r--  sys/amd64/amd64/fpu.c  1012
-rw-r--r--  sys/amd64/amd64/gdb_machdep.c  117
-rw-r--r--  sys/amd64/amd64/genassym.c  252
-rw-r--r--  sys/amd64/amd64/identcpu.c  688
-rw-r--r--  sys/amd64/amd64/in_cksum.c  241
-rw-r--r--  sys/amd64/amd64/initcpu.c  217
-rw-r--r--  sys/amd64/amd64/io.c  59
-rw-r--r--  sys/amd64/amd64/locore.S  88
-rw-r--r--  sys/amd64/amd64/machdep.c  2556
-rw-r--r--  sys/amd64/amd64/mem.c  216
-rw-r--r--  sys/amd64/amd64/minidump_machdep.c  479
-rw-r--r--  sys/amd64/amd64/mp_machdep.c  1488
-rw-r--r--  sys/amd64/amd64/mp_watchdog.c  211
-rw-r--r--  sys/amd64/amd64/mpboot.S  236
-rw-r--r--  sys/amd64/amd64/pmap.c  5538
-rw-r--r--  sys/amd64/amd64/prof_machdep.c  391
-rw-r--r--  sys/amd64/amd64/ptrace_machdep.c  152
-rw-r--r--  sys/amd64/amd64/sigtramp.S  56
-rw-r--r--  sys/amd64/amd64/stack_machdep.c  87
-rw-r--r--  sys/amd64/amd64/support.S  732
-rw-r--r--  sys/amd64/amd64/sys_machdep.c  753
-rw-r--r--  sys/amd64/amd64/trap.c  1006
-rw-r--r--  sys/amd64/amd64/uio_machdep.c  119
-rw-r--r--  sys/amd64/amd64/uma_machdep.c  84
-rw-r--r--  sys/amd64/amd64/vm_machdep.c  750
-rw-r--r--  sys/amd64/compile/.cvsignore  1
-rw-r--r--  sys/amd64/conf/.cvsignore  1
-rw-r--r--  sys/amd64/conf/DEFAULTS  24
-rw-r--r--  sys/amd64/conf/GENERIC  340
-rw-r--r--  sys/amd64/conf/GENERIC.hints  33
-rw-r--r--  sys/amd64/conf/Makefile  5
-rw-r--r--  sys/amd64/conf/NOTES  626
-rw-r--r--  sys/amd64/conf/XENHVM  22
-rw-r--r--  sys/amd64/ia32/ia32_exception.S  75
-rw-r--r--  sys/amd64/ia32/ia32_misc.c  82
-rw-r--r--  sys/amd64/ia32/ia32_reg.c  235
-rw-r--r--  sys/amd64/ia32/ia32_signal.c  1006
-rw-r--r--  sys/amd64/ia32/ia32_sigtramp.S  161
-rw-r--r--  sys/amd64/ia32/ia32_syscall.c  255
-rw-r--r--  sys/amd64/include/_align.h  6
-rw-r--r--  sys/amd64/include/_bus.h  46
-rw-r--r--  sys/amd64/include/_inttypes.h  6
-rw-r--r--  sys/amd64/include/_limits.h  6
-rw-r--r--  sys/amd64/include/_stdint.h  6
-rw-r--r--  sys/amd64/include/_types.h  6
-rw-r--r--  sys/amd64/include/acpica_machdep.h  84
-rw-r--r--  sys/amd64/include/apicvar.h  232
-rw-r--r--  sys/amd64/include/apm_bios.h  6
-rw-r--r--  sys/amd64/include/asm.h  91
-rw-r--r--  sys/amd64/include/asmacros.h  204
-rw-r--r--  sys/amd64/include/atomic.h  483
-rw-r--r--  sys/amd64/include/bus.h  6
-rw-r--r--  sys/amd64/include/bus_dma.h  34
-rw-r--r--  sys/amd64/include/clock.h  43
-rw-r--r--  sys/amd64/include/counter.h  51
-rw-r--r--  sys/amd64/include/cpu.h  78
-rw-r--r--  sys/amd64/include/cpufunc.h  791
-rw-r--r--  sys/amd64/include/cputypes.h  59
-rw-r--r--  sys/amd64/include/db_machdep.h  94
-rw-r--r--  sys/amd64/include/elf.h  6
-rw-r--r--  sys/amd64/include/endian.h  6
-rw-r--r--  sys/amd64/include/exec.h  38
-rw-r--r--  sys/amd64/include/float.h  6
-rw-r--r--  sys/amd64/include/floatingpoint.h  43
-rw-r--r--  sys/amd64/include/fpu.h  89
-rw-r--r--  sys/amd64/include/frame.h  6
-rw-r--r--  sys/amd64/include/gdb_machdep.h  52
-rw-r--r--  sys/amd64/include/ieeefp.h  308
-rw-r--r--  sys/amd64/include/in_cksum.h  84
-rw-r--r--  sys/amd64/include/intr_machdep.h  174
-rw-r--r--  sys/amd64/include/iodev.h  46
-rw-r--r--  sys/amd64/include/kdb.h  59
-rw-r--r--  sys/amd64/include/limits.h  44
-rw-r--r--  sys/amd64/include/md_var.h  121
-rw-r--r--  sys/amd64/include/memdev.h  40
-rw-r--r--  sys/amd64/include/metadata.h  35
-rw-r--r--  sys/amd64/include/minidump.h  46
-rw-r--r--  sys/amd64/include/mp_watchdog.h  34
-rw-r--r--  sys/amd64/include/nexusvar.h  45
-rw-r--r--  sys/amd64/include/npx.h  6
-rw-r--r--  sys/amd64/include/param.h  142
-rw-r--r--  sys/amd64/include/pc/bios.h  111
-rw-r--r--  sys/amd64/include/pc/display.h  45
-rw-r--r--  sys/amd64/include/pcb.h  149
-rw-r--r--  sys/amd64/include/pci_cfgreg.h  6
-rw-r--r--  sys/amd64/include/pcpu.h  261
-rw-r--r--  sys/amd64/include/pmap.h  337
-rw-r--r--  sys/amd64/include/pmc_mdep.h  143
-rw-r--r--  sys/amd64/include/ppireg.h  49
-rw-r--r--  sys/amd64/include/proc.h  91
-rw-r--r--  sys/amd64/include/profile.h  201
-rw-r--r--  sys/amd64/include/psl.h  6
-rw-r--r--  sys/amd64/include/ptrace.h  6
-rw-r--r--  sys/amd64/include/reg.h  6
-rw-r--r--  sys/amd64/include/reloc.h  49
-rw-r--r--  sys/amd64/include/resource.h  44
-rw-r--r--  sys/amd64/include/runq.h  46
-rw-r--r--  sys/amd64/include/segments.h  106
-rw-r--r--  sys/amd64/include/setjmp.h  6
-rw-r--r--  sys/amd64/include/sf_buf.h  58
-rw-r--r--  sys/amd64/include/sigframe.h  6
-rw-r--r--  sys/amd64/include/signal.h  6
-rw-r--r--  sys/amd64/include/smp.h  82
-rw-r--r--  sys/amd64/include/specialreg.h  6
-rw-r--r--  sys/amd64/include/stack.h  44
-rw-r--r--  sys/amd64/include/stdarg.h  6
-rw-r--r--  sys/amd64/include/sysarch.h  6
-rw-r--r--  sys/amd64/include/timerreg.h  54
-rw-r--r--  sys/amd64/include/trap.h  6
-rw-r--r--  sys/amd64/include/tss.h  70
-rw-r--r--  sys/amd64/include/ucontext.h  6
-rw-r--r--  sys/amd64/include/varargs.h  89
-rw-r--r--  sys/amd64/include/vdso.h  6
-rw-r--r--  sys/amd64/include/vm.h  45
-rw-r--r--  sys/amd64/include/vmm.h  291
-rw-r--r--  sys/amd64/include/vmm_dev.h  204
-rw-r--r--  sys/amd64/include/vmm_instruction_emul.h  126
-rw-r--r--  sys/amd64/include/vmparam.h  217
-rw-r--r--  sys/amd64/include/xen/hypercall.h  415
-rw-r--r--  sys/amd64/include/xen/synch_bitops.h  129
-rw-r--r--  sys/amd64/include/xen/xen-os.h  296
-rw-r--r--  sys/amd64/include/xen/xenfunc.h  82
-rw-r--r--  sys/amd64/include/xen/xenpmap.h  227
-rw-r--r--  sys/amd64/include/xen/xenvar.h  120
-rw-r--r--  sys/amd64/linux32/Makefile  17
-rw-r--r--  sys/amd64/linux32/linux.h  788
-rw-r--r--  sys/amd64/linux32/linux32_dummy.c  176
-rw-r--r--  sys/amd64/linux32/linux32_genassym.c  14
-rw-r--r--  sys/amd64/linux32/linux32_ipc64.h  145
-rw-r--r--  sys/amd64/linux32/linux32_locore.s  38
-rw-r--r--  sys/amd64/linux32/linux32_machdep.c  1067
-rw-r--r--  sys/amd64/linux32/linux32_proto.h  1682
-rw-r--r--  sys/amd64/linux32/linux32_support.s  124
-rw-r--r--  sys/amd64/linux32/linux32_syscall.h  324
-rw-r--r--  sys/amd64/linux32/linux32_syscalls.c  360
-rw-r--r--  sys/amd64/linux32/linux32_sysent.c  371
-rw-r--r--  sys/amd64/linux32/linux32_systrace_args.c  6667
-rw-r--r--  sys/amd64/linux32/linux32_sysvec.c  1205
-rw-r--r--  sys/amd64/linux32/syscalls.conf  11
-rw-r--r--  sys/amd64/linux32/syscalls.master  561
-rw-r--r--  sys/amd64/pci/pci_cfgreg.c  370
-rw-r--r--  sys/amd64/vmm/amd/amdv.c  265
-rw-r--r--  sys/amd64/vmm/intel/ept.c  392
-rw-r--r--  sys/amd64/vmm/intel/ept.h  43
-rw-r--r--  sys/amd64/vmm/intel/vmcs.c  551
-rw-r--r--  sys/amd64/vmm/intel/vmcs.h  338
-rw-r--r--  sys/amd64/vmm/intel/vmx.c  1867
-rw-r--r--  sys/amd64/vmm/intel/vmx.h  120
-rw-r--r--  sys/amd64/vmm/intel/vmx_controls.h  92
-rw-r--r--  sys/amd64/vmm/intel/vmx_cpufunc.h  218
-rw-r--r--  sys/amd64/vmm/intel/vmx_genassym.c  89
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.c  172
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.h  78
-rw-r--r--  sys/amd64/vmm/intel/vmx_support.S  246
-rw-r--r--  sys/amd64/vmm/intel/vtd.c  677
-rw-r--r--  sys/amd64/vmm/io/iommu.c  277
-rw-r--r--  sys/amd64/vmm/io/iommu.h  75
-rw-r--r--  sys/amd64/vmm/io/ppt.c  594
-rw-r--r--  sys/amd64/vmm/io/ppt.h  41
-rw-r--r--  sys/amd64/vmm/io/vdev.c  270
-rw-r--r--  sys/amd64/vmm/io/vdev.h  84
-rw-r--r--  sys/amd64/vmm/io/vlapic.c  907
-rw-r--r--  sys/amd64/vmm/io/vlapic.h  111
-rw-r--r--  sys/amd64/vmm/vmm.c  992
-rw-r--r--  sys/amd64/vmm/vmm_dev.c  526
-rw-r--r--  sys/amd64/vmm/vmm_host.c  124
-rw-r--r--  sys/amd64/vmm/vmm_host.h  75
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c  867
-rw-r--r--  sys/amd64/vmm/vmm_ipi.c  93
-rw-r--r--  sys/amd64/vmm/vmm_ipi.h  39
-rw-r--r--  sys/amd64/vmm/vmm_ktr.h  51
-rw-r--r--  sys/amd64/vmm/vmm_lapic.c  201
-rw-r--r--  sys/amd64/vmm/vmm_lapic.h  71
-rw-r--r--  sys/amd64/vmm/vmm_mem.c  135
-rw-r--r--  sys/amd64/vmm/vmm_mem.h  37
-rw-r--r--  sys/amd64/vmm/vmm_msr.c  254
-rw-r--r--  sys/amd64/vmm/vmm_msr.h  43
-rw-r--r--  sys/amd64/vmm/vmm_stat.c  130
-rw-r--r--  sys/amd64/vmm/vmm_stat.h  105
-rw-r--r--  sys/amd64/vmm/vmm_support.S  42
-rw-r--r--  sys/amd64/vmm/vmm_util.c  111
-rw-r--r--  sys/amd64/vmm/vmm_util.h  40
-rw-r--r--  sys/amd64/vmm/x86.c  219
-rw-r--r--  sys/amd64/vmm/x86.h  64
201 files changed, 61516 insertions, 0 deletions
diff --git a/sys/amd64/Makefile b/sys/amd64/Makefile
new file mode 100644
index 0000000..3925e74
--- /dev/null
+++ b/sys/amd64/Makefile
@@ -0,0 +1,39 @@
+# $FreeBSD$
+# @(#)Makefile 8.1 (Berkeley) 6/11/93
+
+# Makefile for amd64 links, tags file
+
+# SYS is normally set in Make.tags.inc
+SYS=/sys
+
+TAGDIR= amd64
+
+.include "../kern/Make.tags.inc"
+
+all:
+ @echo "make links or tags only"
+
+# Directories in which to place amd64 tags links
+DAMD64= acpica amd64 ia32 include isa linux32 pci
+
+links::
+ -for i in ${COMMDIR1}; do \
+ (cd $$i && { rm -f tags; ln -s ../${TAGDIR}/tags tags; }) done
+ -for i in ${COMMDIR2}; do \
+ (cd $$i && { rm -f tags; ln -s ../../${TAGDIR}/tags tags; }) done
+ -for i in ${DAMD64}; do \
+ (cd $$i && { rm -f tags; ln -s ../tags tags; }) done
+
+SAMD64= ${SYS}/amd64/acpica/*.[ch] \
+ ${SYS}/amd64/amd64/*.[ch] ${SYS}/amd64/ia32/*.[ch] \
+ ${SYS}/amd64/include/*.[ch] ${SYS}/amd64/isa/*.[ch] \
+ ${SYS}/amd64/linux32/*.[ch] ${SYS}/amd64/pci/*.[ch]
+AAMD64= ${SYS}/amd64/amd64/*.S
+
+tags::
+ -ctags -wdt ${COMM} ${SAMD64}
+ egrep "^ENTRY(.*)|^ALTENTRY(.*)" ${AAMD64} | \
+ sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$$/;" \
+ >> tags
+ sort -o tags tags
+ chmod 444 tags
diff --git a/sys/amd64/acpica/acpi_machdep.c b/sys/amd64/acpica/acpi_machdep.c
new file mode 100644
index 0000000..e5dd4c3
--- /dev/null
+++ b/sys/amd64/acpica/acpi_machdep.c
@@ -0,0 +1,385 @@
+/*-
+ * Copyright (c) 2001 Mitsuru IWASAKI
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <contrib/dev/acpica/include/actables.h>
+
+#include <dev/acpica/acpivar.h>
+
+#include <machine/nexusvar.h>
+
+int acpi_resume_beep;
+TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep);
+SYSCTL_INT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep,
+ 0, "Beep the PC speaker when resuming");
+
+int acpi_reset_video;
+TUNABLE_INT("hw.acpi.reset_video", &acpi_reset_video);
+
+static int intr_model = ACPI_INTR_PIC;
+
+int
+acpi_machdep_init(device_t dev)
+{
+ struct acpi_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ acpi_apm_init(sc);
+
+ if (intr_model != ACPI_INTR_PIC)
+ acpi_SetIntrModel(intr_model);
+
+ SYSCTL_ADD_INT(&sc->acpi_sysctl_ctx,
+ SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO,
+ "reset_video", CTLFLAG_RW, &acpi_reset_video, 0,
+ "Call the VESA reset BIOS vector on the resume path");
+
+ return (0);
+}
+
+void
+acpi_SetDefaultIntrModel(int model)
+{
+
+ intr_model = model;
+}
+
+int
+acpi_machdep_quirks(int *quirks)
+{
+
+ return (0);
+}
+
+void
+acpi_cpu_c1()
+{
+
+ __asm __volatile("sti; hlt");
+}
+
+/*
+ * Support for mapping ACPI tables during early boot. Currently this
+ * uses the crashdump map to map each table. However, the crashdump
+ * map is created in pmap_bootstrap() right after the direct map, so
+ * we should be able to just use pmap_mapbios() here instead.
+ *
+ * This makes the following assumptions about how we use this KVA:
+ * pages 0 and 1 are used to map in the header of each table found via
+ * the RSDT or XSDT and pages 2 to n are used to map in the RSDT or
+ * XSDT. This has to use 2 pages for the table headers in case a
+ * header spans a page boundary.
+ *
+ * XXX: We don't ensure the table fits in the available address space
+ * in the crashdump map.
+ */
+
+/*
+ * Map some memory using the crashdump map. 'offset' is an offset in
+ * pages into the crashdump map to use for the start of the mapping.
+ */
+static void *
+table_map(vm_paddr_t pa, int offset, vm_offset_t length)
+{
+ vm_offset_t va, off;
+ void *data;
+
+ off = pa & PAGE_MASK;
+ length = round_page(length + off);
+ pa = pa & PG_FRAME;
+ va = (vm_offset_t)pmap_kenter_temporary(pa, offset) +
+ (offset * PAGE_SIZE);
+ data = (void *)(va + off);
+ length -= PAGE_SIZE;
+ while (length > 0) {
+ va += PAGE_SIZE;
+ pa += PAGE_SIZE;
+ length -= PAGE_SIZE;
+ pmap_kenter(va, pa);
+ invlpg(va);
+ }
+ return (data);
+}
+
+/* Unmap memory previously mapped with table_map(). */
+static void
+table_unmap(void *data, vm_offset_t length)
+{
+ vm_offset_t va, off;
+
+ va = (vm_offset_t)data;
+ off = va & PAGE_MASK;
+ length = round_page(length + off);
+ va &= ~PAGE_MASK;
+ while (length > 0) {
+ pmap_kremove(va);
+ invlpg(va);
+ va += PAGE_SIZE;
+ length -= PAGE_SIZE;
+ }
+}
+
+/*
+ * Map a table at a given offset into the crashdump map. It first
+ * maps the header to determine the table length and then maps the
+ * entire table.
+ */
+static void *
+map_table(vm_paddr_t pa, int offset, const char *sig)
+{
+ ACPI_TABLE_HEADER *header;
+ vm_offset_t length;
+ void *table;
+
+ header = table_map(pa, offset, sizeof(ACPI_TABLE_HEADER));
+ if (strncmp(header->Signature, sig, ACPI_NAME_SIZE) != 0) {
+ table_unmap(header, sizeof(ACPI_TABLE_HEADER));
+ return (NULL);
+ }
+ length = header->Length;
+ table_unmap(header, sizeof(ACPI_TABLE_HEADER));
+ table = table_map(pa, offset, length);
+ if (ACPI_FAILURE(AcpiTbChecksum(table, length))) {
+ if (bootverbose)
+ printf("ACPI: Failed checksum for table %s\n", sig);
+#if (ACPI_CHECKSUM_ABORT)
+ table_unmap(table, length);
+ return (NULL);
+#endif
+ }
+ return (table);
+}
+
+/*
+ * See if a given ACPI table is the requested table.  Returns 1 if the
+ * table's signature matches or 0 on failure.
+ */
+static int
+probe_table(vm_paddr_t address, const char *sig)
+{
+ ACPI_TABLE_HEADER *table;
+
+ table = table_map(address, 0, sizeof(ACPI_TABLE_HEADER));
+ if (table == NULL) {
+ if (bootverbose)
+ printf("ACPI: Failed to map table at 0x%jx\n",
+ (uintmax_t)address);
+ return (0);
+ }
+ if (bootverbose)
+ printf("Table '%.4s' at 0x%jx\n", table->Signature,
+ (uintmax_t)address);
+
+ if (strncmp(table->Signature, sig, ACPI_NAME_SIZE) != 0) {
+ table_unmap(table, sizeof(ACPI_TABLE_HEADER));
+ return (0);
+ }
+ table_unmap(table, sizeof(ACPI_TABLE_HEADER));
+ return (1);
+}
+
+/*
+ * Try to map a table at a given physical address previously returned
+ * by acpi_find_table().
+ */
+void *
+acpi_map_table(vm_paddr_t pa, const char *sig)
+{
+
+ return (map_table(pa, 0, sig));
+}
+
+/* Unmap a table previously mapped via acpi_map_table(). */
+void
+acpi_unmap_table(void *table)
+{
+ ACPI_TABLE_HEADER *header;
+
+ header = (ACPI_TABLE_HEADER *)table;
+ table_unmap(table, header->Length);
+}
+
+/*
+ * Return the physical address of the requested table or zero if one
+ * is not found.
+ */
+vm_paddr_t
+acpi_find_table(const char *sig)
+{
+ ACPI_PHYSICAL_ADDRESS rsdp_ptr;
+ ACPI_TABLE_RSDP *rsdp;
+ ACPI_TABLE_RSDT *rsdt;
+ ACPI_TABLE_XSDT *xsdt;
+ ACPI_TABLE_HEADER *table;
+ vm_paddr_t addr;
+ int i, count;
+
+ if (resource_disabled("acpi", 0))
+ return (0);
+
+ /*
+ * Map in the RSDP. Since ACPI uses AcpiOsMapMemory() which in turn
+ * calls pmap_mapbios() to find the RSDP, we assume that we can use
+ * pmap_mapbios() to map the RSDP.
+ */
+ if ((rsdp_ptr = AcpiOsGetRootPointer()) == 0)
+ return (0);
+ rsdp = pmap_mapbios(rsdp_ptr, sizeof(ACPI_TABLE_RSDP));
+ if (rsdp == NULL) {
+ if (bootverbose)
+ printf("ACPI: Failed to map RSDP\n");
+ return (0);
+ }
+
+ /*
+ * For ACPI >= 2.0, use the XSDT if it is available.
+ * Otherwise, use the RSDT. We map the XSDT or RSDT at page 2
+ * in the crashdump area. Pages 0 and 1 are used to map in the
+ * headers of candidate ACPI tables.
+ */
+ addr = 0;
+ if (rsdp->Revision >= 2 && rsdp->XsdtPhysicalAddress != 0) {
+ /*
+ * AcpiOsGetRootPointer only verifies the checksum for
+ * the version 1.0 portion of the RSDP. Version 2.0 has
+ * an additional checksum that we verify first.
+ */
+ if (AcpiTbChecksum((UINT8 *)rsdp, ACPI_RSDP_XCHECKSUM_LENGTH)) {
+ if (bootverbose)
+ printf("ACPI: RSDP failed extended checksum\n");
+ return (0);
+ }
+ xsdt = map_table(rsdp->XsdtPhysicalAddress, 2, ACPI_SIG_XSDT);
+ if (xsdt == NULL) {
+ if (bootverbose)
+ printf("ACPI: Failed to map XSDT\n");
+ return (0);
+ }
+ count = (xsdt->Header.Length - sizeof(ACPI_TABLE_HEADER)) /
+ sizeof(UINT64);
+ for (i = 0; i < count; i++)
+ if (probe_table(xsdt->TableOffsetEntry[i], sig)) {
+ addr = xsdt->TableOffsetEntry[i];
+ break;
+ }
+ acpi_unmap_table(xsdt);
+ } else {
+ rsdt = map_table(rsdp->RsdtPhysicalAddress, 2, ACPI_SIG_RSDT);
+ if (rsdt == NULL) {
+ if (bootverbose)
+ printf("ACPI: Failed to map RSDT\n");
+ return (0);
+ }
+ count = (rsdt->Header.Length - sizeof(ACPI_TABLE_HEADER)) /
+ sizeof(UINT32);
+ for (i = 0; i < count; i++)
+ if (probe_table(rsdt->TableOffsetEntry[i], sig)) {
+ addr = rsdt->TableOffsetEntry[i];
+ break;
+ }
+ acpi_unmap_table(rsdt);
+ }
+ pmap_unmapbios((vm_offset_t)rsdp, sizeof(ACPI_TABLE_RSDP));
+ if (addr == 0) {
+ if (bootverbose)
+ printf("ACPI: No %s table found\n", sig);
+ return (0);
+ }
+ if (bootverbose)
+ printf("%s: Found table at 0x%jx\n", sig, (uintmax_t)addr);
+
+ /*
+ * Verify that we can map the full table and that its checksum is
+ * correct, etc.
+ */
+ table = map_table(addr, 0, sig);
+ if (table == NULL)
+ return (0);
+ acpi_unmap_table(table);
+
+ return (addr);
+}
+
+/*
+ * ACPI nexus(4) driver.
+ */
+static int
+nexus_acpi_probe(device_t dev)
+{
+ int error;
+
+ error = acpi_identify();
+ if (error)
+ return (error);
+
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+nexus_acpi_attach(device_t dev)
+{
+ device_t acpi_dev;
+ int error;
+
+ nexus_init_resources();
+ bus_generic_probe(dev);
+ acpi_dev = BUS_ADD_CHILD(dev, 10, "acpi", 0);
+ if (acpi_dev == NULL)
+ panic("failed to add acpi0 device");
+
+ error = bus_generic_attach(dev);
+ if (error == 0)
+ acpi_install_wakeup_handler(device_get_softc(acpi_dev));
+
+ return (error);
+}
+
+static device_method_t nexus_acpi_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, nexus_acpi_probe),
+ DEVMETHOD(device_attach, nexus_acpi_attach),
+
+ { 0, 0 }
+};
+
+DEFINE_CLASS_1(nexus, nexus_acpi_driver, nexus_acpi_methods, 1, nexus_driver);
+static devclass_t nexus_devclass;
+
+DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_devclass, 0, 0);
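
Aside: the page arithmetic in table_map() above is the part that is easiest to
get wrong.  The following stand-alone C sketch (hypothetical pa/length values,
4 KB pages as on amd64, not part of this commit) reproduces the offset and
rounding computation to show how many pages end up mapped:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE	4096UL
    #define PAGE_MASK	(PAGE_SIZE - 1)
    #define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)

    int
    main(void)
    {
    	uint64_t pa = 0x7fe30024;	/* hypothetical table address */
    	uint64_t length = 8200;		/* hypothetical table length */
    	uint64_t off, maplen;

    	off = pa & PAGE_MASK;			/* 0x24: offset into first page */
    	maplen = round_page(length + off);	/* 8236 -> 12288 bytes */
    	pa &= ~PAGE_MASK;			/* page-align the mapping base */

    	printf("map %ju pages at 0x%jx, table data at +0x%jx\n",
    	    (uintmax_t)(maplen / PAGE_SIZE), (uintmax_t)pa, (uintmax_t)off);
    	return (0);
    }

For this example the 36-byte offset into the first page pushes an 8200-byte
table from two pages to three, which is exactly why map_table() maps headers
with two pages to spare.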
diff --git a/sys/amd64/acpica/acpi_wakecode.S b/sys/amd64/acpica/acpi_wakecode.S
new file mode 100644
index 0000000..c4b0dcd
--- /dev/null
+++ b/sys/amd64/acpica/acpi_wakecode.S
@@ -0,0 +1,282 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
+ * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008-2012 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/ppireg.h>
+#include <machine/specialreg.h>
+#include <machine/timerreg.h>
+
+#include "assym.s"
+
+/*
+ * Resume entry point for real mode.
+ *
+ * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero
+ * in FACS, the BIOS enters here in real mode after POST with CS set to
+ * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf).
+ * Depending on the previous sleep state, we may need to initialize more
+ * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk).
+ *
+ * Note: If XFirmwareWakingVector is non-zero, it should disable address
+ * translation/paging and interrupts, load all segment registers with
+ * a flat 4 GB address space, and set EFLAGS.IF to zero. Currently
+ * this mode is not supported by this code.
+ */
+
+ .data /* So we can modify it */
+
+ ALIGN_TEXT
+ .code16
+wakeup_start:
+ /*
+ * Set up segment registers for real mode, a small stack for
+ * any calls we make, and clear any flags.
+ */
+ cli /* make sure no interrupts */
+ mov %cs, %ax /* copy %cs to %ds. Remember these */
+ mov %ax, %ds /* are offsets rather than selectors */
+ mov %ax, %ss
+ movw $PAGE_SIZE, %sp
+ xorw %ax, %ax
+ pushw %ax
+ popfw
+
+ /* To debug resume hangs, beep the speaker if the user requested it. */
+ testb $~0, resume_beep - wakeup_start
+ jz 1f
+ movb $0, resume_beep - wakeup_start
+
+ /* Set PIC timer2 to beep. */
+ movb $(TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT), %al
+ outb %al, $TIMER_MODE
+
+ /* Turn on speaker. */
+ inb $IO_PPI, %al
+ orb $PIT_SPKR, %al
+ outb %al, $IO_PPI
+
+ /* Set frequency. */
+ movw $0x4c0, %ax
+ outb %al, $TIMER_CNTR2
+ shrw $8, %ax
+ outb %al, $TIMER_CNTR2
+1:
+
+ /* Re-initialize video BIOS if the reset_video tunable is set. */
+ testb $~0, reset_video - wakeup_start
+ jz 1f
+ movb $0, reset_video - wakeup_start
+ lcall $0xc000, $3
+
+ /* When we reach here, int 0x10 should be ready. Hide cursor. */
+ movb $0x01, %ah
+ movb $0x20, %ch
+ int $0x10
+
+ /* Re-start in case the previous BIOS call clobbers them. */
+ jmp wakeup_start
+1:
+
+ /*
+ * Find the relocation base and patch the GDT descriptor and ljmp targets.
+ */
+ xorl %ebx, %ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
+
+ /*
+ * Load the descriptor table pointer. We'll need it when running
+ * in 16-bit protected mode.
+ */
+ lgdtl bootgdtdesc - wakeup_start
+
+ /* Enable protected mode */
+ movl $CR0_PE, %eax
+ mov %eax, %cr0
+
+ /*
+ * Now execute a far jump to turn on protected mode. This
+ * causes the segment registers to turn into selectors and causes
+ * %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and its a little clearer what we are patching.
+ */
+wakeup_sw32:
+ .byte 0x66 /* size override to 32 bits */
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_32 - wakeup_start /* offset in segment */
+ .word bootcode32 - bootgdt /* index in gdt for 32 bit code */
+
+ /*
+ * At this point, we are running in 32 bit legacy protected mode.
+ */
+ ALIGN_TEXT
+ .code32
+wakeup_32:
+
+ mov $bootdata32 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Turn on the PAE and PSE bits for when paging is enabled */
+ mov %cr4, %eax
+ orl $(CR4_PAE | CR4_PSE), %eax
+ mov %eax, %cr4
+
+ /*
+ * Enable EFER.LME so that we get long mode when all the prereqs are
+ * in place. In this case, it turns on when CR0_PG is finally enabled.
+ * Pick up a few other EFER bits that we'll need while we're here.
+ */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_LME | EFER_SCE, %eax
+ wrmsr
+
+ /*
+ * Point to the embedded page tables for startup. Note that this
+ * only gets accessed after we're actually in 64 bit mode, however
+ * we can only set the bottom 32 bits of %cr3 in this state. This
+ * means we are required to use a temporary page table that is below
+ * the 4GB limit. %ebx is still our relocation base. We could just
+ * subtract 3 * PAGE_SIZE, but that would be too easy.
+ */
+ leal wakeup_pagetables - wakeup_start(%ebx), %eax
+ movl (%eax), %eax
+ mov %eax, %cr3
+
+ /*
+ * Finally, switch to long mode by enabling paging. We have
+ * to be very careful here because all the segmentation disappears
+ * out from underneath us. The spec says we can depend on the
+ * subsequent pipelined branch to execute, but *only if* everything
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
+ */
+ mov %cr0, %eax
+ orl $CR0_PG, %eax
+ mov %eax, %cr0
+
+ /*
+ * At this point paging is enabled, and we are in "compatibility" mode.
+ * We do another far jump to reload %cs with the 64 bit selector.
+ * %cr3 points to a 4-level page table page.
+ * We cannot yet jump all the way to the kernel because we can only
+ * specify a 32 bit linear address. So, yet another trampoline.
+ *
+ * The following instruction is:
+ * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and it's a little clearer what we are patching.
+ */
+wakeup_sw64:
+ .byte 0xea /* opcode for far jump */
+ .long wakeup_64 - wakeup_start /* offset in segment */
+ .word bootcode64 - bootgdt /* index in gdt for 64 bit code */
+
+ /*
+ * Yeehar! We're running in 64-bit mode! We can mostly ignore our
+ * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with
+ * a 1:1 1GB mirrored mapping over the entire address space. We had better
+ * switch to a real %cr3 promptly so that we can get to the direct map
+ * space. Remember that jmp is relative and that we've been relocated,
+ * so use an indirect jump.
+ */
+ ALIGN_TEXT
+ .code64
+wakeup_64:
+ mov $bootdata64 - bootgdt, %eax
+ mov %ax, %ds
+
+ /* Restore arguments. */
+ movq wakeup_pcb - wakeup_start(%rbx), %rdi
+ movq wakeup_ret - wakeup_start(%rbx), %rax
+
+ /* Restore GDT. */
+ lgdt wakeup_gdt - wakeup_start(%rbx)
+
+ /* Jump to return address. */
+ jmp *%rax
+
+ .data
+
+resume_beep:
+ .byte 0
+reset_video:
+ .byte 0
+
+ ALIGN_DATA
+bootgdt:
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+
+bootcode64:
+ .long 0x0000ffff
+ .long 0x00af9b00
+
+bootdata64:
+ .long 0x0000ffff
+ .long 0x00af9300
+
+bootcode32:
+ .long 0x0000ffff
+ .long 0x00cf9b00
+
+bootdata32:
+ .long 0x0000ffff
+ .long 0x00cf9300
+bootgdtend:
+
+wakeup_pagetables:
+ .long 0
+
+bootgdtdesc:
+ .word bootgdtend - bootgdt /* Length */
+ .long bootgdt - wakeup_start /* Offset plus %ds << 4 */
+
+ ALIGN_DATA
+wakeup_pcb:
+ .quad 0
+wakeup_ret:
+ .quad 0
+wakeup_gdt:
+ .word 0
+ .quad 0
+dummy:
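
Aside: the bare dwords in bootgdt above are ordinary x86 segment descriptors.
A small C sketch (field layout per the Intel/AMD manuals, not part of this
commit) that decodes the high dword of the bootcode64 and bootcode32 entries,
showing why 0x00af9b00 selects a 64-bit ring-0 code segment while 0x00cf9b00
selects a 32-bit one:

    #include <stdio.h>
    #include <stdint.h>

    static void
    decode(const char *name, uint32_t hi)
    {

    	printf("%s: type=%#x S=%u DPL=%u P=%u L=%u D=%u G=%u\n", name,
    	    (hi >> 8) & 0xf,	/* type: 0xb = execute/read code, accessed */
    	    (hi >> 12) & 1,	/* S: 1 = code/data segment */
    	    (hi >> 13) & 3,	/* DPL: privilege level */
    	    (hi >> 15) & 1,	/* P: segment present */
    	    (hi >> 21) & 1,	/* L: 1 = 64-bit code segment */
    	    (hi >> 22) & 1,	/* D: 1 = 32-bit default operand size */
    	    (hi >> 23) & 1);	/* G: 1 = limit in 4 KB units */
    }

    int
    main(void)
    {

    	decode("bootcode64", 0x00af9b00);	/* L=1, D=0: long mode */
    	decode("bootcode32", 0x00cf9b00);	/* L=0, D=1: legacy 32-bit */
    	return (0);
    }

The two entries differ only in the flags nibble (0xa vs. 0xc), i.e. in the L
and D bits; the present bit, DPL 0, and execute/read code type are identical.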
diff --git a/sys/amd64/amd64/amd64_mem.c b/sys/amd64/amd64/amd64_mem.c
new file mode 100644
index 0000000..e77a96f
--- /dev/null
+++ b/sys/amd64/amd64/amd64_mem.c
@@ -0,0 +1,755 @@
+/*-
+ * Copyright (c) 1999 Michael Smith <msmith@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+/*
+ * amd64 memory range operations
+ *
+ * This code will probably be impenetrable without reference to the
+ * Intel Pentium Pro documentation or the x86-64 programmer's manual, vol. 2.
+ */
+
+static char *mem_owner_bios = "BIOS";
+
+#define MR686_FIXMTRR (1<<0)
+
+#define mrwithin(mr, a) \
+ (((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len)))
+#define mroverlap(mra, mrb) \
+ (mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base))
+
+#define mrvalid(base, len) \
+ ((!(base & ((1 << 12) - 1))) && /* base is multiple of 4k */ \
+ ((len) >= (1 << 12)) && /* length is >= 4k */ \
+ powerof2((len)) && /* ... and power of two */ \
+ !((base) & ((len) - 1))) /* range is not discontinuous */
+
+#define mrcopyflags(curr, new) \
+ (((curr) & ~MDF_ATTRMASK) | ((new) & MDF_ATTRMASK))
+
+static int mtrrs_disabled;
+TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled);
+SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RDTUN,
+ &mtrrs_disabled, 0, "Disable amd64 MTRRs.");
+
+static void amd64_mrinit(struct mem_range_softc *sc);
+static int amd64_mrset(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd, int *arg);
+static void amd64_mrAPinit(struct mem_range_softc *sc);
+static void amd64_mrreinit(struct mem_range_softc *sc);
+
+static struct mem_range_ops amd64_mrops = {
+ amd64_mrinit,
+ amd64_mrset,
+ amd64_mrAPinit,
+ amd64_mrreinit
+};
+
+/* XXX for AP startup hook */
+static u_int64_t mtrrcap, mtrrdef;
+
+/* The bitmask for the PhysBase and PhysMask fields of the variable MTRRs. */
+static u_int64_t mtrr_physmask;
+
+static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd);
+static void amd64_mrfetch(struct mem_range_softc *sc);
+static int amd64_mtrrtype(int flags);
+static int amd64_mrt2mtrr(int flags, int oldval);
+static int amd64_mtrrconflict(int flag1, int flag2);
+static void amd64_mrstore(struct mem_range_softc *sc);
+static void amd64_mrstoreone(void *arg);
+static struct mem_range_desc *amd64_mtrrfixsearch(struct mem_range_softc *sc,
+ u_int64_t addr);
+static int amd64_mrsetlow(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd, int *arg);
+static int amd64_mrsetvariable(struct mem_range_softc *sc,
+ struct mem_range_desc *mrd, int *arg);
+
+/* amd64 MTRR type to memory range type conversion */
+static int amd64_mtrrtomrt[] = {
+ MDF_UNCACHEABLE,
+ MDF_WRITECOMBINE,
+ MDF_UNKNOWN,
+ MDF_UNKNOWN,
+ MDF_WRITETHROUGH,
+ MDF_WRITEPROTECT,
+ MDF_WRITEBACK
+};
+
+#define MTRRTOMRTLEN (sizeof(amd64_mtrrtomrt) / sizeof(amd64_mtrrtomrt[0]))
+
+static int
+amd64_mtrr2mrt(int val)
+{
+
+ if (val < 0 || val >= MTRRTOMRTLEN)
+ return (MDF_UNKNOWN);
+ return (amd64_mtrrtomrt[val]);
+}
+
+/*
+ * amd64 MTRR conflicts. Writeback and uncacheable may overlap.
+ */
+static int
+amd64_mtrrconflict(int flag1, int flag2)
+{
+
+ flag1 &= MDF_ATTRMASK;
+ flag2 &= MDF_ATTRMASK;
+ if ((flag1 & MDF_UNKNOWN) || (flag2 & MDF_UNKNOWN))
+ return (1);
+ if (flag1 == flag2 ||
+ (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) ||
+ (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE))
+ return (0);
+ return (1);
+}
+
+/*
+ * Look for an exactly-matching range.
+ */
+static struct mem_range_desc *
+mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd)
+{
+ struct mem_range_desc *cand;
+ int i;
+
+ for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++)
+ if ((cand->mr_base == mrd->mr_base) &&
+ (cand->mr_len == mrd->mr_len))
+ return (cand);
+ return (NULL);
+}
+
+/*
+ * Fetch the current mtrr settings from the current CPU (assumed to
+ * all be in sync in the SMP case). Note that if we are here, we
+ * assume that MTRRs are enabled, and we may or may not have fixed
+ * MTRRs.
+ */
+static void
+amd64_mrfetch(struct mem_range_softc *sc)
+{
+ struct mem_range_desc *mrd;
+ u_int64_t msrv;
+ int i, j, msr;
+
+ mrd = sc->mr_desc;
+
+ /* Get fixed-range MTRRs. */
+ if (sc->mr_cap & MR686_FIXMTRR) {
+ msr = MSR_MTRR64kBase;
+ for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
+ msrv = rdmsr(msr);
+ for (j = 0; j < 8; j++, mrd++) {
+ mrd->mr_flags =
+ (mrd->mr_flags & ~MDF_ATTRMASK) |
+ amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE;
+ if (mrd->mr_owner[0] == 0)
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ msrv = msrv >> 8;
+ }
+ }
+ msr = MSR_MTRR16kBase;
+ for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
+ msrv = rdmsr(msr);
+ for (j = 0; j < 8; j++, mrd++) {
+ mrd->mr_flags =
+ (mrd->mr_flags & ~MDF_ATTRMASK) |
+ amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE;
+ if (mrd->mr_owner[0] == 0)
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ msrv = msrv >> 8;
+ }
+ }
+ msr = MSR_MTRR4kBase;
+ for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
+ msrv = rdmsr(msr);
+ for (j = 0; j < 8; j++, mrd++) {
+ mrd->mr_flags =
+ (mrd->mr_flags & ~MDF_ATTRMASK) |
+ amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE;
+ if (mrd->mr_owner[0] == 0)
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ msrv = msrv >> 8;
+ }
+ }
+ }
+
+ /* Get remainder which must be variable MTRRs. */
+ msr = MSR_MTRRVarBase;
+ for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
+ msrv = rdmsr(msr);
+ mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
+ amd64_mtrr2mrt(msrv & MTRR_PHYSBASE_TYPE);
+ mrd->mr_base = msrv & mtrr_physmask;
+ msrv = rdmsr(msr + 1);
+ mrd->mr_flags = (msrv & MTRR_PHYSMASK_VALID) ?
+ (mrd->mr_flags | MDF_ACTIVE) :
+ (mrd->mr_flags & ~MDF_ACTIVE);
+
+ /* Compute the range from the mask. Ick. */
+ mrd->mr_len = (~(msrv & mtrr_physmask) &
+ (mtrr_physmask | 0xfffL)) + 1;
+ if (!mrvalid(mrd->mr_base, mrd->mr_len))
+ mrd->mr_flags |= MDF_BOGUS;
+
+ /* If unclaimed and active, must be the BIOS. */
+ if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0))
+ strcpy(mrd->mr_owner, mem_owner_bios);
+ }
+}
+
+/*
+ * Return the MTRR memory type matching a region's flags
+ */
+static int
+amd64_mtrrtype(int flags)
+{
+ int i;
+
+ flags &= MDF_ATTRMASK;
+
+ for (i = 0; i < MTRRTOMRTLEN; i++) {
+ if (amd64_mtrrtomrt[i] == MDF_UNKNOWN)
+ continue;
+ if (flags == amd64_mtrrtomrt[i])
+ return (i);
+ }
+ return (-1);
+}
+
+static int
+amd64_mrt2mtrr(int flags, int oldval)
+{
+ int val;
+
+ if ((val = amd64_mtrrtype(flags)) == -1)
+ return (oldval & 0xff);
+ return (val & 0xff);
+}
+
+/*
+ * Update running CPU(s) MTRRs to match the ranges in the descriptor
+ * list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+amd64_mrstore(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, amd64_mrstoreone, NULL, sc);
+#else
+ disable_intr(); /* disable interrupts */
+ amd64_mrstoreone(sc);
+ enable_intr();
+#endif
+}
+
+/*
+ * Update the current CPU's MTRRs with those represented in the
+ * descriptor list. Note that we do this wholesale rather than just
+ * stuffing one entry; this is simpler (but slower, of course).
+ */
+static void
+amd64_mrstoreone(void *arg)
+{
+ struct mem_range_softc *sc = arg;
+ struct mem_range_desc *mrd;
+ u_int64_t omsrv, msrv;
+ int i, j, msr;
+ u_long cr0, cr4;
+
+ mrd = sc->mr_desc;
+
+ critical_enter();
+
+ /* Disable PGE. */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+
+ /* Disable caches (CD = 1, NW = 0). */
+ cr0 = rcr0();
+ load_cr0((cr0 & ~CR0_NW) | CR0_CD);
+
+ /* Flushes caches and TLBs. */
+ wbinvd();
+ invltlb();
+
+ /* Disable MTRRs (E = 0). */
+ wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~MTRR_DEF_ENABLE);
+
+ /* Set fixed-range MTRRs. */
+ if (sc->mr_cap & MR686_FIXMTRR) {
+ msr = MSR_MTRR64kBase;
+ for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
+ msrv = 0;
+ omsrv = rdmsr(msr);
+ for (j = 7; j >= 0; j--) {
+ msrv = msrv << 8;
+ msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags,
+ omsrv >> (j * 8));
+ }
+ wrmsr(msr, msrv);
+ mrd += 8;
+ }
+ msr = MSR_MTRR16kBase;
+ for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
+ msrv = 0;
+ omsrv = rdmsr(msr);
+ for (j = 7; j >= 0; j--) {
+ msrv = msrv << 8;
+ msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags,
+ omsrv >> (j * 8));
+ }
+ wrmsr(msr, msrv);
+ mrd += 8;
+ }
+ msr = MSR_MTRR4kBase;
+ for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
+ msrv = 0;
+ omsrv = rdmsr(msr);
+ for (j = 7; j >= 0; j--) {
+ msrv = msrv << 8;
+ msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags,
+ omsrv >> (j * 8));
+ }
+ wrmsr(msr, msrv);
+ mrd += 8;
+ }
+ }
+
+ /* Set remainder which must be variable MTRRs. */
+ msr = MSR_MTRRVarBase;
+ for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
+ /* base/type register */
+ omsrv = rdmsr(msr);
+ if (mrd->mr_flags & MDF_ACTIVE) {
+ msrv = mrd->mr_base & mtrr_physmask;
+ msrv |= amd64_mrt2mtrr(mrd->mr_flags, omsrv);
+ } else {
+ msrv = 0;
+ }
+ wrmsr(msr, msrv);
+
+ /* mask/active register */
+ if (mrd->mr_flags & MDF_ACTIVE) {
+ msrv = MTRR_PHYSMASK_VALID |
+ (~(mrd->mr_len - 1) & mtrr_physmask);
+ } else {
+ msrv = 0;
+ }
+ wrmsr(msr + 1, msrv);
+ }
+
+ /* Flush caches and TLBs. */
+ wbinvd();
+ invltlb();
+
+ /* Enable MTRRs. */
+ wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) | MTRR_DEF_ENABLE);
+
+ /* Restore caches and PGE. */
+ load_cr0(cr0);
+ load_cr4(cr4);
+
+ critical_exit();
+}
+
+/*
+ * Hunt for the fixed MTRR referencing (addr)
+ */
+static struct mem_range_desc *
+amd64_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr)
+{
+ struct mem_range_desc *mrd;
+ int i;
+
+ for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K);
+ i++, mrd++)
+ if ((addr >= mrd->mr_base) &&
+ (addr < (mrd->mr_base + mrd->mr_len)))
+ return (mrd);
+ return (NULL);
+}
+
+/*
+ * Try to satisfy the given range request by manipulating the fixed
+ * MTRRs that cover low memory.
+ *
+ * Note that we try to be generous here; we'll bloat the range out to
+ * the next higher/lower boundary to avoid the consumer having to know
+ * too much about the mechanisms here.
+ *
+ * XXX note that this will have to be updated when we start supporting
+ * "busy" ranges.
+ */
+static int
+amd64_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
+{
+ struct mem_range_desc *first_md, *last_md, *curr_md;
+
+ /* Range check. */
+ if (((first_md = amd64_mtrrfixsearch(sc, mrd->mr_base)) == NULL) ||
+ ((last_md = amd64_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) == NULL))
+ return (EINVAL);
+
+ /* Check that we aren't doing something risky. */
+ if (!(mrd->mr_flags & MDF_FORCE))
+ for (curr_md = first_md; curr_md <= last_md; curr_md++) {
+ if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)
+ return (EACCES);
+ }
+
+ /* Set flags, clear set-by-firmware flag. */
+ for (curr_md = first_md; curr_md <= last_md; curr_md++) {
+ curr_md->mr_flags = mrcopyflags(curr_md->mr_flags &
+ ~MDF_FIRMWARE, mrd->mr_flags);
+ bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner));
+ }
+
+ return (0);
+}
+
+/*
+ * Modify/add a variable MTRR to satisfy the request.
+ *
+ * XXX needs to be updated to properly support "busy" ranges.
+ */
+static int
+amd64_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd,
+ int *arg)
+{
+ struct mem_range_desc *curr_md, *free_md;
+ int i;
+
+ /*
+ * Scan the currently active variable descriptors, look for
+ * one we exactly match (straight takeover) and for possible
+ * accidental overlaps.
+ *
+ * Keep track of the first empty variable descriptor in case
+ * we can't perform a takeover.
+ */
+ i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
+ curr_md = sc->mr_desc + i;
+ free_md = NULL;
+ for (; i < sc->mr_ndesc; i++, curr_md++) {
+ if (curr_md->mr_flags & MDF_ACTIVE) {
+ /* Exact match? */
+ if ((curr_md->mr_base == mrd->mr_base) &&
+ (curr_md->mr_len == mrd->mr_len)) {
+
+ /* Whoops, owned by someone. */
+ if (curr_md->mr_flags & MDF_BUSY)
+ return (EBUSY);
+
+ /* Check that we aren't doing something risky */
+ if (!(mrd->mr_flags & MDF_FORCE) &&
+ ((curr_md->mr_flags & MDF_ATTRMASK) ==
+ MDF_UNKNOWN))
+ return (EACCES);
+
+ /* Ok, just hijack this entry. */
+ free_md = curr_md;
+ break;
+ }
+
+ /* Non-exact overlap? */
+ if (mroverlap(curr_md, mrd)) {
+ /* Between conflicting region types? */
+ if (amd64_mtrrconflict(curr_md->mr_flags,
+ mrd->mr_flags))
+ return (EINVAL);
+ }
+ } else if (free_md == NULL) {
+ free_md = curr_md;
+ }
+ }
+
+ /* Got somewhere to put it? */
+ if (free_md == NULL)
+ return (ENOSPC);
+
+ /* Set up new descriptor. */
+ free_md->mr_base = mrd->mr_base;
+ free_md->mr_len = mrd->mr_len;
+ free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags);
+ bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner));
+ return (0);
+}
+
+/*
+ * Handle requests to set memory range attributes by manipulating MTRRs.
+ */
+static int
+amd64_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
+{
+ struct mem_range_desc *targ;
+ int error, i;
+
+ switch (*arg) {
+ case MEMRANGE_SET_UPDATE:
+ /*
+ * Make sure that what's being asked for is even
+ * possible at all.
+ */
+ if (!mrvalid(mrd->mr_base, mrd->mr_len) ||
+ amd64_mtrrtype(mrd->mr_flags) == -1)
+ return (EINVAL);
+
+#define FIXTOP ((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K * 0x1000))
+
+ /* Are the "low memory" conditions applicable? */
+ if ((sc->mr_cap & MR686_FIXMTRR) &&
+ ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) {
+ if ((error = amd64_mrsetlow(sc, mrd, arg)) != 0)
+ return (error);
+ } else {
+ /* It's time to play with variable MTRRs. */
+ if ((error = amd64_mrsetvariable(sc, mrd, arg)) != 0)
+ return (error);
+ }
+ break;
+
+ case MEMRANGE_SET_REMOVE:
+ if ((targ = mem_range_match(sc, mrd)) == NULL)
+ return (ENOENT);
+ if (targ->mr_flags & MDF_FIXACTIVE)
+ return (EPERM);
+ if (targ->mr_flags & MDF_BUSY)
+ return (EBUSY);
+ targ->mr_flags &= ~MDF_ACTIVE;
+ targ->mr_owner[0] = 0;
+ break;
+
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ /*
+ * Ensure that the direct map region does not contain any mappings
+ * that span MTRRs of different types. However, the fixed MTRRs can
+ * be ignored, because a large page mapping the first 1 MB of physical
+ * memory is a special case that the processor handles. The entire
+ * TLB will be invalidated by amd64_mrstore(), so pmap_demote_DMAP()
+ * needn't do it.
+ */
+ i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
+ mrd = sc->mr_desc + i;
+ for (; i < sc->mr_ndesc; i++, mrd++) {
+ if ((mrd->mr_flags & (MDF_ACTIVE | MDF_BOGUS)) == MDF_ACTIVE)
+ pmap_demote_DMAP(mrd->mr_base, mrd->mr_len, FALSE);
+ }
+
+ /* Update the hardware. */
+ amd64_mrstore(sc);
+
+ /* Refetch to see where we're at. */
+ amd64_mrfetch(sc);
+ return (0);
+}
+
+/*
+ * Work out how many ranges we support, initialise storage for them,
+ * and fetch the initial settings.
+ */
+static void
+amd64_mrinit(struct mem_range_softc *sc)
+{
+ struct mem_range_desc *mrd;
+ u_int regs[4];
+ int i, nmdesc = 0, pabits;
+
+ mtrrcap = rdmsr(MSR_MTRRcap);
+ mtrrdef = rdmsr(MSR_MTRRdefType);
+
+ /* For now, bail out if MTRRs are not enabled. */
+ if (!(mtrrdef & MTRR_DEF_ENABLE)) {
+ if (bootverbose)
+ printf("CPU supports MTRRs but not enabled\n");
+ return;
+ }
+ nmdesc = mtrrcap & MTRR_CAP_VCNT;
+
+ /*
+ * Determine the size of the PhysMask and PhysBase fields in
+ * the variable range MTRRs. If the extended CPUID 0x80000008
+ * is present, use that to figure out how many physical
+ * address bits the CPU supports. Otherwise, default to 36
+ * address bits.
+ */
+ if (cpu_exthigh >= 0x80000008) {
+ do_cpuid(0x80000008, regs);
+ pabits = regs[0] & 0xff;
+ } else
+ pabits = 36;
+ mtrr_physmask = ((1UL << pabits) - 1) & ~0xfffUL;
+
+ /* If fixed MTRRs supported and enabled. */
+ if ((mtrrcap & MTRR_CAP_FIXED) && (mtrrdef & MTRR_DEF_FIXED_ENABLE)) {
+ sc->mr_cap = MR686_FIXMTRR;
+ nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K;
+ }
+
+ sc->mr_desc = malloc(nmdesc * sizeof(struct mem_range_desc), M_MEMDESC,
+ M_WAITOK | M_ZERO);
+ sc->mr_ndesc = nmdesc;
+
+ mrd = sc->mr_desc;
+
+ /* Populate the fixed MTRR entries' base/length. */
+ if (sc->mr_cap & MR686_FIXMTRR) {
+ for (i = 0; i < MTRR_N64K; i++, mrd++) {
+ mrd->mr_base = i * 0x10000;
+ mrd->mr_len = 0x10000;
+ mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN |
+ MDF_FIXACTIVE;
+ }
+ for (i = 0; i < MTRR_N16K; i++, mrd++) {
+ mrd->mr_base = i * 0x4000 + 0x80000;
+ mrd->mr_len = 0x4000;
+ mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN |
+ MDF_FIXACTIVE;
+ }
+ for (i = 0; i < MTRR_N4K; i++, mrd++) {
+ mrd->mr_base = i * 0x1000 + 0xc0000;
+ mrd->mr_len = 0x1000;
+ mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN |
+ MDF_FIXACTIVE;
+ }
+ }
+
+ /*
+ * Get current settings, anything set now is considered to
+ * have been set by the firmware. (XXX has something already
+ * played here?)
+ */
+ amd64_mrfetch(sc);
+ mrd = sc->mr_desc;
+ for (i = 0; i < sc->mr_ndesc; i++, mrd++) {
+ if (mrd->mr_flags & MDF_ACTIVE)
+ mrd->mr_flags |= MDF_FIRMWARE;
+ }
+
+ /*
+ * Ensure that the direct map region does not contain any mappings
+ * that span MTRRs of different types. However, the fixed MTRRs can
+ * be ignored, because a large page mapping the first 1 MB of physical
+ * memory is a special case that the processor handles. Invalidate
+ * any old TLB entries that might hold inconsistent memory type
+ * information.
+ */
+ i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
+ mrd = sc->mr_desc + i;
+ for (; i < sc->mr_ndesc; i++, mrd++) {
+ if ((mrd->mr_flags & (MDF_ACTIVE | MDF_BOGUS)) == MDF_ACTIVE)
+ pmap_demote_DMAP(mrd->mr_base, mrd->mr_len, TRUE);
+ }
+}
+
+/*
+ * Initialise MTRRs on an AP after the BSP has run the init code.
+ */
+static void
+amd64_mrAPinit(struct mem_range_softc *sc)
+{
+
+ amd64_mrstoreone(sc);
+ wrmsr(MSR_MTRRdefType, mtrrdef);
+}
+
+/*
+ * Re-initialise running CPU(s) MTRRs to match the ranges in the descriptor
+ * list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
+static void
+amd64_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+ /*
+ * We should use ipi_all_but_self() to call other CPUs into a
+ * locking gate, then call a target function to do this work.
+ * The "proper" solution involves a generalised locking gate
+ * implementation, not ready yet.
+ */
+ smp_rendezvous(NULL, (void *)amd64_mrAPinit, NULL, sc);
+#else
+ disable_intr(); /* disable interrupts */
+ amd64_mrAPinit(sc);
+ enable_intr();
+#endif
+}
+
+static void
+amd64_mem_drvinit(void *unused)
+{
+
+ if (mtrrs_disabled)
+ return;
+ if (!(cpu_feature & CPUID_MTRR))
+ return;
+ if ((cpu_id & 0xf00) != 0x600 && (cpu_id & 0xf00) != 0xf00)
+ return;
+ switch (cpu_vendor_id) {
+ case CPU_VENDOR_INTEL:
+ case CPU_VENDOR_AMD:
+ case CPU_VENDOR_CENTAUR:
+ break;
+ default:
+ return;
+ }
+ mem_range_softc.mr_op = &amd64_mrops;
+}
+SYSINIT(amd64memdev, SI_SUB_DRIVERS, SI_ORDER_FIRST, amd64_mem_drvinit, NULL);
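
Aside: the "Compute the range from the mask. Ick." line in amd64_mrfetch()
and its inverse in amd64_mrstoreone() are easier to follow with concrete
numbers.  A minimal userland sketch (a 36-bit physical address width and a
made-up 128 MB range are assumptions, not values from this commit):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
    	int pabits = 36;		/* assumed physical address width */
    	uint64_t physmask, msrv, len, decoded;

    	physmask = ((1ULL << pabits) - 1) & ~0xfffULL;
    	len = 128 * 1024 * 1024;	/* example region size: 128 MB */

    	/* Encode: amd64_mrstoreone() writes ~(len - 1) into PhysMask. */
    	msrv = ~(len - 1) & physmask;

    	/* Decode: amd64_mrfetch() recovers the length from the mask. */
    	decoded = (~(msrv & physmask) & (physmask | 0xfffULL)) + 1;

    	printf("physmask=%#jx mask=%#jx len=%#jx\n",
    	    (uintmax_t)physmask, (uintmax_t)msrv, (uintmax_t)decoded);
    	return (0);
    }

Because a variable MTRR range must be a power-of-two size on a size-aligned
base, ~(len - 1) masked to the supported address bits round-trips exactly
back to len, which is what mrvalid() enforces.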
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
new file mode 100644
index 0000000..6465247
--- /dev/null
+++ b/sys/amd64/amd64/apic_vector.S
@@ -0,0 +1,326 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: vector.s, 386BSD 0.1 unknown origin
+ * $FreeBSD$
+ */
+
+/*
+ * Interrupt entry points for external interrupts triggered by I/O APICs
+ * as well as IPI handlers.
+ */
+
+#include "opt_smp.h"
+
+#include <machine/asmacros.h>
+#include <x86/apicreg.h>
+
+#include "assym.s"
+
+/*
+ * I/O Interrupt Entry Point. Rather than having one entry point for
+ * each interrupt source, we use one entry point for each 32-bit word
+ * in the ISR. The handler determines the highest bit set in the ISR,
+ * translates that into a vector, and passes the vector to the
+ * lapic_handle_intr() function.
+ */
+#define ISR_VEC(index, vec_name) \
+ .text ; \
+ SUPERALIGN_TEXT ; \
+IDTVEC(vec_name) ; \
+ PUSH_FRAME ; \
+ FAKE_MCOUNT(TF_RIP(%rsp)) ; \
+ movq lapic, %rdx ; /* pointer to local APIC */ \
+ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \
+ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
+ jz 1f ; \
+ addl $(32 * index),%eax ; \
+ movq %rsp, %rsi ; \
+ movl %eax, %edi ; /* pass the IRQ */ \
+ call lapic_handle_intr ; \
+1: ; \
+ MEXITCOUNT ; \
+ jmp doreti
+
+/*
+ * Handle "spurious INTerrupts".
+ * Notes:
+ * This is different from the "spurious INTerrupt" generated by an
+ * 8259 PIC for missing INTs. See the APIC documentation for details.
+ * This routine should NOT do an 'EOI' cycle.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(spuriousint)
+
+ /* No EOI cycle used here */
+
+ jmp doreti_iret
+
+ ISR_VEC(1, apic_isr1)
+ ISR_VEC(2, apic_isr2)
+ ISR_VEC(3, apic_isr3)
+ ISR_VEC(4, apic_isr4)
+ ISR_VEC(5, apic_isr5)
+ ISR_VEC(6, apic_isr6)
+ ISR_VEC(7, apic_isr7)
+
+/*
+ * Local APIC periodic timer handler.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(timerint)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp, %rdi
+ call lapic_handle_timer
+ MEXITCOUNT
+ jmp doreti
+
+/*
+ * Local APIC CMCI handler.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cmcint)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ call lapic_handle_cmc
+ MEXITCOUNT
+ jmp doreti
+
+/*
+ * Local APIC error interrupt handler.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(errorint)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ call lapic_handle_error
+ MEXITCOUNT
+ jmp doreti
+
+#ifdef SMP
+/*
+ * Global address space TLB shootdown.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(invltlb)
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+ PUSH_FRAME
+ movl PCPU(CPUID), %eax
+#ifdef COUNT_XINVLTLB_HITS
+ incl xhits_gbl(,%rax,4)
+#endif
+#ifdef COUNT_IPIS
+ movq ipi_invltlb_counts(,%rax,8),%rax
+ incq (%rax)
+#endif
+ POP_FRAME
+#endif
+
+ pushq %rax
+
+ movq %cr3, %rax /* invalidate the TLB */
+ movq %rax, %cr3
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popq %rax
+ jmp doreti_iret
+
+/*
+ * Single page TLB shootdown
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(invlpg)
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+ PUSH_FRAME
+ movl PCPU(CPUID), %eax
+#ifdef COUNT_XINVLTLB_HITS
+ incl xhits_pg(,%rax,4)
+#endif
+#ifdef COUNT_IPIS
+ movq ipi_invlpg_counts(,%rax,8),%rax
+ incq (%rax)
+#endif
+ POP_FRAME
+#endif
+
+ pushq %rax
+
+ movq smp_tlb_addr1, %rax
+ invlpg (%rax) /* invalidate single page */
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popq %rax
+ jmp doreti_iret
+
+/*
+ * Page range TLB shootdown.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(invlrng)
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+ PUSH_FRAME
+ movl PCPU(CPUID), %eax
+#ifdef COUNT_XINVLTLB_HITS
+ incl xhits_rng(,%rax,4)
+#endif
+#ifdef COUNT_IPIS
+ movq ipi_invlrng_counts(,%rax,8),%rax
+ incq (%rax)
+#endif
+ POP_FRAME
+#endif
+
+ pushq %rax
+ pushq %rdx
+
+ movq smp_tlb_addr1, %rdx
+ movq smp_tlb_addr2, %rax
+1: invlpg (%rdx) /* invalidate single page */
+ addq $PAGE_SIZE, %rdx
+ cmpq %rax, %rdx
+ jb 1b
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popq %rdx
+ popq %rax
+ jmp doreti_iret
+
+/*
+ * Invalidate cache.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(invlcache)
+#ifdef COUNT_IPIS
+ PUSH_FRAME
+ movl PCPU(CPUID), %eax
+ movq ipi_invlcache_counts(,%rax,8),%rax
+ incq (%rax)
+ POP_FRAME
+#endif
+
+ pushq %rax
+
+ wbinvd
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popq %rax
+ jmp doreti_iret
+
+/*
+ * Handler for IPIs sent via the per-cpu IPI bitmap.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(ipi_intr_bitmap_handler)
+ PUSH_FRAME
+
+ movq lapic, %rdx
+ movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */
+
+ FAKE_MCOUNT(TF_RIP(%rsp))
+
+ call ipi_bitmap_handler
+ MEXITCOUNT
+ jmp doreti
+
+/*
+ * Executed by a CPU when it receives an IPI_STOP from another CPU.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cpustop)
+ PUSH_FRAME
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+
+ call cpustop_handler
+ jmp doreti
+
+/*
+ * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cpususpend)
+ PUSH_FRAME
+
+ call cpususpend_handler
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+ jmp doreti
+
+/*
+ * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
+ *
+ * - Calls the generic rendezvous action function.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(rendezvous)
+ PUSH_FRAME
+#ifdef COUNT_IPIS
+ movl PCPU(CPUID), %eax
+ movq ipi_rendezvous_counts(,%rax,8), %rax
+ incq (%rax)
+#endif
+ call smp_rendezvous_action
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+ jmp doreti
+#endif /* SMP */
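
Aside: the ISR_VEC macro's bsrl-based dispatch can be restated in C.  A short
sketch (made-up ISR word, gcc-style __builtin_clz assumed, not part of this
commit) of how a set bit in in-service-register word "index" becomes the
vector handed to lapic_handle_intr():

    #include <stdio.h>
    #include <stdint.h>

    static int
    isr_to_vector(int index, uint32_t isr_word)
    {
    	int bit;

    	if (isr_word == 0)
    		return (-1);		/* nothing pending in this word */
    	/* 31 - clz is the index of the highest set bit, as bsrl computes. */
    	bit = 31 - __builtin_clz(isr_word);
    	return (32 * index + bit);
    }

    int
    main(void)
    {

    	/* e.g. apic_isr2 (index 2) with ISR bit 5 set -> vector 69 */
    	printf("vector=%d\n", isr_to_vector(2, 1U << 5));
    	return (0);
    }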
diff --git a/sys/amd64/amd64/atomic.c b/sys/amd64/amd64/atomic.c
new file mode 100644
index 0000000..1b4ff7e
--- /dev/null
+++ b/sys/amd64/amd64/atomic.c
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1999 Peter Jeremy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * This file creates publicly callable functions to perform various
+ * simple arithmetic on memory which is atomic in the presence of
+ * interrupts and multiple processors.
+ */
+#include <sys/types.h>
+
+/* First, make atomic.h generate prototypes as it does for kernel modules */
+#define KLD_MODULE
+#include <machine/atomic.h>
+#undef _MACHINE_ATOMIC_H_ /* forget we included it */
+#undef KLD_MODULE
+#undef ATOMIC_ASM
+
+/* Make atomic.h generate public functions */
+#define WANT_FUNCTIONS
+#define static
+#undef __inline
+#define __inline
+
+#include <machine/atomic.h>
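+
+/*
+ * Example (hypothetical module code): with the out-of-line functions
+ * generated above, a kernel module built without the inline versions
+ * can still write, e.g.:
+ *
+ *	static u_int refs;
+ *
+ *	atomic_add_int(&refs, 1);
+ *	if (atomic_cmpset_int(&refs, 1, 0))
+ *		...;
+ *
+ * and have the calls resolve to these definitions.
+ */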
diff --git a/sys/amd64/amd64/atpic_vector.S b/sys/amd64/amd64/atpic_vector.S
new file mode 100644
index 0000000..e7dcbc3
--- /dev/null
+++ b/sys/amd64/amd64/atpic_vector.S
@@ -0,0 +1,73 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: vector.s, 386BSD 0.1 unknown origin
+ * $FreeBSD$
+ */
+
+/*
+ * Interrupt entry points for external interrupts triggered by the 8259A
+ * master and slave interrupt controllers.
+ */
+
+#include <machine/asmacros.h>
+
+#include "assym.s"
+
+/*
+ * Macros for interrupt entry, call to handler, and exit.
+ */
+#define INTR(irq_num, vec_name) \
+ .text ; \
+ SUPERALIGN_TEXT ; \
+IDTVEC(vec_name) ; \
+ PUSH_FRAME ; \
+ FAKE_MCOUNT(TF_RIP(%rsp)) ; \
+ movq %rsp, %rsi ; \
+ movl $irq_num, %edi; /* pass the IRQ */ \
+ call atpic_handle_intr ; \
+ MEXITCOUNT ; \
+ jmp doreti
+
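+/*
+ * For illustration, INTR(0, atpic_intr0) below expands to roughly:
+ *
+ *	IDTVEC(atpic_intr0)
+ *		PUSH_FRAME
+ *		FAKE_MCOUNT(TF_RIP(%rsp))
+ *		movq	%rsp, %rsi	(trapframe, second argument)
+ *		movl	$0, %edi	(IRQ number, first argument)
+ *		call	atpic_handle_intr
+ *		MEXITCOUNT
+ *		jmp	doreti
+ */
+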
+ INTR(0, atpic_intr0)
+ INTR(1, atpic_intr1)
+ INTR(2, atpic_intr2)
+ INTR(3, atpic_intr3)
+ INTR(4, atpic_intr4)
+ INTR(5, atpic_intr5)
+ INTR(6, atpic_intr6)
+ INTR(7, atpic_intr7)
+ INTR(8, atpic_intr8)
+ INTR(9, atpic_intr9)
+ INTR(10, atpic_intr10)
+ INTR(11, atpic_intr11)
+ INTR(12, atpic_intr12)
+ INTR(13, atpic_intr13)
+ INTR(14, atpic_intr14)
+ INTR(15, atpic_intr15)
diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c
new file mode 100644
index 0000000..ee32740
--- /dev/null
+++ b/sys/amd64/amd64/autoconf.c
@@ -0,0 +1,132 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Set up the system to run on the current machine.
+ *
+ * configure() is called at boot time and initializes the vba
+ * device tables and the memory controller monitoring.  Available
+ * devices are determined (from possibilities mentioned in ioconf.c),
+ * and the drivers are initialized.
+ */
+#include "opt_bootp.h"
+#include "opt_isa.h"
+#include "opt_bus.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/reboot.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/cons.h>
+
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#include <machine/md_var.h>
+
+#ifdef DEV_ISA
+#include <isa/isavar.h>
+
+device_t isa_bus_device = 0;
+#endif
+
+static void configure_first(void *);
+static void configure(void *);
+static void configure_final(void *);
+
+SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL);
+/* SI_ORDER_SECOND is hookable */
+SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL);
+/* SI_ORDER_MIDDLE is hookable */
+SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL);
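+
+/*
+ * A subsystem that needs to run between these stages can hook one of
+ * the free orders itself, e.g. (hypothetical):
+ *
+ *	SYSINIT(myhook, SI_SUB_CONFIGURE, SI_ORDER_SECOND, myhook_init,
+ *	    NULL);
+ */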
+
+/*
+ * Determine the I/O configuration for a machine.
+ */
+static void
+configure_first(void *dummy)
+{
+
+ /* nexus0 is the top of the amd64 device tree */
+ device_add_child(root_bus, "nexus", 0);
+}
+
+static void
+configure(void *dummy)
+{
+
+ /*
+ * Enable interrupts on the processor. The interrupts are still
+ * disabled in the interrupt controllers until interrupt handlers
+ * are registered.
+ */
+ enable_intr();
+
+ /* initialize new bus architecture */
+ root_bus_configure();
+
+#ifdef DEV_ISA
+ /*
+	 * Explicitly probe and attach ISA last.  The ISA bus saves
+	 * its device node at attach time for us here.
+ */
+ if (isa_bus_device)
+ isa_probe_children(isa_bus_device);
+#endif
+}
+
+static void
+configure_final(void *dummy)
+{
+
+ cninit_finish();
+ if (bootverbose)
+ printf("Device configuration finished.\n");
+ cold = 0;
+}
diff --git a/sys/amd64/amd64/bios.c b/sys/amd64/amd64/bios.c
new file mode 100644
index 0000000..c8985c1
--- /dev/null
+++ b/sys/amd64/amd64/bios.c
@@ -0,0 +1,95 @@
+/*-
+ * Copyright (c) 1997 Michael Smith
+ * Copyright (c) 1998 Jonathan Lemon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Subset of the i386 BIOS support code.  We cannot make bios16 or bios32
+ * calls, so we can leave those out.  However, searching for BIOS ROM
+ * signatures can be useful for locating tables, e.g. PowerNow! settings.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/pc/bios.h>
+
+#define BIOS_START 0xe0000
+#define BIOS_SIZE 0x20000
+
+/*
+ * bios_sigsearch
+ *
+ * Search some or all of the BIOS region for a signature string.
+ *
+ * (start) Optional offset returned from this function
+ *		(for searching for multiple matches), or 0
+ * to start the search from the base of the BIOS.
+ * Note that this will be a _physical_ address in
+ * the range 0xe0000 - 0xfffff.
+ * (sig) is a pointer to the byte(s) of the signature.
+ * (siglen) number of bytes in the signature.
+ * (paralen) signature paragraph (alignment) size.
+ * (sigofs) offset of the signature within the paragraph.
+ *
+ * Returns the _physical_ address of the found signature, or 0 if the
+ * signature was not found.
+ */
+
+u_int32_t
+bios_sigsearch(u_int32_t start, u_char *sig, int siglen, int paralen, int sigofs)
+{
+ u_char *sp, *end;
+
+ /* compute the starting address */
+	if ((start >= BIOS_START) && (start <= (BIOS_START + BIOS_SIZE))) {
+		sp = (u_char *)BIOS_PADDRTOVADDR(start);
+	} else if (start == 0) {
+		sp = (u_char *)BIOS_PADDRTOVADDR(BIOS_START);
+	} else {
+		return (0);		/* bogus start address */
+ }
+
+ /* compute the end address */
+ end = (u_char *)BIOS_PADDRTOVADDR(BIOS_START + BIOS_SIZE);
+
+ /* loop searching */
+	while ((sp + sigofs + siglen) < end) {
+		/* compare here */
+		if (!bcmp(sp + sigofs, sig, siglen)) {
+			/* convert back to physical address */
+			return ((u_int32_t)(uintptr_t)BIOS_VADDRTOPADDR(sp));
+		}
+		sp += paralen;
+	}
+	return (0);
+}
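+
+/*
+ * Example (hypothetical caller): locate the SMBIOS entry point, whose
+ * "_SM_" anchor string is paragraph-aligned within the BIOS region:
+ *
+ *	u_int32_t addr;
+ *
+ *	addr = bios_sigsearch(0, "_SM_", 4, 16, 0);
+ *	if (addr != 0)
+ *		... map and parse the entry point at addr ...
+ */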
diff --git a/sys/amd64/amd64/bpf_jit_machdep.c b/sys/amd64/amd64/bpf_jit_machdep.c
new file mode 100644
index 0000000..fe861d2
--- /dev/null
+++ b/sys/amd64/amd64/bpf_jit_machdep.c
@@ -0,0 +1,653 @@
+/*-
+ * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (C) 2005-2009 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include "opt_bpf.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+#include <net/if.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#endif
+
+#include <sys/types.h>
+
+#include <net/bpf.h>
+#include <net/bpf_jitter.h>
+
+#include <amd64/amd64/bpf_jit_machdep.h>
+
+bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *);
+
+/*
+ * Emit routine to update the jump table.
+ */
+static void
+emit_length(bpf_bin_stream *stream, __unused u_int value, u_int len)
+{
+
+ if (stream->refs != NULL)
+ (stream->refs)[stream->bpf_pc] += len;
+ stream->cur_ip += len;
+}
+
+/*
+ * Emit routine to output the actual binary code.
+ */
+static void
+emit_code(bpf_bin_stream *stream, u_int value, u_int len)
+{
+
+ switch (len) {
+ case 1:
+ stream->ibuf[stream->cur_ip] = (u_char)value;
+ stream->cur_ip++;
+ break;
+
+ case 2:
+ *((u_short *)(stream->ibuf + stream->cur_ip)) = (u_short)value;
+ stream->cur_ip += 2;
+ break;
+
+ case 4:
+ *((u_int *)(stream->ibuf + stream->cur_ip)) = value;
+ stream->cur_ip += 4;
+ break;
+ }
+}
+
+/*
+ * Scan the filter program and find possible optimizations.
+ */
+static int
+bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
+{
+ int flags;
+ u_int i;
+
+ /* Do we return immediately? */
+ if (BPF_CLASS(prog[0].code) == BPF_RET)
+ return (BPF_JIT_FRET);
+
+ for (flags = 0, i = 0; i < nins; i++) {
+ switch (prog[i].code) {
+ case BPF_LD|BPF_W|BPF_ABS:
+ case BPF_LD|BPF_H|BPF_ABS:
+ case BPF_LD|BPF_B|BPF_ABS:
+ case BPF_LD|BPF_W|BPF_IND:
+ case BPF_LD|BPF_H|BPF_IND:
+ case BPF_LD|BPF_B|BPF_IND:
+ case BPF_LDX|BPF_MSH|BPF_B:
+ flags |= BPF_JIT_FPKT;
+ break;
+ case BPF_LD|BPF_MEM:
+ case BPF_LDX|BPF_MEM:
+ case BPF_ST:
+ case BPF_STX:
+ flags |= BPF_JIT_FMEM;
+ break;
+ case BPF_LD|BPF_W|BPF_LEN:
+ case BPF_LDX|BPF_W|BPF_LEN:
+ flags |= BPF_JIT_FLEN;
+ break;
+ case BPF_JMP|BPF_JA:
+ case BPF_JMP|BPF_JGT|BPF_K:
+ case BPF_JMP|BPF_JGE|BPF_K:
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ case BPF_JMP|BPF_JSET|BPF_K:
+ case BPF_JMP|BPF_JGT|BPF_X:
+ case BPF_JMP|BPF_JGE|BPF_X:
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ case BPF_JMP|BPF_JSET|BPF_X:
+ flags |= BPF_JIT_FJMP;
+ break;
+ }
+ if (flags == BPF_JIT_FLAG_ALL)
+ break;
+ }
+
+ return (flags);
+}
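+
+/*
+ * For illustration, a one-instruction program "ret #-1" (accept the
+ * whole packet) is classified BPF_JIT_FRET, so bpf_jit_compile() only
+ * emits the return sequence for it.
+ */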
+
+/*
+ * Compile the filter program into native amd64 code.
+ */
+bpf_filter_func
+bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
+{
+ bpf_bin_stream stream;
+ struct bpf_insn *ins;
+ int flags, fret, fpkt, fmem, fjmp, flen;
+ u_int i, pass;
+
+ /*
+ * NOTE: Do not modify the name of this variable, as it's used by
+ * the macros to emit code.
+ */
+ emit_func emitm;
+
+ flags = bpf_jit_optimize(prog, nins);
+ fret = (flags & BPF_JIT_FRET) != 0;
+ fpkt = (flags & BPF_JIT_FPKT) != 0;
+ fmem = (flags & BPF_JIT_FMEM) != 0;
+ fjmp = (flags & BPF_JIT_FJMP) != 0;
+ flen = (flags & BPF_JIT_FLEN) != 0;
+
+ if (fret)
+ nins = 1;
+
+ memset(&stream, 0, sizeof(stream));
+
+ /* Allocate the reference table for the jumps. */
+ if (fjmp) {
+#ifdef _KERNEL
+ stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
+ M_NOWAIT | M_ZERO);
+#else
+ stream.refs = calloc(nins + 1, sizeof(u_int));
+#endif
+ if (stream.refs == NULL)
+ return (NULL);
+ }
+
+ /*
+ * The first pass will emit the lengths of the instructions
+ * to create the reference table.
+ */
+ emitm = emit_length;
+
+ for (pass = 0; pass < 2; pass++) {
+ ins = prog;
+
+ /* Create the procedure header. */
+ if (fmem) {
+ PUSH(RBP);
+ MOVrq(RSP, RBP);
+ SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP);
+ }
+ if (flen)
+ MOVrd2(ESI, R9D);
+ if (fpkt) {
+ MOVrq2(RDI, R8);
+ MOVrd(EDX, EDI);
+ }
+
+ for (i = 0; i < nins; i++) {
+ stream.bpf_pc++;
+
+ switch (ins->code) {
+ default:
+#ifdef _KERNEL
+ return (NULL);
+#else
+ abort();
+#endif
+
+ case BPF_RET|BPF_K:
+ MOVid(ins->k, EAX);
+ if (fmem)
+ LEAVE();
+ RET();
+ break;
+
+ case BPF_RET|BPF_A:
+ if (fmem)
+ LEAVE();
+ RET();
+ break;
+
+ case BPF_LD|BPF_W|BPF_ABS:
+ MOVid(ins->k, ESI);
+ CMPrd(EDI, ESI);
+ JAb(12);
+ MOVrd(EDI, ECX);
+ SUBrd(ESI, ECX);
+ CMPid(sizeof(int32_t), ECX);
+ if (fmem) {
+ JAEb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JAEb(3);
+ ZEROrd(EAX);
+ }
+ RET();
+ MOVrq3(R8, RCX);
+ MOVobd(RCX, RSI, EAX);
+ BSWAP(EAX);
+ break;
+
+ case BPF_LD|BPF_H|BPF_ABS:
+ ZEROrd(EAX);
+ MOVid(ins->k, ESI);
+ CMPrd(EDI, ESI);
+ JAb(12);
+ MOVrd(EDI, ECX);
+ SUBrd(ESI, ECX);
+ CMPid(sizeof(int16_t), ECX);
+ if (fmem) {
+ JAEb(2);
+ LEAVE();
+ } else
+ JAEb(1);
+ RET();
+ MOVrq3(R8, RCX);
+ MOVobw(RCX, RSI, AX);
+ SWAP_AX();
+ break;
+
+ case BPF_LD|BPF_B|BPF_ABS:
+ ZEROrd(EAX);
+ MOVid(ins->k, ESI);
+ CMPrd(EDI, ESI);
+ if (fmem) {
+ JBb(2);
+ LEAVE();
+ } else
+ JBb(1);
+ RET();
+ MOVrq3(R8, RCX);
+ MOVobb(RCX, RSI, AL);
+ break;
+
+ case BPF_LD|BPF_W|BPF_LEN:
+ MOVrd3(R9D, EAX);
+ break;
+
+ case BPF_LDX|BPF_W|BPF_LEN:
+ MOVrd3(R9D, EDX);
+ break;
+
+ case BPF_LD|BPF_W|BPF_IND:
+ CMPrd(EDI, EDX);
+ JAb(27);
+ MOVid(ins->k, ESI);
+ MOVrd(EDI, ECX);
+ SUBrd(EDX, ECX);
+ CMPrd(ESI, ECX);
+ JBb(14);
+ ADDrd(EDX, ESI);
+ MOVrd(EDI, ECX);
+ SUBrd(ESI, ECX);
+ CMPid(sizeof(int32_t), ECX);
+ if (fmem) {
+ JAEb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JAEb(3);
+ ZEROrd(EAX);
+ }
+ RET();
+ MOVrq3(R8, RCX);
+ MOVobd(RCX, RSI, EAX);
+ BSWAP(EAX);
+ break;
+
+ case BPF_LD|BPF_H|BPF_IND:
+ ZEROrd(EAX);
+ CMPrd(EDI, EDX);
+ JAb(27);
+ MOVid(ins->k, ESI);
+ MOVrd(EDI, ECX);
+ SUBrd(EDX, ECX);
+ CMPrd(ESI, ECX);
+ JBb(14);
+ ADDrd(EDX, ESI);
+ MOVrd(EDI, ECX);
+ SUBrd(ESI, ECX);
+ CMPid(sizeof(int16_t), ECX);
+ if (fmem) {
+ JAEb(2);
+ LEAVE();
+ } else
+ JAEb(1);
+ RET();
+ MOVrq3(R8, RCX);
+ MOVobw(RCX, RSI, AX);
+ SWAP_AX();
+ break;
+
+ case BPF_LD|BPF_B|BPF_IND:
+ ZEROrd(EAX);
+ CMPrd(EDI, EDX);
+ JAEb(13);
+ MOVid(ins->k, ESI);
+ MOVrd(EDI, ECX);
+ SUBrd(EDX, ECX);
+ CMPrd(ESI, ECX);
+ if (fmem) {
+ JAb(2);
+ LEAVE();
+ } else
+ JAb(1);
+ RET();
+ MOVrq3(R8, RCX);
+ ADDrd(EDX, ESI);
+ MOVobb(RCX, RSI, AL);
+ break;
+
+ case BPF_LDX|BPF_MSH|BPF_B:
+ MOVid(ins->k, ESI);
+ CMPrd(EDI, ESI);
+ if (fmem) {
+ JBb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JBb(3);
+ ZEROrd(EAX);
+ }
+ RET();
+ ZEROrd(EDX);
+ MOVrq3(R8, RCX);
+ MOVobb(RCX, RSI, DL);
+ ANDib(0x0f, DL);
+ SHLib(2, EDX);
+ break;
+
+ case BPF_LD|BPF_IMM:
+ MOVid(ins->k, EAX);
+ break;
+
+ case BPF_LDX|BPF_IMM:
+ MOVid(ins->k, EDX);
+ break;
+
+ case BPF_LD|BPF_MEM:
+ MOVid(ins->k * sizeof(uint32_t), ESI);
+ MOVobd(RSP, RSI, EAX);
+ break;
+
+ case BPF_LDX|BPF_MEM:
+ MOVid(ins->k * sizeof(uint32_t), ESI);
+ MOVobd(RSP, RSI, EDX);
+ break;
+
+ case BPF_ST:
+ /*
+ * XXX this command and the following could
+ * be optimized if the previous instruction
+ * was already of this type
+ */
+ MOVid(ins->k * sizeof(uint32_t), ESI);
+ MOVomd(EAX, RSP, RSI);
+ break;
+
+ case BPF_STX:
+ MOVid(ins->k * sizeof(uint32_t), ESI);
+ MOVomd(EDX, RSP, RSI);
+ break;
+
+ case BPF_JMP|BPF_JA:
+ JUMP(ins->k);
+ break;
+
+ case BPF_JMP|BPF_JGT|BPF_K:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ CMPid(ins->k, EAX);
+ JCC(JA, JBE);
+ break;
+
+ case BPF_JMP|BPF_JGE|BPF_K:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ CMPid(ins->k, EAX);
+ JCC(JAE, JB);
+ break;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ CMPid(ins->k, EAX);
+ JCC(JE, JNE);
+ break;
+
+ case BPF_JMP|BPF_JSET|BPF_K:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ TESTid(ins->k, EAX);
+ JCC(JNE, JE);
+ break;
+
+ case BPF_JMP|BPF_JGT|BPF_X:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ CMPrd(EDX, EAX);
+ JCC(JA, JBE);
+ break;
+
+ case BPF_JMP|BPF_JGE|BPF_X:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ CMPrd(EDX, EAX);
+ JCC(JAE, JB);
+ break;
+
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ CMPrd(EDX, EAX);
+ JCC(JE, JNE);
+ break;
+
+ case BPF_JMP|BPF_JSET|BPF_X:
+ if (ins->jt == ins->jf) {
+ JUMP(ins->jt);
+ break;
+ }
+ TESTrd(EDX, EAX);
+ JCC(JNE, JE);
+ break;
+
+ case BPF_ALU|BPF_ADD|BPF_X:
+ ADDrd(EDX, EAX);
+ break;
+
+ case BPF_ALU|BPF_SUB|BPF_X:
+ SUBrd(EDX, EAX);
+ break;
+
+ case BPF_ALU|BPF_MUL|BPF_X:
+ MOVrd(EDX, ECX);
+ MULrd(EDX);
+ MOVrd(ECX, EDX);
+ break;
+
+ case BPF_ALU|BPF_DIV|BPF_X:
+ TESTrd(EDX, EDX);
+ if (fmem) {
+ JNEb(4);
+ ZEROrd(EAX);
+ LEAVE();
+ } else {
+ JNEb(3);
+ ZEROrd(EAX);
+ }
+ RET();
+ MOVrd(EDX, ECX);
+ ZEROrd(EDX);
+ DIVrd(ECX);
+ MOVrd(ECX, EDX);
+ break;
+
+ case BPF_ALU|BPF_AND|BPF_X:
+ ANDrd(EDX, EAX);
+ break;
+
+ case BPF_ALU|BPF_OR|BPF_X:
+ ORrd(EDX, EAX);
+ break;
+
+ case BPF_ALU|BPF_LSH|BPF_X:
+ MOVrd(EDX, ECX);
+ SHL_CLrb(EAX);
+ break;
+
+ case BPF_ALU|BPF_RSH|BPF_X:
+ MOVrd(EDX, ECX);
+ SHR_CLrb(EAX);
+ break;
+
+ case BPF_ALU|BPF_ADD|BPF_K:
+ ADD_EAXi(ins->k);
+ break;
+
+ case BPF_ALU|BPF_SUB|BPF_K:
+ SUB_EAXi(ins->k);
+ break;
+
+ case BPF_ALU|BPF_MUL|BPF_K:
+ MOVrd(EDX, ECX);
+ MOVid(ins->k, EDX);
+ MULrd(EDX);
+ MOVrd(ECX, EDX);
+ break;
+
+ case BPF_ALU|BPF_DIV|BPF_K:
+ MOVrd(EDX, ECX);
+ ZEROrd(EDX);
+ MOVid(ins->k, ESI);
+ DIVrd(ESI);
+ MOVrd(ECX, EDX);
+ break;
+
+ case BPF_ALU|BPF_AND|BPF_K:
+ ANDid(ins->k, EAX);
+ break;
+
+ case BPF_ALU|BPF_OR|BPF_K:
+ ORid(ins->k, EAX);
+ break;
+
+ case BPF_ALU|BPF_LSH|BPF_K:
+ SHLib((ins->k) & 0xff, EAX);
+ break;
+
+ case BPF_ALU|BPF_RSH|BPF_K:
+ SHRib((ins->k) & 0xff, EAX);
+ break;
+
+ case BPF_ALU|BPF_NEG:
+ NEGd(EAX);
+ break;
+
+ case BPF_MISC|BPF_TAX:
+ MOVrd(EAX, EDX);
+ break;
+
+ case BPF_MISC|BPF_TXA:
+ MOVrd(EDX, EAX);
+ break;
+ }
+ ins++;
+ }
+
+ if (pass > 0)
+ continue;
+
+ *size = stream.cur_ip;
+#ifdef _KERNEL
+ stream.ibuf = malloc(*size, M_BPFJIT, M_NOWAIT);
+ if (stream.ibuf == NULL)
+ break;
+#else
+ stream.ibuf = mmap(NULL, *size, PROT_READ | PROT_WRITE,
+ MAP_ANON, -1, 0);
+ if (stream.ibuf == MAP_FAILED) {
+ stream.ibuf = NULL;
+ break;
+ }
+#endif
+
+ /*
+ * Modify the reference table to contain the offsets and
+ * not the lengths of the instructions.
+ */
+ if (fjmp)
+ for (i = 1; i < nins + 1; i++)
+ stream.refs[i] += stream.refs[i - 1];
+
+ /* Reset the counters. */
+ stream.cur_ip = 0;
+ stream.bpf_pc = 0;
+
+ /* The second pass creates the actual code. */
+ emitm = emit_code;
+ }
+
+ /*
+ * The reference table is needed only during compilation,
+	 * so now we can free it.
+ */
+ if (fjmp)
+#ifdef _KERNEL
+ free(stream.refs, M_BPFJIT);
+#else
+ free(stream.refs);
+#endif
+
+#ifndef _KERNEL
+ if (stream.ibuf != NULL &&
+ mprotect(stream.ibuf, *size, PROT_READ | PROT_EXEC) != 0) {
+ munmap(stream.ibuf, *size);
+ stream.ibuf = NULL;
+ }
+#endif
+
+ return ((bpf_filter_func)stream.ibuf);
+}
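+
+/*
+ * Example (hypothetical userland use): JIT an "accept everything"
+ * filter and run it on a captured packet.
+ *
+ *	struct bpf_insn prog[] = {
+ *		BPF_STMT(BPF_RET|BPF_K, (u_int)-1),
+ *	};
+ *	size_t size;
+ *	bpf_filter_func f;
+ *
+ *	f = bpf_jit_compile(prog, 1, &size);
+ *	if (f != NULL)
+ *		accepted = f(pkt, wirelen, buflen);
+ */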
diff --git a/sys/amd64/amd64/bpf_jit_machdep.h b/sys/amd64/amd64/bpf_jit_machdep.h
new file mode 100644
index 0000000..01c251f
--- /dev/null
+++ b/sys/amd64/amd64/bpf_jit_machdep.h
@@ -0,0 +1,482 @@
+/*-
+ * Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (C) 2005-2009 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _BPF_JIT_MACHDEP_H_
+#define _BPF_JIT_MACHDEP_H_
+
+/*
+ * Registers
+ */
+#define RAX 0
+#define RCX 1
+#define RDX 2
+#define RBX 3
+#define RSP 4
+#define RBP 5
+#define RSI 6
+#define RDI 7
+#define R8 0
+#define R9 1
+#define R10 2
+#define R11 3
+#define R12 4
+#define R13 5
+#define R14 6
+#define R15 7
+
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+#define ESP 4
+#define EBP 5
+#define ESI 6
+#define EDI 7
+#define R8D 0
+#define R9D 1
+#define R10D 2
+#define R11D 3
+#define R12D 4
+#define R13D 5
+#define R14D 6
+#define R15D 7
+
+#define AX 0
+#define CX 1
+#define DX 2
+#define BX 3
+#define SP 4
+#define BP 5
+#define SI 6
+#define DI 7
+
+#define AL 0
+#define CL 1
+#define DL 2
+#define BL 3
+
+/* Optimization flags */
+#define BPF_JIT_FRET 0x01
+#define BPF_JIT_FPKT 0x02
+#define BPF_JIT_FMEM 0x04
+#define BPF_JIT_FJMP 0x08
+#define BPF_JIT_FLEN 0x10
+
+#define BPF_JIT_FLAG_ALL \
+ (BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FLEN)
+
+/* A stream of native binary code */
+typedef struct bpf_bin_stream {
+ /* Current native instruction pointer. */
+ int cur_ip;
+
+ /*
+ * Current BPF instruction pointer, i.e. position in
+ * the BPF program reached by the jitter.
+ */
+ int bpf_pc;
+
+ /* Instruction buffer, contains the generated native code. */
+ char *ibuf;
+
+ /* Jumps reference table. */
+ u_int *refs;
+} bpf_bin_stream;
+
+/*
+ * Prototype of the emit functions.
+ *
+ * Different emit functions are used to create the reference table and
+ * to generate the actual filtering code.  This allows the instruction
+ * macros to be simpler.
+ * The first parameter is the stream that will receive the data.
+ * The second one is a variable containing the data.
+ * The third one is the length, which can be 1, 2, or 4, since it is possible
+ * to emit a byte, a short, or a word at a time.
+ */
+typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n);
+
+/*
+ * Native instruction macros
+ */
+
+/* movl i32,r32 */
+#define MOVid(i32, r32) do { \
+ emitm(&stream, (11 << 4) | (1 << 3) | (r32 & 0x7), 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
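+/* For illustration, MOVid(5, EAX) emits b8 05 00 00 00 (movl $5,%eax). */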
+
+/* movq i64,r64 */
+#define MOViq(i64, r64) do { \
+ emitm(&stream, 0x48, 1); \
+ emitm(&stream, (11 << 4) | (1 << 3) | (r64 & 0x7), 1); \
+ emitm(&stream, i64, 4); \
+ emitm(&stream, (i64 >> 32), 4); \
+} while (0)
+
+/* movl sr32,dr32 */
+#define MOVrd(sr32, dr32) do { \
+ emitm(&stream, 0x89, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* movl sr32,dr32 (dr32 = %r8-15d) */
+#define MOVrd2(sr32, dr32) do { \
+ emitm(&stream, 0x8941, 2); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* movl sr32,dr32 (sr32 = %r8-15d) */
+#define MOVrd3(sr32, dr32) do { \
+ emitm(&stream, 0x8944, 2); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* movq sr64,dr64 */
+#define MOVrq(sr64, dr64) do { \
+ emitm(&stream, 0x8948, 2); \
+ emitm(&stream, \
+ (3 << 6) | ((sr64 & 0x7) << 3) | (dr64 & 0x7), 1); \
+} while (0)
+
+/* movq sr64,dr64 (dr64 = %r8-15) */
+#define MOVrq2(sr64, dr64) do { \
+ emitm(&stream, 0x8949, 2); \
+ emitm(&stream, \
+ (3 << 6) | ((sr64 & 0x7) << 3) | (dr64 & 0x7), 1); \
+} while (0)
+
+/* movq sr64,dr64 (sr64 = %r8-15) */
+#define MOVrq3(sr64, dr64) do { \
+ emitm(&stream, 0x894c, 2); \
+ emitm(&stream, \
+ (3 << 6) | ((sr64 & 0x7) << 3) | (dr64 & 0x7), 1); \
+} while (0)
+
+/* movl (sr64,or64,1),dr32 */
+#define MOVobd(sr64, or64, dr32) do { \
+ emitm(&stream, 0x8b, 1); \
+ emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or64 & 0x7) << 3) | (sr64 & 0x7), 1); \
+} while (0)
+
+/* movw (sr64,or64,1),dr16 */
+#define MOVobw(sr64, or64, dr16) do { \
+ emitm(&stream, 0x8b66, 2); \
+ emitm(&stream, ((dr16 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or64 & 0x7) << 3) | (sr64 & 0x7), 1); \
+} while (0)
+
+/* movb (sr64,or64,1),dr8 */
+#define MOVobb(sr64, or64, dr8) do { \
+ emitm(&stream, 0x8a, 1); \
+ emitm(&stream, ((dr8 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or64 & 0x7) << 3) | (sr64 & 0x7), 1); \
+} while (0)
+
+/* movl sr32,(dr64,or64,1) */
+#define MOVomd(sr32, dr64, or64) do { \
+ emitm(&stream, 0x89, 1); \
+ emitm(&stream, ((sr32 & 0x7) << 3) | 4, 1); \
+ emitm(&stream, ((or64 & 0x7) << 3) | (dr64 & 0x7), 1); \
+} while (0)
+
+/* bswapl dr32 */
+#define BSWAP(dr32) do { \
+ emitm(&stream, 0xf, 1); \
+ emitm(&stream, (0x19 << 3) | dr32, 1); \
+} while (0)
+
+/* xchgb %al,%ah */
+#define SWAP_AX() do { \
+ emitm(&stream, 0xc486, 2); \
+} while (0)
+
+/* pushq r64 */
+#define PUSH(r64) do { \
+ emitm(&stream, (5 << 4) | (0 << 3) | (r64 & 0x7), 1); \
+} while (0)
+
+/* leaveq */
+#define LEAVE() do { \
+ emitm(&stream, 0xc9, 1); \
+} while (0)
+
+/* retq */
+#define RET() do { \
+ emitm(&stream, 0xc3, 1); \
+} while (0)
+
+/* addl sr32,dr32 */
+#define ADDrd(sr32, dr32) do { \
+ emitm(&stream, 0x01, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* addl i32,%eax */
+#define ADD_EAXi(i32) do { \
+ emitm(&stream, 0x05, 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* addl i8,r32 */
+#define ADDib(i8, r32) do { \
+ emitm(&stream, 0x83, 1); \
+ emitm(&stream, (24 << 3) | r32, 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* subl sr32,dr32 */
+#define SUBrd(sr32, dr32) do { \
+ emitm(&stream, 0x29, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* subl i32,%eax */
+#define SUB_EAXi(i32) do { \
+ emitm(&stream, 0x2d, 1); \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* subq i8,r64 */
+#define SUBib(i8, r64) do { \
+ emitm(&stream, 0x8348, 2); \
+ emitm(&stream, (29 << 3) | (r64 & 0x7), 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* mull r32 */
+#define MULrd(r32) do { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (7 << 5) | (r32 & 0x7), 1); \
+} while (0)
+
+/* divl r32 */
+#define DIVrd(r32) do { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (15 << 4) | (r32 & 0x7), 1); \
+} while (0)
+
+/* andb i8,r8 */
+#define ANDib(i8, r8) do { \
+ if (r8 == AL) { \
+ emitm(&stream, 0x24, 1); \
+ } else { \
+ emitm(&stream, 0x80, 1); \
+ emitm(&stream, (7 << 5) | r8, 1); \
+ } \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* andl i32,r32 */
+#define ANDid(i32, r32) do { \
+ if (r32 == EAX) { \
+ emitm(&stream, 0x25, 1); \
+ } else { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (7 << 5) | r32, 1); \
+ } \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* andl sr32,dr32 */
+#define ANDrd(sr32, dr32) do { \
+ emitm(&stream, 0x21, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* testl i32,r32 */
+#define TESTid(i32, r32) do { \
+ if (r32 == EAX) { \
+ emitm(&stream, 0xa9, 1); \
+ } else { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (3 << 6) | r32, 1); \
+ } \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* testl sr32,dr32 */
+#define TESTrd(sr32, dr32) do { \
+ emitm(&stream, 0x85, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* orl sr32,dr32 */
+#define ORrd(sr32, dr32) do { \
+ emitm(&stream, 0x09, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* orl i32,r32 */
+#define ORid(i32, r32) do { \
+ if (r32 == EAX) { \
+ emitm(&stream, 0x0d, 1); \
+ } else { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (25 << 3) | r32, 1); \
+ } \
+ emitm(&stream, i32, 4); \
+} while (0)
+
+/* shll i8,r32 */
+#define SHLib(i8, r32) do { \
+ emitm(&stream, 0xc1, 1); \
+ emitm(&stream, (7 << 5) | (r32 & 0x7), 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* shll %cl,dr32 */
+#define SHL_CLrb(dr32) do { \
+ emitm(&stream, 0xd3, 1); \
+ emitm(&stream, (7 << 5) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* shrl i8,r32 */
+#define SHRib(i8, r32) do { \
+ emitm(&stream, 0xc1, 1); \
+ emitm(&stream, (29 << 3) | (r32 & 0x7), 1); \
+ emitm(&stream, i8, 1); \
+} while (0)
+
+/* shrl %cl,dr32 */
+#define SHR_CLrb(dr32) do { \
+ emitm(&stream, 0xd3, 1); \
+ emitm(&stream, (29 << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* negl r32 */
+#define NEGd(r32) do { \
+ emitm(&stream, 0xf7, 1); \
+ emitm(&stream, (27 << 3) | (r32 & 0x7), 1); \
+} while (0)
+
+/* cmpl sr32,dr32 */
+#define CMPrd(sr32, dr32) do { \
+ emitm(&stream, 0x39, 1); \
+ emitm(&stream, \
+ (3 << 6) | ((sr32 & 0x7) << 3) | (dr32 & 0x7), 1); \
+} while (0)
+
+/* cmpl i32,dr32 */
+#define CMPid(i32, dr32) do { \
+ if (dr32 == EAX){ \
+ emitm(&stream, 0x3d, 1); \
+ emitm(&stream, i32, 4); \
+ } else { \
+ emitm(&stream, 0x81, 1); \
+ emitm(&stream, (0x1f << 3) | (dr32 & 0x7), 1); \
+ emitm(&stream, i32, 4); \
+ } \
+} while (0)
+
+/* jb off8 */
+#define JBb(off8) do { \
+ emitm(&stream, 0x72, 1); \
+ emitm(&stream, off8, 1); \
+} while (0)
+
+/* jae off8 */
+#define JAEb(off8) do { \
+ emitm(&stream, 0x73, 1); \
+ emitm(&stream, off8, 1); \
+} while (0)
+
+/* jne off8 */
+#define JNEb(off8) do { \
+ emitm(&stream, 0x75, 1); \
+ emitm(&stream, off8, 1); \
+} while (0)
+
+/* ja off8 */
+#define JAb(off8) do { \
+ emitm(&stream, 0x77, 1); \
+ emitm(&stream, off8, 1); \
+} while (0)
+
+/* jmp off32 */
+#define JMP(off32) do { \
+ emitm(&stream, 0xe9, 1); \
+ emitm(&stream, off32, 4); \
+} while (0)
+
+/* xorl r32,r32 */
+#define ZEROrd(r32) do { \
+ emitm(&stream, 0x31, 1); \
+ emitm(&stream, (3 << 6) | ((r32 & 0x7) << 3) | (r32 & 0x7), 1); \
+} while (0)
+
+/*
+ * Conditional long jumps
+ */
+#define JB 0x82
+#define JAE 0x83
+#define JE 0x84
+#define JNE 0x85
+#define JBE 0x86
+#define JA 0x87
+
+#define JCC(t, f) do { \
+ if (ins->jt != 0 && ins->jf != 0) { \
+ /* 5 is the size of the following jmp */ \
+ emitm(&stream, ((t) << 8) | 0x0f, 2); \
+ emitm(&stream, stream.refs[stream.bpf_pc + ins->jt] - \
+ stream.refs[stream.bpf_pc] + 5, 4); \
+ JMP(stream.refs[stream.bpf_pc + ins->jf] - \
+ stream.refs[stream.bpf_pc]); \
+ } else if (ins->jt != 0) { \
+ emitm(&stream, ((t) << 8) | 0x0f, 2); \
+ emitm(&stream, stream.refs[stream.bpf_pc + ins->jt] - \
+ stream.refs[stream.bpf_pc], 4); \
+ } else { \
+ emitm(&stream, ((f) << 8) | 0x0f, 2); \
+ emitm(&stream, stream.refs[stream.bpf_pc + ins->jf] - \
+ stream.refs[stream.bpf_pc], 4); \
+ } \
+} while (0)
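+
+/*
+ * For illustration, JCC(JE, JNE) with both branch offsets non-zero
+ * emits "0f 84 <rel32>" (je taken-branch) followed by "e9 <rel32>"
+ * (jmp false-branch); the "+ 5" above skips that 5-byte jmp.
+ */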
+
+#define JUMP(off) do { \
+ if ((off) != 0) \
+ JMP(stream.refs[stream.bpf_pc + (off)] - \
+ stream.refs[stream.bpf_pc]); \
+} while (0)
+
+#endif /* _BPF_JIT_MACHDEP_H_ */
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
new file mode 100644
index 0000000..ed1ccb5
--- /dev/null
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -0,0 +1,541 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+#include "opt_sched.h"
+
+/*****************************************************************************/
+/* Scheduling */
+/*****************************************************************************/
+
+ .text
+
+#ifdef SMP
+#define LK lock ;
+#else
+#define LK
+#endif
+
+#if defined(SCHED_ULE) && defined(SMP)
+#define SETLK xchgq
+#else
+#define SETLK movq
+#endif
+
+/*
+ * cpu_throw()
+ *
+ * This is the second half of cpu_switch(). It is used when the current
+ * thread is either a dummy or slated to die, and we no longer care
+ * about its state. This is only a slight optimization and is probably
+ * not worth it anymore.  Note that we still need the old thread, if it
+ * exists, so that we can clear its pm_active bit.
+ * %rdi = oldtd
+ * %rsi = newtd
+ */
+ENTRY(cpu_throw)
+ movl PCPU(CPUID),%eax
+ testq %rdi,%rdi
+ jz 1f
+ /* release bit from old pm_active */
+ movq PCPU(CURPMAP),%rdx
+ LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */
+1:
+ movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */
+ movq PCB_CR3(%r8),%rdx
+ movq %rdx,%cr3 /* new address space */
+ jmp swact
+END(cpu_throw)
+
+/*
+ * cpu_switch(old, new, mtx)
+ *
+ * Save the current thread state, then select the next thread to run
+ * and load its state.
+ * %rdi = oldtd
+ * %rsi = newtd
+ * %rdx = mtx
+ */
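+/*
+ * Viewed from C, the declaration is roughly:
+ *
+ *	void cpu_switch(struct thread *old, struct thread *new,
+ *	    struct mtx *mtx);
+ *
+ * so the three arguments arrive in %rdi, %rsi and %rdx per the amd64
+ * calling convention.
+ */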
+ENTRY(cpu_switch)
+ /* Switch to new thread. First, save context. */
+ movq TD_PCB(%rdi),%r8
+ orl $PCB_FULL_IRET,PCB_FLAGS(%r8)
+
+ movq (%rsp),%rax /* Hardware registers */
+ movq %r15,PCB_R15(%r8)
+ movq %r14,PCB_R14(%r8)
+ movq %r13,PCB_R13(%r8)
+ movq %r12,PCB_R12(%r8)
+ movq %rbp,PCB_RBP(%r8)
+ movq %rsp,PCB_RSP(%r8)
+ movq %rbx,PCB_RBX(%r8)
+ movq %rax,PCB_RIP(%r8)
+
+ testl $PCB_DBREGS,PCB_FLAGS(%r8)
+ jnz store_dr /* static predict not taken */
+done_store_dr:
+
+	/* Have we used the FPU, and do we need to save its state? */
+ cmpq %rdi,PCPU(FPCURTHREAD)
+ jne 3f
+ movq PCB_SAVEFPU(%r8),%r8
+ clts
+ cmpl $0,use_xsave
+ jne 1f
+ fxsave (%r8)
+ jmp 2f
+1: movq %rdx,%rcx
+ movl xsave_mask,%eax
+ movl xsave_mask+4,%edx
+ .globl ctx_switch_xsave
+ctx_switch_xsave:
+ /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
+ xsave (%r8)
+ movq %rcx,%rdx
+2: smsw %ax
+ orb $CR0_TS,%al
+ lmsw %ax
+ xorl %eax,%eax
+ movq %rax,PCPU(FPCURTHREAD)
+3:
+
+ /* Save is done. Now fire up new thread. Leave old vmspace. */
+ movq TD_PCB(%rsi),%r8
+
+ /* switch address space */
+ movq PCB_CR3(%r8),%rcx
+ movq %cr3,%rax
+ cmpq %rcx,%rax /* Same address space? */
+ jne swinact
+ SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
+ jmp sw1
+swinact:
+ movq %rcx,%cr3 /* new address space */
+ movl PCPU(CPUID), %eax
+ /* Release bit from old pmap->pm_active */
+ movq PCPU(CURPMAP),%rcx
+ LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */
+ SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
+swact:
+ /* Set bit in new pmap->pm_active */
+ movq TD_PROC(%rsi),%rdx /* newproc */
+ movq P_VMSPACE(%rdx), %rdx
+ addq $VM_PMAP,%rdx
+ LK btsl %eax,PM_ACTIVE(%rdx) /* set new */
+ movq %rdx,PCPU(CURPMAP)
+
+sw1:
+#if defined(SCHED_ULE) && defined(SMP)
+ /* Wait for the new thread to become unblocked */
+ movq $blocked_lock, %rdx
+1:
+ movq TD_LOCK(%rsi),%rcx
+ cmpq %rcx, %rdx
+ pause
+ je 1b
+#endif
+ /*
+ * At this point, we've switched address spaces and are ready
+ * to load up the rest of the next context.
+ */
+
+ /* Skip loading user fsbase/gsbase for kthreads */
+ testl $TDP_KTHREAD,TD_PFLAGS(%rsi)
+ jnz do_kthread
+
+ /*
+ * Load ldt register
+ */
+ movq TD_PROC(%rsi),%rcx
+ cmpq $0, P_MD+MD_LDT(%rcx)
+ jne do_ldt
+ xorl %eax,%eax
+ld_ldt: lldt %ax
+
+ /* Restore fs base in GDT */
+ movl PCB_FSBASE(%r8),%eax
+ movq PCPU(FS32P),%rdx
+ movw %ax,2(%rdx)
+ shrl $16,%eax
+ movb %al,4(%rdx)
+ shrl $8,%eax
+ movb %al,7(%rdx)
+
+ /* Restore gs base in GDT */
+ movl PCB_GSBASE(%r8),%eax
+ movq PCPU(GS32P),%rdx
+ movw %ax,2(%rdx)
+ shrl $16,%eax
+ movb %al,4(%rdx)
+ shrl $8,%eax
+ movb %al,7(%rdx)
+
+do_kthread:
+	/* Do we need to reload the TSS? */
+ movq PCPU(TSSP),%rax
+ movq PCB_TSSP(%r8),%rdx
+ testq %rdx,%rdx
+ cmovzq PCPU(COMMONTSSP),%rdx
+ cmpq %rax,%rdx
+ jne do_tss
+done_tss:
+ movq %r8,PCPU(RSP0)
+ movq %r8,PCPU(CURPCB)
+ /* Update the TSS_RSP0 pointer for the next interrupt */
+ movq %r8,COMMON_TSS_RSP0(%rdx)
+ movq %rsi,PCPU(CURTHREAD) /* into next thread */
+
+ /* Test if debug registers should be restored. */
+ testl $PCB_DBREGS,PCB_FLAGS(%r8)
+ jnz load_dr /* static predict not taken */
+done_load_dr:
+
+ /* Restore context. */
+ movq PCB_R15(%r8),%r15
+ movq PCB_R14(%r8),%r14
+ movq PCB_R13(%r8),%r13
+ movq PCB_R12(%r8),%r12
+ movq PCB_RBP(%r8),%rbp
+ movq PCB_RSP(%r8),%rsp
+ movq PCB_RBX(%r8),%rbx
+ movq PCB_RIP(%r8),%rax
+ movq %rax,(%rsp)
+ ret
+
+ /*
+	 * We order these strangely for several reasons:
+	 * 1: I wanted to use static branch prediction hints, but
+	 * 2: most Athlon64/Opteron CPUs don't have them.  They treat
+	 *    a forward branch as 'predict not taken'.  Intel cores have
+	 *    the 'rep' prefix to invert this.
+	 * So, to make it work on both kinds of CPU, we take this detour.
+	 * We use jumps rather than calls in order to avoid touching the stack.
+ */
+
+store_dr:
+ movq %dr7,%rax /* yes, do the save */
+ movq %dr0,%r15
+ movq %dr1,%r14
+ movq %dr2,%r13
+ movq %dr3,%r12
+ movq %dr6,%r11
+ movq %r15,PCB_DR0(%r8)
+ movq %r14,PCB_DR1(%r8)
+ movq %r13,PCB_DR2(%r8)
+ movq %r12,PCB_DR3(%r8)
+ movq %r11,PCB_DR6(%r8)
+ movq %rax,PCB_DR7(%r8)
+ andq $0x0000fc00, %rax /* disable all watchpoints */
+ movq %rax,%dr7
+ jmp done_store_dr
+
+load_dr:
+ movq %dr7,%rax
+ movq PCB_DR0(%r8),%r15
+ movq PCB_DR1(%r8),%r14
+ movq PCB_DR2(%r8),%r13
+ movq PCB_DR3(%r8),%r12
+ movq PCB_DR6(%r8),%r11
+ movq PCB_DR7(%r8),%rcx
+ movq %r15,%dr0
+ movq %r14,%dr1
+ /* Preserve reserved bits in %dr7 */
+ andq $0x0000fc00,%rax
+ andq $~0x0000fc00,%rcx
+ movq %r13,%dr2
+ movq %r12,%dr3
+ orq %rcx,%rax
+ movq %r11,%dr6
+ movq %rax,%dr7
+ jmp done_load_dr
+
+do_tss: movq %rdx,PCPU(TSSP)
+ movq %rdx,%rcx
+ movq PCPU(TSS),%rax
+ movw %cx,2(%rax)
+ shrq $16,%rcx
+ movb %cl,4(%rax)
+ shrq $8,%rcx
+ movb %cl,7(%rax)
+ shrq $8,%rcx
+ movl %ecx,8(%rax)
+ movb $0x89,5(%rax) /* unset busy */
+ movl $TSSSEL,%eax
+ ltr %ax
+ jmp done_tss
+
+do_ldt: movq PCPU(LDT),%rax
+ movq P_MD+MD_LDT_SD(%rcx),%rdx
+ movq %rdx,(%rax)
+ movq P_MD+MD_LDT_SD+8(%rcx),%rdx
+ movq %rdx,8(%rax)
+ movl $LDTSEL,%eax
+ jmp ld_ldt
+END(cpu_switch)
+
+/*
+ * savectx(pcb)
+ * Update pcb, saving current processor state.
+ */
+ENTRY(savectx)
+ /* Save caller's return address. */
+ movq (%rsp),%rax
+ movq %rax,PCB_RIP(%rdi)
+
+ movq %rbx,PCB_RBX(%rdi)
+ movq %rsp,PCB_RSP(%rdi)
+ movq %rbp,PCB_RBP(%rdi)
+ movq %r12,PCB_R12(%rdi)
+ movq %r13,PCB_R13(%rdi)
+ movq %r14,PCB_R14(%rdi)
+ movq %r15,PCB_R15(%rdi)
+
+ movq %cr0,%rsi
+ movq %rsi,PCB_CR0(%rdi)
+ movq %cr2,%rax
+ movq %rax,PCB_CR2(%rdi)
+ movq %cr3,%rax
+ movq %rax,PCB_CR3(%rdi)
+ movq %cr4,%rax
+ movq %rax,PCB_CR4(%rdi)
+
+ movq %dr0,%rax
+ movq %rax,PCB_DR0(%rdi)
+ movq %dr1,%rax
+ movq %rax,PCB_DR1(%rdi)
+ movq %dr2,%rax
+ movq %rax,PCB_DR2(%rdi)
+ movq %dr3,%rax
+ movq %rax,PCB_DR3(%rdi)
+ movq %dr6,%rax
+ movq %rax,PCB_DR6(%rdi)
+ movq %dr7,%rax
+ movq %rax,PCB_DR7(%rdi)
+
+ movl $MSR_FSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_FSBASE(%rdi)
+ movl %edx,PCB_FSBASE+4(%rdi)
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_GSBASE(%rdi)
+ movl %edx,PCB_GSBASE+4(%rdi)
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_KGSBASE(%rdi)
+ movl %edx,PCB_KGSBASE+4(%rdi)
+ movl $MSR_EFER,%ecx
+ rdmsr
+ movl %eax,PCB_EFER(%rdi)
+ movl %edx,PCB_EFER+4(%rdi)
+ movl $MSR_STAR,%ecx
+ rdmsr
+ movl %eax,PCB_STAR(%rdi)
+ movl %edx,PCB_STAR+4(%rdi)
+ movl $MSR_LSTAR,%ecx
+ rdmsr
+ movl %eax,PCB_LSTAR(%rdi)
+ movl %edx,PCB_LSTAR+4(%rdi)
+ movl $MSR_CSTAR,%ecx
+ rdmsr
+ movl %eax,PCB_CSTAR(%rdi)
+ movl %edx,PCB_CSTAR+4(%rdi)
+ movl $MSR_SF_MASK,%ecx
+ rdmsr
+ movl %eax,PCB_SFMASK(%rdi)
+ movl %edx,PCB_SFMASK+4(%rdi)
+ movl xsave_mask,%eax
+ movl %eax,PCB_XSMASK(%rdi)
+ movl xsave_mask+4,%eax
+ movl %eax,PCB_XSMASK+4(%rdi)
+
+ sgdt PCB_GDT(%rdi)
+ sidt PCB_IDT(%rdi)
+ sldt PCB_LDT(%rdi)
+ str PCB_TR(%rdi)
+
+2: movq %rsi,%cr0 /* The previous %cr0 is saved in %rsi. */
+
+ movl $1,%eax
+ ret
+END(savectx)
+
+/*
+ * resumectx(pcb)
+ * Resume processor state from pcb.
+ */
+ENTRY(resumectx)
+ /* Switch to KPML4phys. */
+ movq KPML4phys,%rax
+ movq %rax,%cr3
+
+ /* Force kernel segment registers. */
+ movl $KDSEL,%eax
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ movl $KUF32SEL,%eax
+ movw %ax,%fs
+ movl $KUG32SEL,%eax
+ movw %ax,%gs
+
+ movl $MSR_FSBASE,%ecx
+ movl PCB_FSBASE(%rdi),%eax
+ movl 4 + PCB_FSBASE(%rdi),%edx
+ wrmsr
+ movl $MSR_GSBASE,%ecx
+ movl PCB_GSBASE(%rdi),%eax
+ movl 4 + PCB_GSBASE(%rdi),%edx
+ wrmsr
+ movl $MSR_KGSBASE,%ecx
+ movl PCB_KGSBASE(%rdi),%eax
+ movl 4 + PCB_KGSBASE(%rdi),%edx
+ wrmsr
+
+ /* Restore EFER. */
+ movl $MSR_EFER,%ecx
+ movl PCB_EFER(%rdi),%eax
+ wrmsr
+
+ /* Restore fast syscall stuff. */
+ movl $MSR_STAR,%ecx
+ movl PCB_STAR(%rdi),%eax
+ movl 4 + PCB_STAR(%rdi),%edx
+ wrmsr
+ movl $MSR_LSTAR,%ecx
+ movl PCB_LSTAR(%rdi),%eax
+ movl 4 + PCB_LSTAR(%rdi),%edx
+ wrmsr
+ movl $MSR_CSTAR,%ecx
+ movl PCB_CSTAR(%rdi),%eax
+ movl 4 + PCB_CSTAR(%rdi),%edx
+ wrmsr
+ movl $MSR_SF_MASK,%ecx
+ movl PCB_SFMASK(%rdi),%eax
+ wrmsr
+
+ /* Restore CR0 except for FPU mode. */
+ movq PCB_CR0(%rdi),%rax
+ andq $~(CR0_EM | CR0_TS),%rax
+ movq %rax,%cr0
+
+ /* Restore CR2, CR4 and CR3. */
+ movq PCB_CR2(%rdi),%rax
+ movq %rax,%cr2
+ movq PCB_CR4(%rdi),%rax
+ movq %rax,%cr4
+ movq PCB_CR3(%rdi),%rax
+ movq %rax,%cr3
+
+ /* Restore descriptor tables. */
+ lidt PCB_IDT(%rdi)
+ lldt PCB_LDT(%rdi)
+
+#define SDT_SYSTSS 9
+#define SDT_SYSBSY 11
+
+ /* Clear "task busy" bit and reload TR. */
+ movq PCPU(TSS),%rax
+ andb $(~SDT_SYSBSY | SDT_SYSTSS),5(%rax)
+ movw PCB_TR(%rdi),%ax
+ ltr %ax
+
+#undef SDT_SYSTSS
+#undef SDT_SYSBSY
+
+ /* Restore debug registers. */
+ movq PCB_DR0(%rdi),%rax
+ movq %rax,%dr0
+ movq PCB_DR1(%rdi),%rax
+ movq %rax,%dr1
+ movq PCB_DR2(%rdi),%rax
+ movq %rax,%dr2
+ movq PCB_DR3(%rdi),%rax
+ movq %rax,%dr3
+ movq PCB_DR6(%rdi),%rax
+ movq %rax,%dr6
+ movq PCB_DR7(%rdi),%rax
+ movq %rax,%dr7
+
+ /* Restore FPU state. */
+ fninit
+ movq PCB_FPUSUSPEND(%rdi),%rbx
+ movq PCB_XSMASK(%rdi),%rax
+ testq %rax,%rax
+ jz 1f
+ movq %rax,%rdx
+ shrq $32,%rdx
+ movl $XCR0,%ecx
+ xsetbv
+ xrstor (%rbx)
+ jmp 2f
+1:
+ fxrstor (%rbx)
+2:
+
+ /* Reload CR0. */
+ movq PCB_CR0(%rdi),%rax
+ movq %rax,%cr0
+
+ /* Restore other callee saved registers. */
+ movq PCB_R15(%rdi),%r15
+ movq PCB_R14(%rdi),%r14
+ movq PCB_R13(%rdi),%r13
+ movq PCB_R12(%rdi),%r12
+ movq PCB_RBP(%rdi),%rbp
+ movq PCB_RSP(%rdi),%rsp
+ movq PCB_RBX(%rdi),%rbx
+
+ /* Restore return address. */
+ movq PCB_RIP(%rdi),%rax
+ movq %rax,(%rsp)
+
+ xorl %eax,%eax
+ ret
+END(resumectx)
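+
+/*
+ * Note the setjmp/longjmp-style pairing with savectx(): savectx()
+ * returns 1 at save time, while a resume through resumectx() unwinds
+ * to savectx()'s caller with a return value of 0.
+ */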
+
+/*
+ * Wrapper around fpusave that takes care of CR0_TS.
+ */
+ENTRY(ctx_fpusave)
+ movq %cr0,%rsi
+ clts
+ call fpusave
+ movq %rsi,%cr0
+ ret
+END(ctx_fpusave)
diff --git a/sys/amd64/amd64/db_disasm.c b/sys/amd64/amd64/db_disasm.c
new file mode 100644
index 0000000..46144e0
--- /dev/null
+++ b/sys/amd64/amd64/db_disasm.c
@@ -0,0 +1,1637 @@
+/*-
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Instruction disassembler.
+ */
+#include <sys/param.h>
+#include <sys/libkern.h>
+
+#include <ddb/ddb.h>
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+
+/*
+ * Size attributes
+ */
+#define BYTE 0
+#define WORD 1
+#define LONG 2
+#define QUAD 3
+#define SNGL 4
+#define DBLR 5
+#define EXTR 6
+#define SDEP 7
+#define ADEP 8
+#define ESC 9
+#define NONE 10
+
+/*
+ * REX prefix and bits
+ */
+#define REX_B 1
+#define REX_X 2
+#define REX_R 4
+#define REX_W 8
+#define REX 0x40
+
+/*
+ * Addressing modes
+ */
+#define E 1 /* general effective address */
+#define Eind 2 /* indirect address (jump, call) */
+#define Ew 3 /* address, word size */
+#define Eb 4 /* address, byte size */
+#define R 5 /* register, in 'reg' field */
+#define Rw 6 /* word register, in 'reg' field */
+#define Rq 39 /* quad register, in 'reg' field */
+#define Ri 7 /* register in instruction */
+#define S 8 /* segment reg, in 'reg' field */
+#define Si 9 /* segment reg, in instruction */
+#define A 10 /* accumulator */
+#define BX 11 /* (bx) */
+#define CL 12 /* cl, for shifts */
+#define DX 13 /* dx, for IO */
+#define SI 14 /* si */
+#define DI 15 /* di */
+#define CR 16 /* control register */
+#define DR 17 /* debug register */
+#define TR 18 /* test register */
+#define I 19 /* immediate, unsigned */
+#define Is 20 /* immediate, signed */
+#define Ib 21 /* byte immediate, unsigned */
+#define Ibs 22 /* byte immediate, signed */
+#define Iw 23 /* word immediate, unsigned */
+#define Ilq 24 /* long/quad immediate, unsigned */
+#define O 25 /* direct address */
+#define Db 26 /* byte displacement from EIP */
+#define Dl 27 /* long displacement from EIP */
+#define o1 28 /* constant 1 */
+#define o3 29 /* constant 3 */
+#define OS 30 /* immediate offset/segment */
+#define ST 31 /* FP stack top */
+#define STI 32 /* FP stack */
+#define X 33 /* extended FP op */
+#define XA 34 /* for 'fstcw %ax' */
+#define El 35 /* address, long/quad size */
+#define Ril 36 /* long register in instruction */
+#define Iba 37 /* byte immediate, don't print if 0xa */
+#define EL 38 /* address, explicitly long size */
+
+struct inst {
+ const char * i_name; /* name */
+ short i_has_modrm; /* has regmodrm byte */
+ short i_size; /* operand size */
+ int i_mode; /* addressing modes */
+ const void * i_extra; /* pointer to extra opcode table */
+};
+
+#define op1(x) (x)
+#define op2(x,y) ((x)|((y)<<8))
+#define op3(x,y,z) ((x)|((y)<<8)|((z)<<16))
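+/* e.g. op2(E, R) packs mode E into the low byte and R into the next. */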
+
+struct finst {
+ const char * f_name; /* name for memory instruction */
+ int f_size; /* size for memory instruction */
+ int f_rrmode; /* mode for rr instruction */
+ const void * f_rrname; /* name for rr instruction
+ (or pointer to table) */
+};
+
+static const struct inst db_inst_0f388x[] = {
+/*80*/ { "", TRUE, SDEP, op2(E, Rq), "invept" },
+/*81*/ { "", TRUE, SDEP, op2(E, Rq), "invvpid" },
+/*82*/ { "", FALSE, NONE, 0, 0 },
+/*83*/ { "", FALSE, NONE, 0, 0 },
+/*84*/ { "", FALSE, NONE, 0, 0 },
+/*85*/ { "", FALSE, NONE, 0, 0 },
+/*86*/ { "", FALSE, NONE, 0, 0 },
+/*87*/ { "", FALSE, NONE, 0, 0 },
+
+/*88*/ { "", FALSE, NONE, 0, 0 },
+/*89*/ { "", FALSE, NONE, 0, 0 },
+/*8a*/ { "", FALSE, NONE, 0, 0 },
+/*8b*/ { "", FALSE, NONE, 0, 0 },
+/*8c*/ { "", FALSE, NONE, 0, 0 },
+/*8d*/ { "", FALSE, NONE, 0, 0 },
+/*8e*/ { "", FALSE, NONE, 0, 0 },
+/*8f*/ { "", FALSE, NONE, 0, 0 },
+};
+
+static const struct inst * const db_inst_0f38[] = {
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ db_inst_0f388x,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+};
+
+static const char * const db_Grp6[] = {
+ "sldt",
+ "str",
+ "lldt",
+ "ltr",
+ "verr",
+ "verw",
+ "",
+ ""
+};
+
+static const char * const db_Grp7[] = {
+ "sgdt",
+ "sidt",
+ "lgdt",
+ "lidt",
+ "smsw",
+ "",
+ "lmsw",
+ "invlpg"
+};
+
+static const char * const db_Grp8[] = {
+ "",
+ "",
+ "",
+ "",
+ "bt",
+ "bts",
+ "btr",
+ "btc"
+};
+
+static const char * const db_Grp9[] = {
+ "",
+ "cmpxchg8b",
+ "",
+ "",
+ "",
+ "",
+ "vmptrld",
+ "vmptrst"
+};
+
+static const char * const db_Grp15[] = {
+ "fxsave",
+ "fxrstor",
+ "ldmxcsr",
+ "stmxcsr",
+ "xsave",
+ "xrstor",
+ "xsaveopt",
+ "clflush"
+};
+
+static const char * const db_Grp15b[] = {
+ "",
+ "",
+ "",
+ "",
+ "",
+ "lfence",
+ "mfence",
+ "sfence"
+};
+
+static const struct inst db_inst_0f0x[] = {
+/*00*/ { "", TRUE, NONE, op1(Ew), db_Grp6 },
+/*01*/ { "", TRUE, NONE, op1(Ew), db_Grp7 },
+/*02*/ { "lar", TRUE, LONG, op2(E,R), 0 },
+/*03*/ { "lsl", TRUE, LONG, op2(E,R), 0 },
+/*04*/ { "", FALSE, NONE, 0, 0 },
+/*05*/ { "syscall",FALSE,NONE, 0, 0 },
+/*06*/ { "clts", FALSE, NONE, 0, 0 },
+/*07*/ { "sysret",FALSE, NONE, 0, 0 },
+
+/*08*/ { "invd", FALSE, NONE, 0, 0 },
+/*09*/ { "wbinvd",FALSE, NONE, 0, 0 },
+/*0a*/ { "", FALSE, NONE, 0, 0 },
+/*0b*/ { "", FALSE, NONE, 0, 0 },
+/*0c*/ { "", FALSE, NONE, 0, 0 },
+/*0d*/ { "", FALSE, NONE, 0, 0 },
+/*0e*/ { "", FALSE, NONE, 0, 0 },
+/*0f*/ { "", FALSE, NONE, 0, 0 },
+};
+
+static const struct inst db_inst_0f2x[] = {
+/*20*/ { "mov", TRUE, LONG, op2(CR,El), 0 },
+/*21*/ { "mov", TRUE, LONG, op2(DR,El), 0 },
+/*22*/ { "mov", TRUE, LONG, op2(El,CR), 0 },
+/*23*/ { "mov", TRUE, LONG, op2(El,DR), 0 },
+/*24*/ { "mov", TRUE, LONG, op2(TR,El), 0 },
+/*25*/ { "", FALSE, NONE, 0, 0 },
+/*26*/ { "mov", TRUE, LONG, op2(El,TR), 0 },
+/*27*/ { "", FALSE, NONE, 0, 0 },
+
+/*28*/ { "", FALSE, NONE, 0, 0 },
+/*29*/ { "", FALSE, NONE, 0, 0 },
+/*2a*/ { "", FALSE, NONE, 0, 0 },
+/*2b*/ { "", FALSE, NONE, 0, 0 },
+/*2c*/ { "", FALSE, NONE, 0, 0 },
+/*2d*/ { "", FALSE, NONE, 0, 0 },
+/*2e*/ { "", FALSE, NONE, 0, 0 },
+/*2f*/ { "", FALSE, NONE, 0, 0 },
+};
+
+static const struct inst db_inst_0f3x[] = {
+/*30*/ { "wrmsr", FALSE, NONE, 0, 0 },
+/*31*/ { "rdtsc", FALSE, NONE, 0, 0 },
+/*32*/ { "rdmsr", FALSE, NONE, 0, 0 },
+/*33*/ { "rdpmc", FALSE, NONE, 0, 0 },
+/*34*/ { "sysenter",FALSE,NONE, 0, 0 },
+/*35*/ { "sysexit",FALSE,NONE, 0, 0 },
+/*36*/ { "", FALSE, NONE, 0, 0 },
+/*37*/ { "getsec",FALSE, NONE, 0, 0 },
+
+/*38*/ { "", FALSE, ESC, 0, db_inst_0f38 },
+/*39*/ { "", FALSE, NONE, 0, 0 },
+/*3a*/ { "", FALSE, NONE, 0, 0 },
+/*3b*/ { "", FALSE, NONE, 0, 0 },
+/*3c*/ { "", FALSE, NONE, 0, 0 },
+/*3d*/ { "", FALSE, NONE, 0, 0 },
+/*3e*/ { "", FALSE, NONE, 0, 0 },
+/*3f*/ { "", FALSE, NONE, 0, 0 },
+};
+
+static const struct inst db_inst_0f4x[] = {
+/*40*/ { "cmovo", TRUE, NONE, op2(E, R), 0 },
+/*41*/ { "cmovno", TRUE, NONE, op2(E, R), 0 },
+/*42*/ { "cmovb", TRUE, NONE, op2(E, R), 0 },
+/*43*/ { "cmovnb", TRUE, NONE, op2(E, R), 0 },
+/*44*/ { "cmovz", TRUE, NONE, op2(E, R), 0 },
+/*45*/ { "cmovnz", TRUE, NONE, op2(E, R), 0 },
+/*46*/ { "cmovbe", TRUE, NONE, op2(E, R), 0 },
+/*47*/ { "cmovnbe",TRUE, NONE, op2(E, R), 0 },
+
+/*48*/ { "cmovs", TRUE, NONE, op2(E, R), 0 },
+/*49*/ { "cmovns", TRUE, NONE, op2(E, R), 0 },
+/*4a*/ { "cmovp", TRUE, NONE, op2(E, R), 0 },
+/*4b*/ { "cmovnp", TRUE, NONE, op2(E, R), 0 },
+/*4c*/ { "cmovl", TRUE, NONE, op2(E, R), 0 },
+/*4d*/ { "cmovnl", TRUE, NONE, op2(E, R), 0 },
+/*4e*/ { "cmovle", TRUE, NONE, op2(E, R), 0 },
+/*4f*/ { "cmovnle",TRUE, NONE, op2(E, R), 0 },
+};
+
+static const struct inst db_inst_0f7x[] = {
+/*70*/ { "", FALSE, NONE, 0, 0 },
+/*71*/ { "", FALSE, NONE, 0, 0 },
+/*72*/ { "", FALSE, NONE, 0, 0 },
+/*73*/ { "", FALSE, NONE, 0, 0 },
+/*74*/ { "", FALSE, NONE, 0, 0 },
+/*75*/ { "", FALSE, NONE, 0, 0 },
+/*76*/ { "", FALSE, NONE, 0, 0 },
+/*77*/ { "", FALSE, NONE, 0, 0 },
+
+/*78*/ { "vmread", TRUE, NONE, op2(Rq, E), 0 },
+/*79*/ { "vmwrite",TRUE, NONE, op2(E, Rq), 0 },
+/*7a*/ { "", FALSE, NONE, 0, 0 },
+/*7b*/ { "", FALSE, NONE, 0, 0 },
+/*7c*/ { "", FALSE, NONE, 0, 0 },
+/*7d*/ { "", FALSE, NONE, 0, 0 },
+/*7e*/ { "", FALSE, NONE, 0, 0 },
+/*7f*/ { "", FALSE, NONE, 0, 0 },
+};
+
+static const struct inst db_inst_0f8x[] = {
+/*80*/ { "jo", FALSE, NONE, op1(Dl), 0 },
+/*81*/ { "jno", FALSE, NONE, op1(Dl), 0 },
+/*82*/ { "jb", FALSE, NONE, op1(Dl), 0 },
+/*83*/ { "jnb", FALSE, NONE, op1(Dl), 0 },
+/*84*/ { "jz", FALSE, NONE, op1(Dl), 0 },
+/*85*/ { "jnz", FALSE, NONE, op1(Dl), 0 },
+/*86*/ { "jbe", FALSE, NONE, op1(Dl), 0 },
+/*87*/ { "jnbe", FALSE, NONE, op1(Dl), 0 },
+
+/*88*/ { "js", FALSE, NONE, op1(Dl), 0 },
+/*89*/ { "jns", FALSE, NONE, op1(Dl), 0 },
+/*8a*/ { "jp", FALSE, NONE, op1(Dl), 0 },
+/*8b*/ { "jnp", FALSE, NONE, op1(Dl), 0 },
+/*8c*/ { "jl", FALSE, NONE, op1(Dl), 0 },
+/*8d*/ { "jnl", FALSE, NONE, op1(Dl), 0 },
+/*8e*/ { "jle", FALSE, NONE, op1(Dl), 0 },
+/*8f*/ { "jnle", FALSE, NONE, op1(Dl), 0 },
+};
+
+static const struct inst db_inst_0f9x[] = {
+/*90*/ { "seto", TRUE, NONE, op1(Eb), 0 },
+/*91*/ { "setno", TRUE, NONE, op1(Eb), 0 },
+/*92*/ { "setb", TRUE, NONE, op1(Eb), 0 },
+/*93*/ { "setnb", TRUE, NONE, op1(Eb), 0 },
+/*94*/ { "setz", TRUE, NONE, op1(Eb), 0 },
+/*95*/ { "setnz", TRUE, NONE, op1(Eb), 0 },
+/*96*/ { "setbe", TRUE, NONE, op1(Eb), 0 },
+/*97*/ { "setnbe",TRUE, NONE, op1(Eb), 0 },
+
+/*98*/ { "sets", TRUE, NONE, op1(Eb), 0 },
+/*99*/ { "setns", TRUE, NONE, op1(Eb), 0 },
+/*9a*/ { "setp", TRUE, NONE, op1(Eb), 0 },
+/*9b*/ { "setnp", TRUE, NONE, op1(Eb), 0 },
+/*9c*/ { "setl", TRUE, NONE, op1(Eb), 0 },
+/*9d*/ { "setnl", TRUE, NONE, op1(Eb), 0 },
+/*9e*/ { "setle", TRUE, NONE, op1(Eb), 0 },
+/*9f*/ { "setnle",TRUE, NONE, op1(Eb), 0 },
+};
+
+static const struct inst db_inst_0fax[] = {
+/*a0*/ { "push", FALSE, NONE, op1(Si), 0 },
+/*a1*/ { "pop", FALSE, NONE, op1(Si), 0 },
+/*a2*/ { "cpuid", FALSE, NONE, 0, 0 },
+/*a3*/ { "bt", TRUE, LONG, op2(R,E), 0 },
+/*a4*/ { "shld", TRUE, LONG, op3(Ib,R,E), 0 },
+/*a5*/ { "shld", TRUE, LONG, op3(CL,R,E), 0 },
+/*a6*/ { "", FALSE, NONE, 0, 0 },
+/*a7*/ { "", FALSE, NONE, 0, 0 },
+
+/*a8*/ { "push", FALSE, NONE, op1(Si), 0 },
+/*a9*/ { "pop", FALSE, NONE, op1(Si), 0 },
+/*aa*/ { "rsm", FALSE, NONE, 0, 0 },
+/*ab*/ { "bts", TRUE, LONG, op2(R,E), 0 },
+/*ac*/ { "shrd", TRUE, LONG, op3(Ib,R,E), 0 },
+/*ad*/ { "shrd", TRUE, LONG, op3(CL,R,E), 0 },
+/*ae*/ { "", TRUE, LONG, op1(E), db_Grp15 },
+/*af*/ { "imul", TRUE, LONG, op2(E,R), 0 },
+};
+
+static const struct inst db_inst_0fbx[] = {
+/*b0*/ { "cmpxchg",TRUE, BYTE, op2(R, E), 0 },
+/*b0*/ { "cmpxchg",TRUE, LONG, op2(R, E), 0 },
+/*b2*/ { "lss", TRUE, LONG, op2(E, R), 0 },
+/*b3*/ { "btr", TRUE, LONG, op2(R, E), 0 },
+/*b4*/ { "lfs", TRUE, LONG, op2(E, R), 0 },
+/*b5*/ { "lgs", TRUE, LONG, op2(E, R), 0 },
+/*b6*/ { "movzb", TRUE, LONG, op2(Eb, R), 0 },
+/*b7*/ { "movzw", TRUE, LONG, op2(Ew, R), 0 },
+
+/*b8*/ { "", FALSE, NONE, 0, 0 },
+/*b9*/ { "", FALSE, NONE, 0, 0 },
+/*ba*/ { "", TRUE, LONG, op2(Ib, E), db_Grp8 },
+/*bb*/ { "btc", TRUE, LONG, op2(R, E), 0 },
+/*bc*/ { "bsf", TRUE, LONG, op2(E, R), 0 },
+/*bd*/ { "bsr", TRUE, LONG, op2(E, R), 0 },
+/*be*/ { "movsb", TRUE, LONG, op2(Eb, R), 0 },
+/*bf*/ { "movsw", TRUE, LONG, op2(Ew, R), 0 },
+};
+
+static const struct inst db_inst_0fcx[] = {
+/*c0*/ { "xadd", TRUE, BYTE, op2(R, E), 0 },
+/*c1*/ { "xadd", TRUE, LONG, op2(R, E), 0 },
+/*c2*/ { "", FALSE, NONE, 0, 0 },
+/*c3*/ { "", FALSE, NONE, 0, 0 },
+/*c4*/ { "", FALSE, NONE, 0, 0 },
+/*c5*/ { "", FALSE, NONE, 0, 0 },
+/*c6*/ { "", FALSE, NONE, 0, 0 },
+/*c7*/ { "", TRUE, NONE, op1(E), db_Grp9 },
+/*c8*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*c9*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*ca*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*cb*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*cc*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*cd*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*ce*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+/*cf*/ { "bswap", FALSE, LONG, op1(Ril), 0 },
+};
+
+static const struct inst * const db_inst_0f[] = {
+ db_inst_0f0x,
+ 0,
+ db_inst_0f2x,
+ db_inst_0f3x,
+ db_inst_0f4x,
+ 0,
+ 0,
+ db_inst_0f7x,
+ db_inst_0f8x,
+ db_inst_0f9x,
+ db_inst_0fax,
+ db_inst_0fbx,
+ db_inst_0fcx,
+ 0,
+ 0,
+ 0
+};
+
+static const char * const db_Esc92[] = {
+ "fnop", "", "", "", "", "", "", ""
+};
+static const char * const db_Esc94[] = {
+ "fchs", "fabs", "", "", "ftst", "fxam", "", ""
+};
+static const char * const db_Esc95[] = {
+ "fld1", "fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz",""
+};
+static const char * const db_Esc96[] = {
+ "f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp",
+ "fincstp"
+};
+static const char * const db_Esc97[] = {
+ "fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"
+};
+
+static const char * const db_Esca5[] = {
+ "", "fucompp","", "", "", "", "", ""
+};
+
+static const char * const db_Escb4[] = {
+ "fneni","fndisi", "fnclex","fninit","fsetpm", "", "", ""
+};
+
+static const char * const db_Esce3[] = {
+ "", "fcompp","", "", "", "", "", ""
+};
+
+static const char * const db_Escf4[] = {
+ "fnstsw","", "", "", "", "", "", ""
+};
+
+static const struct finst db_Esc8[] = {
+/*0*/ { "fadd", SNGL, op2(STI,ST), 0 },
+/*1*/ { "fmul", SNGL, op2(STI,ST), 0 },
+/*2*/ { "fcom", SNGL, op2(STI,ST), 0 },
+/*3*/ { "fcomp", SNGL, op2(STI,ST), 0 },
+/*4*/ { "fsub", SNGL, op2(STI,ST), 0 },
+/*5*/ { "fsubr", SNGL, op2(STI,ST), 0 },
+/*6*/ { "fdiv", SNGL, op2(STI,ST), 0 },
+/*7*/ { "fdivr", SNGL, op2(STI,ST), 0 },
+};
+
+static const struct finst db_Esc9[] = {
+/*0*/ { "fld", SNGL, op1(STI), 0 },
+/*1*/ { "", NONE, op1(STI), "fxch" },
+/*2*/ { "fst", SNGL, op1(X), db_Esc92 },
+/*3*/ { "fstp", SNGL, 0, 0 },
+/*4*/ { "fldenv", NONE, op1(X), db_Esc94 },
+/*5*/ { "fldcw", NONE, op1(X), db_Esc95 },
+/*6*/ { "fnstenv",NONE, op1(X), db_Esc96 },
+/*7*/ { "fnstcw", NONE, op1(X), db_Esc97 },
+};
+
+static const struct finst db_Esca[] = {
+/*0*/ { "fiadd", LONG, 0, 0 },
+/*1*/ { "fimul", LONG, 0, 0 },
+/*2*/ { "ficom", LONG, 0, 0 },
+/*3*/ { "ficomp", LONG, 0, 0 },
+/*4*/ { "fisub", LONG, 0, 0 },
+/*5*/ { "fisubr", LONG, op1(X), db_Esca5 },
+/*6*/ { "fidiv", LONG, 0, 0 },
+/*7*/ { "fidivr", LONG, 0, 0 }
+};
+
+static const struct finst db_Escb[] = {
+/*0*/ { "fild", LONG, 0, 0 },
+/*1*/ { "", NONE, 0, 0 },
+/*2*/ { "fist", LONG, 0, 0 },
+/*3*/ { "fistp", LONG, 0, 0 },
+/*4*/ { "", WORD, op1(X), db_Escb4 },
+/*5*/ { "fld", EXTR, 0, 0 },
+/*6*/ { "", WORD, 0, 0 },
+/*7*/ { "fstp", EXTR, 0, 0 },
+};
+
+static const struct finst db_Escc[] = {
+/*0*/ { "fadd", DBLR, op2(ST,STI), 0 },
+/*1*/ { "fmul", DBLR, op2(ST,STI), 0 },
+/*2*/ { "fcom", DBLR, 0, 0 },
+/*3*/ { "fcomp", DBLR, 0, 0 },
+/*4*/ { "fsub", DBLR, op2(ST,STI), "fsubr" },
+/*5*/ { "fsubr", DBLR, op2(ST,STI), "fsub" },
+/*6*/ { "fdiv", DBLR, op2(ST,STI), "fdivr" },
+/*7*/ { "fdivr", DBLR, op2(ST,STI), "fdiv" },
+};
+
+static const struct finst db_Escd[] = {
+/*0*/ { "fld", DBLR, op1(STI), "ffree" },
+/*1*/ { "", NONE, 0, 0 },
+/*2*/ { "fst", DBLR, op1(STI), 0 },
+/*3*/ { "fstp", DBLR, op1(STI), 0 },
+/*4*/ { "frstor", NONE, op1(STI), "fucom" },
+/*5*/ { "", NONE, op1(STI), "fucomp" },
+/*6*/ { "fnsave", NONE, 0, 0 },
+/*7*/ { "fnstsw", NONE, 0, 0 },
+};
+
+static const struct finst db_Esce[] = {
+/*0*/ { "fiadd", WORD, op2(ST,STI), "faddp" },
+/*1*/ { "fimul", WORD, op2(ST,STI), "fmulp" },
+/*2*/ { "ficom", WORD, 0, 0 },
+/*3*/ { "ficomp", WORD, op1(X), db_Esce3 },
+/*4*/ { "fisub", WORD, op2(ST,STI), "fsubrp" },
+/*5*/ { "fisubr", WORD, op2(ST,STI), "fsubp" },
+/*6*/ { "fidiv", WORD, op2(ST,STI), "fdivrp" },
+/*7*/ { "fidivr", WORD, op2(ST,STI), "fdivp" },
+};
+
+static const struct finst db_Escf[] = {
+/*0*/ { "fild", WORD, 0, 0 },
+/*1*/ { "", NONE, 0, 0 },
+/*2*/ { "fist", WORD, 0, 0 },
+/*3*/ { "fistp", WORD, 0, 0 },
+/*4*/ { "fbld", NONE, op1(XA), db_Escf4 },
+/*5*/ { "fild", QUAD, 0, 0 },
+/*6*/ { "fbstp", NONE, 0, 0 },
+/*7*/ { "fistp", QUAD, 0, 0 },
+};
+
+static const struct finst * const db_Esc_inst[] = {
+ db_Esc8, db_Esc9, db_Esca, db_Escb,
+ db_Escc, db_Escd, db_Esce, db_Escf
+};
+
+static const char * const db_Grp1[] = {
+ "add",
+ "or",
+ "adc",
+ "sbb",
+ "and",
+ "sub",
+ "xor",
+ "cmp"
+};
+
+static const char * const db_Grp2[] = {
+ "rol",
+ "ror",
+ "rcl",
+ "rcr",
+ "shl",
+ "shr",
+ "shl",
+ "sar"
+};
+
+static const struct inst db_Grp3[] = {
+ { "test", TRUE, NONE, op2(I,E), 0 },
+ { "test", TRUE, NONE, op2(I,E), 0 },
+ { "not", TRUE, NONE, op1(E), 0 },
+ { "neg", TRUE, NONE, op1(E), 0 },
+ { "mul", TRUE, NONE, op2(E,A), 0 },
+ { "imul", TRUE, NONE, op2(E,A), 0 },
+ { "div", TRUE, NONE, op2(E,A), 0 },
+ { "idiv", TRUE, NONE, op2(E,A), 0 },
+};
+
+static const struct inst db_Grp4[] = {
+ { "inc", TRUE, BYTE, op1(E), 0 },
+ { "dec", TRUE, BYTE, op1(E), 0 },
+ { "", TRUE, NONE, 0, 0 },
+ { "", TRUE, NONE, 0, 0 },
+ { "", TRUE, NONE, 0, 0 },
+ { "", TRUE, NONE, 0, 0 },
+ { "", TRUE, NONE, 0, 0 },
+ { "", TRUE, NONE, 0, 0 }
+};
+
+static const struct inst db_Grp5[] = {
+ { "inc", TRUE, LONG, op1(E), 0 },
+ { "dec", TRUE, LONG, op1(E), 0 },
+ { "call", TRUE, LONG, op1(Eind),0 },
+ { "lcall", TRUE, LONG, op1(Eind),0 },
+ { "jmp", TRUE, LONG, op1(Eind),0 },
+ { "ljmp", TRUE, LONG, op1(Eind),0 },
+ { "push", TRUE, LONG, op1(E), 0 },
+ { "", TRUE, NONE, 0, 0 }
+};
+
+static const struct inst db_inst_table[256] = {
+/*00*/ { "add", TRUE, BYTE, op2(R, E), 0 },
+/*01*/ { "add", TRUE, LONG, op2(R, E), 0 },
+/*02*/ { "add", TRUE, BYTE, op2(E, R), 0 },
+/*03*/ { "add", TRUE, LONG, op2(E, R), 0 },
+/*04*/ { "add", FALSE, BYTE, op2(I, A), 0 },
+/*05*/ { "add", FALSE, LONG, op2(Is, A), 0 },
+/*06*/ { "push", FALSE, NONE, op1(Si), 0 },
+/*07*/ { "pop", FALSE, NONE, op1(Si), 0 },
+
+/*08*/ { "or", TRUE, BYTE, op2(R, E), 0 },
+/*09*/ { "or", TRUE, LONG, op2(R, E), 0 },
+/*0a*/ { "or", TRUE, BYTE, op2(E, R), 0 },
+/*0b*/ { "or", TRUE, LONG, op2(E, R), 0 },
+/*0c*/ { "or", FALSE, BYTE, op2(I, A), 0 },
+/*0d*/ { "or", FALSE, LONG, op2(I, A), 0 },
+/*0e*/ { "push", FALSE, NONE, op1(Si), 0 },
+/*0f*/ { "", FALSE, ESC, 0, db_inst_0f },
+
+/*10*/ { "adc", TRUE, BYTE, op2(R, E), 0 },
+/*11*/ { "adc", TRUE, LONG, op2(R, E), 0 },
+/*12*/ { "adc", TRUE, BYTE, op2(E, R), 0 },
+/*13*/ { "adc", TRUE, LONG, op2(E, R), 0 },
+/*14*/ { "adc", FALSE, BYTE, op2(I, A), 0 },
+/*15*/ { "adc", FALSE, LONG, op2(Is, A), 0 },
+/*16*/ { "push", FALSE, NONE, op1(Si), 0 },
+/*17*/ { "pop", FALSE, NONE, op1(Si), 0 },
+
+/*18*/ { "sbb", TRUE, BYTE, op2(R, E), 0 },
+/*19*/ { "sbb", TRUE, LONG, op2(R, E), 0 },
+/*1a*/ { "sbb", TRUE, BYTE, op2(E, R), 0 },
+/*1b*/ { "sbb", TRUE, LONG, op2(E, R), 0 },
+/*1c*/ { "sbb", FALSE, BYTE, op2(I, A), 0 },
+/*1d*/ { "sbb", FALSE, LONG, op2(Is, A), 0 },
+/*1e*/ { "push", FALSE, NONE, op1(Si), 0 },
+/*1f*/ { "pop", FALSE, NONE, op1(Si), 0 },
+
+/*20*/ { "and", TRUE, BYTE, op2(R, E), 0 },
+/*21*/ { "and", TRUE, LONG, op2(R, E), 0 },
+/*22*/ { "and", TRUE, BYTE, op2(E, R), 0 },
+/*23*/ { "and", TRUE, LONG, op2(E, R), 0 },
+/*24*/ { "and", FALSE, BYTE, op2(I, A), 0 },
+/*25*/ { "and", FALSE, LONG, op2(I, A), 0 },
+/*26*/ { "", FALSE, NONE, 0, 0 },
+/*27*/ { "daa", FALSE, NONE, 0, 0 },
+
+/*28*/ { "sub", TRUE, BYTE, op2(R, E), 0 },
+/*29*/ { "sub", TRUE, LONG, op2(R, E), 0 },
+/*2a*/ { "sub", TRUE, BYTE, op2(E, R), 0 },
+/*2b*/ { "sub", TRUE, LONG, op2(E, R), 0 },
+/*2c*/ { "sub", FALSE, BYTE, op2(I, A), 0 },
+/*2d*/ { "sub", FALSE, LONG, op2(Is, A), 0 },
+/*2e*/ { "", FALSE, NONE, 0, 0 },
+/*2f*/ { "das", FALSE, NONE, 0, 0 },
+
+/*30*/ { "xor", TRUE, BYTE, op2(R, E), 0 },
+/*31*/ { "xor", TRUE, LONG, op2(R, E), 0 },
+/*32*/ { "xor", TRUE, BYTE, op2(E, R), 0 },
+/*33*/ { "xor", TRUE, LONG, op2(E, R), 0 },
+/*34*/ { "xor", FALSE, BYTE, op2(I, A), 0 },
+/*35*/ { "xor", FALSE, LONG, op2(I, A), 0 },
+/*36*/ { "", FALSE, NONE, 0, 0 },
+/*37*/ { "aaa", FALSE, NONE, 0, 0 },
+
+/*38*/ { "cmp", TRUE, BYTE, op2(R, E), 0 },
+/*39*/ { "cmp", TRUE, LONG, op2(R, E), 0 },
+/*3a*/ { "cmp", TRUE, BYTE, op2(E, R), 0 },
+/*3b*/ { "cmp", TRUE, LONG, op2(E, R), 0 },
+/*3c*/ { "cmp", FALSE, BYTE, op2(I, A), 0 },
+/*3d*/ { "cmp", FALSE, LONG, op2(Is, A), 0 },
+/*3e*/ { "", FALSE, NONE, 0, 0 },
+/*3f*/ { "aas", FALSE, NONE, 0, 0 },
+
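+/*
+ * In long mode db_disasm() consumes 0x40-0x4f as REX prefixes before
+ * the table lookup, so these rows serve only as placeholders.
+ */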
+/*40*/ { "rex", FALSE, NONE, 0, 0 },
+/*41*/ { "rex.b", FALSE, NONE, 0, 0 },
+/*42*/ { "rex.x", FALSE, NONE, 0, 0 },
+/*43*/ { "rex.xb", FALSE, NONE, 0, 0 },
+/*44*/ { "rex.r", FALSE, NONE, 0, 0 },
+/*45*/ { "rex.rb", FALSE, NONE, 0, 0 },
+/*46*/ { "rex.rx", FALSE, NONE, 0, 0 },
+/*47*/ { "rex.rxb", FALSE, NONE, 0, 0 },
+
+/*48*/ { "rex.w", FALSE, NONE, 0, 0 },
+/*49*/ { "rex.wb", FALSE, NONE, 0, 0 },
+/*4a*/ { "rex.wx", FALSE, NONE, 0, 0 },
+/*4b*/ { "rex.wxb", FALSE, NONE, 0, 0 },
+/*4c*/ { "rex.wr", FALSE, NONE, 0, 0 },
+/*4d*/ { "rex.wrb", FALSE, NONE, 0, 0 },
+/*4e*/ { "rex.wrx", FALSE, NONE, 0, 0 },
+/*4f*/ { "rex.wrxb", FALSE, NONE, 0, 0 },
+
+/*50*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*51*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*52*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*53*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*54*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*55*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*56*/ { "push", FALSE, LONG, op1(Ri), 0 },
+/*57*/ { "push", FALSE, LONG, op1(Ri), 0 },
+
+/*58*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*59*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*5a*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*5b*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*5c*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*5d*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*5e*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+/*5f*/ { "pop", FALSE, LONG, op1(Ri), 0 },
+
+/*60*/ { "pusha", FALSE, LONG, 0, 0 },
+/*61*/ { "popa", FALSE, LONG, 0, 0 },
+/*62*/ { "bound", TRUE, LONG, op2(E, R), 0 },
+/*63*/ { "movslq", TRUE, NONE, op2(EL,R), 0 },
+
+/*64*/ { "", FALSE, NONE, 0, 0 },
+/*65*/ { "", FALSE, NONE, 0, 0 },
+/*66*/ { "", FALSE, NONE, 0, 0 },
+/*67*/ { "", FALSE, NONE, 0, 0 },
+
+/*68*/ { "push", FALSE, LONG, op1(I), 0 },
+/*69*/ { "imul", TRUE, LONG, op3(I,E,R), 0 },
+/*6a*/ { "push", FALSE, LONG, op1(Ibs), 0 },
+/*6b*/ { "imul", TRUE, LONG, op3(Ibs,E,R),0 },
+/*6c*/ { "ins", FALSE, BYTE, op2(DX, DI), 0 },
+/*6d*/ { "ins", FALSE, LONG, op2(DX, DI), 0 },
+/*6e*/ { "outs", FALSE, BYTE, op2(SI, DX), 0 },
+/*6f*/ { "outs", FALSE, LONG, op2(SI, DX), 0 },
+
+/*70*/ { "jo", FALSE, NONE, op1(Db), 0 },
+/*71*/ { "jno", FALSE, NONE, op1(Db), 0 },
+/*72*/ { "jb", FALSE, NONE, op1(Db), 0 },
+/*73*/ { "jnb", FALSE, NONE, op1(Db), 0 },
+/*74*/ { "jz", FALSE, NONE, op1(Db), 0 },
+/*75*/ { "jnz", FALSE, NONE, op1(Db), 0 },
+/*76*/ { "jbe", FALSE, NONE, op1(Db), 0 },
+/*77*/ { "jnbe", FALSE, NONE, op1(Db), 0 },
+
+/*78*/ { "js", FALSE, NONE, op1(Db), 0 },
+/*79*/ { "jns", FALSE, NONE, op1(Db), 0 },
+/*7a*/ { "jp", FALSE, NONE, op1(Db), 0 },
+/*7b*/ { "jnp", FALSE, NONE, op1(Db), 0 },
+/*7c*/ { "jl", FALSE, NONE, op1(Db), 0 },
+/*7d*/ { "jnl", FALSE, NONE, op1(Db), 0 },
+/*7e*/ { "jle", FALSE, NONE, op1(Db), 0 },
+/*7f*/ { "jnle", FALSE, NONE, op1(Db), 0 },
+
+/*80*/ { "", TRUE, BYTE, op2(I, E), db_Grp1 },
+/*81*/ { "", TRUE, LONG, op2(I, E), db_Grp1 },
+/*82*/ { "", TRUE, BYTE, op2(I, E), db_Grp1 },
+/*83*/ { "", TRUE, LONG, op2(Ibs,E), db_Grp1 },
+/*84*/ { "test", TRUE, BYTE, op2(R, E), 0 },
+/*85*/ { "test", TRUE, LONG, op2(R, E), 0 },
+/*86*/ { "xchg", TRUE, BYTE, op2(R, E), 0 },
+/*87*/ { "xchg", TRUE, LONG, op2(R, E), 0 },
+
+/*88*/ { "mov", TRUE, BYTE, op2(R, E), 0 },
+/*89*/ { "mov", TRUE, LONG, op2(R, E), 0 },
+/*8a*/ { "mov", TRUE, BYTE, op2(E, R), 0 },
+/*8b*/ { "mov", TRUE, LONG, op2(E, R), 0 },
+/*8c*/ { "mov", TRUE, NONE, op2(S, Ew), 0 },
+/*8d*/ { "lea", TRUE, LONG, op2(E, R), 0 },
+/*8e*/ { "mov", TRUE, NONE, op2(Ew, S), 0 },
+/*8f*/ { "pop", TRUE, LONG, op1(E), 0 },
+
+/*90*/ { "nop", FALSE, NONE, 0, 0 },
+/*91*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+/*92*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+/*93*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+/*94*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+/*95*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+/*96*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+/*97*/ { "xchg", FALSE, LONG, op2(A, Ri), 0 },
+
+/*98*/ { "cwde", FALSE, SDEP, 0, "cbw" },
+/*99*/ { "cdq", FALSE, SDEP, 0, "cwd" },
+/*9a*/ { "lcall", FALSE, NONE, op1(OS), 0 },
+/*9b*/ { "wait", FALSE, NONE, 0, 0 },
+/*9c*/ { "pushf", FALSE, LONG, 0, 0 },
+/*9d*/ { "popf", FALSE, LONG, 0, 0 },
+/*9e*/ { "sahf", FALSE, NONE, 0, 0 },
+/*9f*/ { "lahf", FALSE, NONE, 0, 0 },
+
+/*a0*/ { "mov", FALSE, BYTE, op2(O, A), 0 },
+/*a1*/ { "mov", FALSE, LONG, op2(O, A), 0 },
+/*a2*/ { "mov", FALSE, BYTE, op2(A, O), 0 },
+/*a3*/ { "mov", FALSE, LONG, op2(A, O), 0 },
+/*a4*/ { "movs", FALSE, BYTE, op2(SI,DI), 0 },
+/*a5*/ { "movs", FALSE, LONG, op2(SI,DI), 0 },
+/*a6*/ { "cmps", FALSE, BYTE, op2(SI,DI), 0 },
+/*a7*/ { "cmps", FALSE, LONG, op2(SI,DI), 0 },
+
+/*a8*/ { "test", FALSE, BYTE, op2(I, A), 0 },
+/*a9*/ { "test", FALSE, LONG, op2(I, A), 0 },
+/*aa*/ { "stos", FALSE, BYTE, op1(DI), 0 },
+/*ab*/ { "stos", FALSE, LONG, op1(DI), 0 },
+/*ac*/ { "lods", FALSE, BYTE, op1(SI), 0 },
+/*ad*/ { "lods", FALSE, LONG, op1(SI), 0 },
+/*ae*/ { "scas", FALSE, BYTE, op1(SI), 0 },
+/*af*/ { "scas", FALSE, LONG, op1(SI), 0 },
+
+/*b0*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b1*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b2*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b3*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b4*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b5*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b6*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+/*b7*/ { "mov", FALSE, BYTE, op2(I, Ri), 0 },
+
+/*b8*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*b9*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*ba*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*bb*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*bc*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*bd*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*be*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+/*bf*/ { "mov", FALSE, LONG, op2(Ilq, Ri), 0 },
+
+/*c0*/ { "", TRUE, BYTE, op2(Ib, E), db_Grp2 },
+/*c1*/ { "", TRUE, LONG, op2(Ib, E), db_Grp2 },
+/*c2*/ { "ret", FALSE, NONE, op1(Iw), 0 },
+/*c3*/ { "ret", FALSE, NONE, 0, 0 },
+/*c4*/ { "les", TRUE, LONG, op2(E, R), 0 },
+/*c5*/ { "lds", TRUE, LONG, op2(E, R), 0 },
+/*c6*/ { "mov", TRUE, BYTE, op2(I, E), 0 },
+/*c7*/ { "mov", TRUE, LONG, op2(I, E), 0 },
+
+/*c8*/ { "enter", FALSE, NONE, op2(Iw, Ib), 0 },
+/*c9*/ { "leave", FALSE, NONE, 0, 0 },
+/*ca*/ { "lret", FALSE, NONE, op1(Iw), 0 },
+/*cb*/ { "lret", FALSE, NONE, 0, 0 },
+/*cc*/ { "int", FALSE, NONE, op1(o3), 0 },
+/*cd*/ { "int", FALSE, NONE, op1(Ib), 0 },
+/*ce*/ { "into", FALSE, NONE, 0, 0 },
+/*cf*/ { "iret", FALSE, NONE, 0, 0 },
+
+/*d0*/ { "", TRUE, BYTE, op2(o1, E), db_Grp2 },
+/*d1*/ { "", TRUE, LONG, op2(o1, E), db_Grp2 },
+/*d2*/ { "", TRUE, BYTE, op2(CL, E), db_Grp2 },
+/*d3*/ { "", TRUE, LONG, op2(CL, E), db_Grp2 },
+/*d4*/ { "aam", FALSE, NONE, op1(Iba), 0 },
+/*d5*/ { "aad", FALSE, NONE, op1(Iba), 0 },
+/*d6*/ { ".byte\t0xd6", FALSE, NONE, 0, 0 },
+/*d7*/ { "xlat", FALSE, BYTE, op1(BX), 0 },
+
+/*d8*/ { "", TRUE, NONE, 0, db_Esc8 },
+/*d9*/ { "", TRUE, NONE, 0, db_Esc9 },
+/*da*/ { "", TRUE, NONE, 0, db_Esca },
+/*db*/ { "", TRUE, NONE, 0, db_Escb },
+/*dc*/ { "", TRUE, NONE, 0, db_Escc },
+/*dd*/ { "", TRUE, NONE, 0, db_Escd },
+/*de*/ { "", TRUE, NONE, 0, db_Esce },
+/*df*/ { "", TRUE, NONE, 0, db_Escf },
+
+/*e0*/ { "loopne",FALSE, NONE, op1(Db), 0 },
+/*e1*/ { "loope", FALSE, NONE, op1(Db), 0 },
+/*e2*/ { "loop", FALSE, NONE, op1(Db), 0 },
+/*e3*/ { "jrcxz", FALSE, ADEP, op1(Db), "jecxz" },
+/*e4*/ { "in", FALSE, BYTE, op2(Ib, A), 0 },
+/*e5*/ { "in", FALSE, LONG, op2(Ib, A) , 0 },
+/*e6*/ { "out", FALSE, BYTE, op2(A, Ib), 0 },
+/*e7*/ { "out", FALSE, LONG, op2(A, Ib) , 0 },
+
+/*e8*/ { "call", FALSE, NONE, op1(Dl), 0 },
+/*e9*/ { "jmp", FALSE, NONE, op1(Dl), 0 },
+/*ea*/ { "ljmp", FALSE, NONE, op1(OS), 0 },
+/*eb*/ { "jmp", FALSE, NONE, op1(Db), 0 },
+/*ec*/ { "in", FALSE, BYTE, op2(DX, A), 0 },
+/*ed*/ { "in", FALSE, LONG, op2(DX, A) , 0 },
+/*ee*/ { "out", FALSE, BYTE, op2(A, DX), 0 },
+/*ef*/ { "out", FALSE, LONG, op2(A, DX) , 0 },
+
+/*f0*/ { "", FALSE, NONE, 0, 0 },
+/*f1*/ { ".byte\t0xf1", FALSE, NONE, 0, 0 },
+/*f2*/ { "", FALSE, NONE, 0, 0 },
+/*f3*/ { "", FALSE, NONE, 0, 0 },
+/*f4*/ { "hlt", FALSE, NONE, 0, 0 },
+/*f5*/ { "cmc", FALSE, NONE, 0, 0 },
+/*f6*/ { "", TRUE, BYTE, 0, db_Grp3 },
+/*f7*/ { "", TRUE, LONG, 0, db_Grp3 },
+
+/*f8*/ { "clc", FALSE, NONE, 0, 0 },
+/*f9*/ { "stc", FALSE, NONE, 0, 0 },
+/*fa*/ { "cli", FALSE, NONE, 0, 0 },
+/*fb*/ { "sti", FALSE, NONE, 0, 0 },
+/*fc*/ { "cld", FALSE, NONE, 0, 0 },
+/*fd*/ { "std", FALSE, NONE, 0, 0 },
+/*fe*/ { "", TRUE, NONE, 0, db_Grp4 },
+/*ff*/ { "", TRUE, NONE, 0, db_Grp5 },
+};
+
+static const struct inst db_bad_inst =
+	{ "???",   FALSE, NONE,  0,	      0 };
+
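+/*
+ * REX.R, REX.X and REX.B supply the fourth bit of reg, index and base;
+ * f_mod() and sib_ss() ignore their rex argument and take it only so
+ * all of the accessors share one interface.
+ */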
+#define f_mod(rex, byte) ((byte)>>6)
+#define f_reg(rex, byte) ((((byte)>>3)&0x7) | (rex & REX_R ? 0x8 : 0x0))
+#define f_rm(rex, byte) (((byte)&0x7) | (rex & REX_B ? 0x8 : 0x0))
+
+#define sib_ss(rex, byte) ((byte)>>6)
+#define sib_index(rex, byte) ((((byte)>>3)&0x7) | (rex & REX_X ? 0x8 : 0x0))
+#define sib_base(rex, byte) (((byte)&0x7) | (rex & REX_B ? 0x8 : 0x0))
+
+struct i_addr {
+ int is_reg; /* if reg, reg number is in 'disp' */
+ int disp;
+ const char * base;
+ const char * index;
+ int ss;
+};
+
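+/*
+ * Register names, indexed by [rex present][size][reg].  With any REX
+ * prefix the encodings for %ah-%bh name %spl-%dil instead.
+ */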
+static const char * const db_reg[2][4][16] = {
+
+ {{"%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh",
+ "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" },
+ { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
+ "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" },
+ { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+ "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" },
+ { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }},
+
+ {{"%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
+ "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" },
+ { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
+ "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" },
+ { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+ "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" },
+ { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }}
+};
+
+static const char * const db_seg_reg[8] = {
+ "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", ""
+};
+
+/*
+ * lengths for size attributes
+ */
+static const int db_lengths[] = {
+ 1, /* BYTE */
+ 2, /* WORD */
+ 4, /* LONG */
+ 8, /* QUAD */
+ 4, /* SNGL */
+ 8, /* DBLR */
+ 10, /* EXTR */
+};
+
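+/*
+ * NB: get_value_inc() expands to two statements, so it must be braced
+ * when used as the body of an if or a loop.
+ */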
+#define get_value_inc(result, loc, size, is_signed) \
+ result = db_get_value((loc), (size), (is_signed)); \
+ (loc) += (size);
+
+static db_addr_t
+ db_disasm_esc(db_addr_t loc, int inst, int rex, int short_addr,
+ int size, const char *seg);
+static void db_print_address(const char *seg, int size, int rex,
+ struct i_addr *addrp);
+static db_addr_t
+ db_read_address(db_addr_t loc, int short_addr, int rex, int regmodrm,
+ struct i_addr *addrp);
+
+/*
+ * Read address at location and return updated location.
+ */
+static db_addr_t
+db_read_address(db_addr_t loc, int short_addr, int rex, int regmodrm,
+    struct i_addr *addrp)		/* out */
+{
+ int mod, rm, sib, index, disp, size, have_sib;
+
+ mod = f_mod(rex, regmodrm);
+ rm = f_rm(rex, regmodrm);
+
+ if (mod == 3) {
+ addrp->is_reg = TRUE;
+ addrp->disp = rm;
+ return (loc);
+ }
+ addrp->is_reg = FALSE;
+ addrp->index = 0;
+
+ if (short_addr)
+ size = LONG;
+ else
+ size = QUAD;
+
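+	/* Low three bits of rm == 4 mean a SIB byte follows. */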
+ if ((rm & 0x7) == 4) {
+ get_value_inc(sib, loc, 1, FALSE);
+ rm = sib_base(rex, sib);
+ index = sib_index(rex, sib);
+ if (index != 4)
+ addrp->index = db_reg[1][size][index];
+ addrp->ss = sib_ss(rex, sib);
+ have_sib = 1;
+ } else
+ have_sib = 0;
+
+ switch (mod) {
+ case 0:
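+		/*
+		 * mod 0 with rm 5: a disp32 follows; it is %rip-relative
+		 * unless a SIB byte already supplied the base.
+		 */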
+ if (rm == 5) {
+ get_value_inc(addrp->disp, loc, 4, FALSE);
+ if (have_sib)
+ addrp->base = 0;
+ else if (short_addr)
+ addrp->base = "%eip";
+ else
+ addrp->base = "%rip";
+ } else {
+ addrp->disp = 0;
+ addrp->base = db_reg[1][size][rm];
+ }
+ break;
+
+ case 1:
+ get_value_inc(disp, loc, 1, TRUE);
+ addrp->disp = disp;
+ addrp->base = db_reg[1][size][rm];
+ break;
+
+ case 2:
+ get_value_inc(disp, loc, 4, FALSE);
+ addrp->disp = disp;
+ addrp->base = db_reg[1][size][rm];
+ break;
+ }
+ return (loc);
+}
+
+static void
+db_print_address(const char *seg, int size, int rex, struct i_addr *addrp)
+{
+ if (addrp->is_reg) {
+		db_printf("%s", db_reg[rex != 0 ? 1 : 0][(size == LONG &&
+		    (rex & REX_W)) ? QUAD : size][addrp->disp]);
+ return;
+ }
+
+ if (seg) {
+ db_printf("%s:", seg);
+ }
+
+ if (addrp->disp != 0 || (addrp->base == 0 && addrp->index == 0))
+ db_printsym((db_addr_t)addrp->disp, DB_STGY_ANY);
+ if (addrp->base != 0 || addrp->index != 0) {
+ db_printf("(");
+ if (addrp->base)
+ db_printf("%s", addrp->base);
+ if (addrp->index)
+ db_printf(",%s,%d", addrp->index, 1<<addrp->ss);
+ db_printf(")");
+ }
+}
+
+/*
+ * Disassemble floating-point ("escape") instruction
+ * and return updated location.
+ */
+static db_addr_t
+db_disasm_esc(db_addr_t loc, int inst, int rex, int short_addr, int size,
+    const char *seg)
+{
+ int regmodrm;
+ const struct finst * fp;
+ int mod;
+ struct i_addr address;
+ const char * name;
+
+ get_value_inc(regmodrm, loc, 1, FALSE);
+ fp = &db_Esc_inst[inst - 0xd8][f_reg(rex, regmodrm)];
+ mod = f_mod(rex, regmodrm);
+ if (mod != 3) {
+ if (*fp->f_name == '\0') {
+ db_printf("<bad instruction>");
+ return (loc);
+ }
+ /*
+ * Normal address modes.
+ */
+ loc = db_read_address(loc, short_addr, rex, regmodrm, &address);
+ db_printf("%s", fp->f_name);
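+		/*
+		 * Append the AT&T operand-size suffix: s/l/t for 32, 64
+		 * and 80-bit reals, s/l/q for 16, 32 and 64-bit integers.
+		 */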
+ switch(fp->f_size) {
+ case SNGL:
+ db_printf("s");
+ break;
+ case DBLR:
+ db_printf("l");
+ break;
+ case EXTR:
+ db_printf("t");
+ break;
+ case WORD:
+ db_printf("s");
+ break;
+ case LONG:
+ db_printf("l");
+ break;
+ case QUAD:
+ db_printf("q");
+ break;
+ default:
+ break;
+ }
+ db_printf("\t");
+ db_print_address(seg, BYTE, rex, &address);
+ }
+ else {
+ /*
+ * 'reg-reg' - special formats
+ */
+ switch (fp->f_rrmode) {
+		case op2(ST,STI):
+			name = (fp->f_rrname) ? fp->f_rrname : fp->f_name;
+			db_printf("%s\t%%st,%%st(%d)", name, f_rm(rex, regmodrm));
+			break;
+		case op2(STI,ST):
+			name = (fp->f_rrname) ? fp->f_rrname : fp->f_name;
+			db_printf("%s\t%%st(%d),%%st", name, f_rm(rex, regmodrm));
+			break;
+		case op1(STI):
+			name = (fp->f_rrname) ? fp->f_rrname : fp->f_name;
+			db_printf("%s\t%%st(%d)", name, f_rm(rex, regmodrm));
+ break;
+ case op1(X):
+ name = ((const char * const *)fp->f_rrname)[f_rm(rex, regmodrm)];
+ if (*name == '\0')
+ goto bad;
+ db_printf("%s", name);
+ break;
+ case op1(XA):
+ name = ((const char * const *)fp->f_rrname)[f_rm(rex, regmodrm)];
+ if (*name == '\0')
+ goto bad;
+ db_printf("%s\t%%ax", name);
+ break;
+ default:
+ bad:
+ db_printf("<bad instruction>");
+ break;
+ }
+ }
+
+ return (loc);
+}
+
+/*
+ * Disassemble instruction at 'loc'. 'altfmt' specifies an
+ * (optional) alternate format. Return address of start of
+ * next instruction.
+ */
+db_addr_t
+db_disasm(db_addr_t loc, boolean_t altfmt)
+{
+ int inst;
+ int size;
+ int short_addr;
+ const char * seg;
+ const struct inst * ip;
+ const char * i_name;
+ int i_size;
+ int i_mode;
+ int rex = 0;
+ int regmodrm = 0;
+ boolean_t first;
+ int displ;
+ int prefix;
+ int rep;
+ int imm;
+ int imm2;
+ long imm64;
+ int len;
+ struct i_addr address;
+
+ get_value_inc(inst, loc, 1, FALSE);
+ short_addr = FALSE;
+ size = LONG;
+ seg = 0;
+
+ /*
+ * Get prefixes
+ */
+ rep = FALSE;
+ prefix = TRUE;
+ do {
+ switch (inst) {
+ case 0x66: /* data16 */
+ size = WORD;
+ break;
+ case 0x67:
+ short_addr = TRUE;
+ break;
+ case 0x26:
+ seg = "%es";
+ break;
+ case 0x36:
+ seg = "%ss";
+ break;
+ case 0x2e:
+ seg = "%cs";
+ break;
+ case 0x3e:
+ seg = "%ds";
+ break;
+ case 0x64:
+ seg = "%fs";
+ break;
+ case 0x65:
+ seg = "%gs";
+ break;
+ case 0xf0:
+ db_printf("lock ");
+ break;
+ case 0xf2:
+ db_printf("repne ");
+ break;
+ case 0xf3:
+ rep = TRUE;
+ break;
+ default:
+ prefix = FALSE;
+ break;
+ }
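+		/* Bytes 0x40-0x4f are REX prefixes in long mode. */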
+ if (inst >= 0x40 && inst < 0x50) {
+ rex = inst;
+ prefix = TRUE;
+ }
+ if (prefix) {
+ get_value_inc(inst, loc, 1, FALSE);
+ }
+ } while (prefix);
+
+ if (inst >= 0xd8 && inst <= 0xdf) {
+ loc = db_disasm_esc(loc, inst, rex, short_addr, size, seg);
+ db_printf("\n");
+ return (loc);
+ }
+
+ ip = &db_inst_table[inst];
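+	/*
+	 * Escape tables are two levels deep: the high nibble of the next
+	 * opcode byte selects a 16-entry table, the low nibble the entry.
+	 */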
+ while (ip->i_size == ESC) {
+ get_value_inc(inst, loc, 1, FALSE);
+ ip = ((const struct inst * const *)ip->i_extra)[inst>>4];
+ if (ip == 0) {
+ ip = &db_bad_inst;
+ }
+ else {
+ ip = &ip[inst&0xf];
+ }
+ }
+
+ if (ip->i_has_modrm) {
+ get_value_inc(regmodrm, loc, 1, FALSE);
+ loc = db_read_address(loc, short_addr, rex, regmodrm, &address);
+ }
+
+ i_name = ip->i_name;
+ i_size = ip->i_size;
+ i_mode = ip->i_mode;
+
+ if (ip->i_extra == db_Grp1 || ip->i_extra == db_Grp2 ||
+ ip->i_extra == db_Grp6 || ip->i_extra == db_Grp7 ||
+ ip->i_extra == db_Grp8 || ip->i_extra == db_Grp9 ||
+ ip->i_extra == db_Grp15) {
+ i_name = ((const char * const *)ip->i_extra)[f_reg(rex, regmodrm)];
+ }
+ else if (ip->i_extra == db_Grp3) {
+ ip = ip->i_extra;
+ ip = &ip[f_reg(rex, regmodrm)];
+ i_name = ip->i_name;
+ i_mode = ip->i_mode;
+ }
+ else if (ip->i_extra == db_Grp4 || ip->i_extra == db_Grp5) {
+ ip = ip->i_extra;
+ ip = &ip[f_reg(rex, regmodrm)];
+ i_name = ip->i_name;
+ i_mode = ip->i_mode;
+ i_size = ip->i_size;
+ }
+
+ /* Special cases that don't fit well in the tables. */
+ if (ip->i_extra == db_Grp7 && f_mod(rex, regmodrm) == 3) {
+ switch (regmodrm) {
+ case 0xc1:
+ i_name = "vmcall";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc2:
+ i_name = "vmlaunch";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc3:
+ i_name = "vmresume";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc4:
+ i_name = "vmxoff";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc8:
+ i_name = "monitor";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xc9:
+ i_name = "mwait";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xd0:
+ i_name = "xgetbv";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xd1:
+ i_name = "xsetbv";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xf8:
+ i_name = "swapgs";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ case 0xf9:
+ i_name = "rdtscp";
+ i_size = NONE;
+ i_mode = 0;
+ break;
+ }
+ }
+ if (ip->i_extra == db_Grp15 && f_mod(rex, regmodrm) == 3) {
+ i_name = db_Grp15b[f_reg(rex, regmodrm)];
+ i_size = NONE;
+ i_mode = 0;
+ }
+
+ /* Handle instructions identified by mandatory prefixes. */
+ if (rep == TRUE) {
+ if (inst == 0x90) {
+ i_name = "pause";
+ i_size = NONE;
+ i_mode = 0;
+ rep = FALSE;
+ } else if (ip->i_extra == db_Grp9 && f_mod(rex, regmodrm) != 3 &&
+ f_reg(rex, regmodrm) == 0x6) {
+ i_name = "vmxon";
+ rep = FALSE;
+ }
+ }
+ if (size == WORD) {
+ if (ip->i_extra == db_Grp9 && f_mod(rex, regmodrm) != 3 &&
+ f_reg(rex, regmodrm) == 0x6) {
+ i_name = "vmclear";
+ }
+ }
+ if (rex & REX_W) {
+ if (strcmp(i_name, "cwde") == 0)
+ i_name = "cdqe";
+ else if (strcmp(i_name, "cmpxchg8b") == 0)
+ i_name = "cmpxchg16b";
+ }
+
+ if (rep == TRUE)
+ db_printf("repe "); /* XXX repe VS rep */
+
+ if (i_size == SDEP) {
+ if (size == LONG)
+ db_printf("%s", i_name);
+ else
+ db_printf("%s", (const char *)ip->i_extra);
+ } else if (i_size == ADEP) {
+ if (short_addr == FALSE)
+ db_printf("%s", i_name);
+ else
+ db_printf("%s", (const char *)ip->i_extra);
+ }
+ else {
+ db_printf("%s", i_name);
+ if ((inst >= 0x50 && inst <= 0x5f) || inst == 0x68 || inst == 0x6a) {
+ i_size = NONE;
+ db_printf("q");
+ }
+ if (i_size != NONE) {
+ if (i_size == BYTE) {
+ db_printf("b");
+ size = BYTE;
+ }
+ else if (i_size == WORD) {
+ db_printf("w");
+ size = WORD;
+ }
+ else if (size == WORD)
+ db_printf("w");
+ else {
+ if (rex & REX_W)
+ db_printf("q");
+ else
+ db_printf("l");
+ }
+ }
+ }
+ db_printf("\t");
+ for (first = TRUE;
+ i_mode != 0;
+ i_mode >>= 8, first = FALSE)
+ {
+ if (!first)
+ db_printf(",");
+
+ switch (i_mode & 0xFF) {
+
+ case E:
+ db_print_address(seg, size, rex, &address);
+ break;
+
+ case Eind:
+ db_printf("*");
+ db_print_address(seg, size, rex, &address);
+ break;
+
+ case El:
+ db_print_address(seg, (rex & REX_W) ? QUAD : LONG, rex, &address);
+ break;
+
+ case EL:
+ db_print_address(seg, LONG, 0, &address);
+ break;
+
+ case Ew:
+ db_print_address(seg, WORD, rex, &address);
+ break;
+
+ case Eb:
+ db_print_address(seg, BYTE, rex, &address);
+ break;
+
+ case R:
+			db_printf("%s", db_reg[rex != 0 ? 1 : 0][(size == LONG &&
+			    (rex & REX_W)) ? QUAD : size][f_reg(rex, regmodrm)]);
+ break;
+
+ case Rw:
+ db_printf("%s", db_reg[rex != 0 ? 1 : 0][WORD][f_reg(rex, regmodrm)]);
+ break;
+
+ case Rq:
+ db_printf("%s", db_reg[rex != 0 ? 1 : 0][QUAD][f_reg(rex, regmodrm)]);
+ break;
+
+ case Ri:
+ db_printf("%s", db_reg[0][QUAD][f_rm(rex, inst)]);
+ break;
+
+ case Ril:
+			/* bswap: REX.W selects the 64-bit register form. */
+			db_printf("%s", db_reg[rex != 0 ? 1 : 0][(rex & REX_W) ?
+			    QUAD : LONG][f_rm(rex, inst)]);
+ break;
+
+ case S:
+ db_printf("%s", db_seg_reg[f_reg(rex, regmodrm)]);
+ break;
+
+ case Si:
+ db_printf("%s", db_seg_reg[f_reg(rex, inst)]);
+ break;
+
+ case A:
+ db_printf("%s", db_reg[rex != 0 ? 1 : 0][size][0]); /* acc */
+ break;
+
+ case BX:
+ if (seg)
+ db_printf("%s:", seg);
+ db_printf("(%s)", short_addr ? "%bx" : "%ebx");
+ break;
+
+ case CL:
+ db_printf("%%cl");
+ break;
+
+ case DX:
+ db_printf("%%dx");
+ break;
+
+ case SI:
+ if (seg)
+ db_printf("%s:", seg);
+ db_printf("(%s)", short_addr ? "%si" : "%rsi");
+ break;
+
+ case DI:
+ db_printf("%%es:(%s)", short_addr ? "%di" : "%rdi");
+ break;
+
+ case CR:
+ db_printf("%%cr%d", f_reg(rex, regmodrm));
+ break;
+
+ case DR:
+ db_printf("%%dr%d", f_reg(rex, regmodrm));
+ break;
+
+ case TR:
+ db_printf("%%tr%d", f_reg(rex, regmodrm));
+ break;
+
+ case I:
+ len = db_lengths[size];
+ get_value_inc(imm, loc, len, FALSE);
+ db_printf("$%#r", imm);
+ break;
+
+ case Is:
+ len = db_lengths[(size == LONG && (rex & REX_W)) ? QUAD : size];
+ get_value_inc(imm, loc, len, FALSE);
+ db_printf("$%+#r", imm);
+ break;
+
+ case Ib:
+ get_value_inc(imm, loc, 1, FALSE);
+ db_printf("$%#r", imm);
+ break;
+
+ case Iba:
+ get_value_inc(imm, loc, 1, FALSE);
+ if (imm != 0x0a)
+ db_printf("$%#r", imm);
+ break;
+
+ case Ibs:
+ get_value_inc(imm, loc, 1, TRUE);
+ if (size == WORD)
+ imm &= 0xFFFF;
+ db_printf("$%+#r", imm);
+ break;
+
+ case Iw:
+ get_value_inc(imm, loc, 2, FALSE);
+ db_printf("$%#r", imm);
+ break;
+
+ case Ilq:
+ len = db_lengths[rex & REX_W ? QUAD : LONG];
+ get_value_inc(imm64, loc, len, FALSE);
+ db_printf("$%#lr", imm64);
+ break;
+
+ case O:
+ len = (short_addr ? 2 : 4);
+ get_value_inc(displ, loc, len, FALSE);
+ if (seg)
+			db_printf("%s:%+#r", seg, displ);
+ else
+ db_printsym((db_addr_t)displ, DB_STGY_ANY);
+ break;
+
+ case Db:
+ get_value_inc(displ, loc, 1, TRUE);
+ displ += loc;
+ if (size == WORD)
+ displ &= 0xFFFF;
+ db_printsym((db_addr_t)displ, DB_STGY_XTRN);
+ break;
+
+ case Dl:
+ len = db_lengths[(size == LONG && (rex & REX_W)) ? QUAD : size];
+ get_value_inc(displ, loc, len, FALSE);
+ displ += loc;
+ if (size == WORD)
+ displ &= 0xFFFF;
+ db_printsym((db_addr_t)displ, DB_STGY_XTRN);
+ break;
+
+ case o1:
+ db_printf("$1");
+ break;
+
+ case o3:
+ db_printf("$3");
+ break;
+
+ case OS:
+ len = db_lengths[size];
+ get_value_inc(imm, loc, len, FALSE); /* offset */
+ get_value_inc(imm2, loc, 2, FALSE); /* segment */
+ db_printf("$%#r,%#r", imm2, imm);
+ break;
+ }
+ }
+ db_printf("\n");
+ return (loc);
+}
diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
new file mode 100644
index 0000000..f44cac4
--- /dev/null
+++ b/sys/amd64/amd64/db_interface.c
@@ -0,0 +1,149 @@
+/*-
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Interface to new debugger.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kdb.h>
+#include <sys/cons.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <ddb/ddb.h>
+
+/*
+ * Read bytes from kernel address space for debugger.
+ */
+int
+db_read_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ jmp_buf jb;
+ void *prev_jb;
+ char *src;
+ int ret;
+
+ prev_jb = kdb_jmpbuf(jb);
+ ret = setjmp(jb);
+ if (ret == 0) {
+ src = (char *)addr;
+ while (size-- > 0)
+ *data++ = *src++;
+ }
+ (void)kdb_jmpbuf(prev_jb);
+ return (ret);
+}
+
+/*
+ * Write bytes to kernel address space for debugger.
+ */
+int
+db_write_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ jmp_buf jb;
+ void *prev_jb;
+ char *dst;
+ pt_entry_t *ptep0 = NULL;
+ pt_entry_t oldmap0 = 0;
+ vm_offset_t addr1;
+ pt_entry_t *ptep1 = NULL;
+ pt_entry_t oldmap1 = 0;
+ int ret;
+
+ prev_jb = kdb_jmpbuf(jb);
+ ret = setjmp(jb);
+ if (ret == 0) {
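+		/*
+		 * Kernel text is mapped read-only, so temporarily grant
+		 * write access through the page-table entries covering
+		 * the target range.
+		 */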
+ if (addr > trunc_page((vm_offset_t)btext) - size &&
+ addr < round_page((vm_offset_t)etext)) {
+
+ ptep0 = vtopte(addr);
+ oldmap0 = *ptep0;
+ *ptep0 |= PG_RW;
+
+ /*
+ * Map another page if the data crosses a page
+ * boundary.
+ */
+ if ((*ptep0 & PG_PS) == 0) {
+ addr1 = trunc_page(addr + size - 1);
+ if (trunc_page(addr) != addr1) {
+ ptep1 = vtopte(addr1);
+ oldmap1 = *ptep1;
+ *ptep1 |= PG_RW;
+ }
+ } else {
+ addr1 = trunc_2mpage(addr + size - 1);
+ if (trunc_2mpage(addr) != addr1) {
+ ptep1 = vtopte(addr1);
+ oldmap1 = *ptep1;
+ *ptep1 |= PG_RW;
+ }
+ }
+
+ invltlb();
+ }
+
+ dst = (char *)addr;
+
+ while (size-- > 0)
+ *dst++ = *data++;
+ }
+
+ (void)kdb_jmpbuf(prev_jb);
+
+ if (ptep0) {
+ *ptep0 = oldmap0;
+
+ if (ptep1)
+ *ptep1 = oldmap1;
+
+ invltlb();
+ }
+
+ return (ret);
+}
+
+void
+db_show_mdpcpu(struct pcpu *pc)
+{
+
+ db_printf("curpmap = %p\n", pc->pc_curpmap);
+ db_printf("tssp = %p\n", pc->pc_tssp);
+ db_printf("commontssp = %p\n", pc->pc_commontssp);
+ db_printf("rsp0 = 0x%lx\n", pc->pc_rsp0);
+ db_printf("gs32p = %p\n", pc->pc_gs32p);
+ db_printf("ldt = %p\n", pc->pc_ldt);
+ db_printf("tss = %p\n", pc->pc_tss);
+}
diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
new file mode 100644
index 0000000..2c81f87
--- /dev/null
+++ b/sys/amd64/amd64/db_trace.c
@@ -0,0 +1,701 @@
+/*-
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kdb.h>
+#include <sys/proc.h>
+#include <sys/stack.h>
+#include <sys/sysent.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/reg.h>
+#include <machine/stack.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include <ddb/ddb.h>
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+#include <ddb/db_variables.h>
+
+static db_varfcn_t db_dr0;
+static db_varfcn_t db_dr1;
+static db_varfcn_t db_dr2;
+static db_varfcn_t db_dr3;
+static db_varfcn_t db_dr4;
+static db_varfcn_t db_dr5;
+static db_varfcn_t db_dr6;
+static db_varfcn_t db_dr7;
+static db_varfcn_t db_frame;
+static db_varfcn_t db_rsp;
+static db_varfcn_t db_ss;
+
+/*
+ * Machine register set.
+ */
+#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x)
+struct db_variable db_regs[] = {
+ { "cs", DB_OFFSET(tf_cs), db_frame },
+ { "ds", DB_OFFSET(tf_ds), db_frame },
+ { "es", DB_OFFSET(tf_es), db_frame },
+ { "fs", DB_OFFSET(tf_fs), db_frame },
+ { "gs", DB_OFFSET(tf_gs), db_frame },
+ { "ss", NULL, db_ss },
+ { "rax", DB_OFFSET(tf_rax), db_frame },
+ { "rcx", DB_OFFSET(tf_rcx), db_frame },
+ { "rdx", DB_OFFSET(tf_rdx), db_frame },
+ { "rbx", DB_OFFSET(tf_rbx), db_frame },
+ { "rsp", NULL, db_rsp },
+ { "rbp", DB_OFFSET(tf_rbp), db_frame },
+ { "rsi", DB_OFFSET(tf_rsi), db_frame },
+ { "rdi", DB_OFFSET(tf_rdi), db_frame },
+ { "r8", DB_OFFSET(tf_r8), db_frame },
+ { "r9", DB_OFFSET(tf_r9), db_frame },
+ { "r10", DB_OFFSET(tf_r10), db_frame },
+ { "r11", DB_OFFSET(tf_r11), db_frame },
+ { "r12", DB_OFFSET(tf_r12), db_frame },
+ { "r13", DB_OFFSET(tf_r13), db_frame },
+ { "r14", DB_OFFSET(tf_r14), db_frame },
+ { "r15", DB_OFFSET(tf_r15), db_frame },
+ { "rip", DB_OFFSET(tf_rip), db_frame },
+ { "rflags", DB_OFFSET(tf_rflags), db_frame },
+#define DB_N_SHOW_REGS 24 /* Don't show registers after here. */
+ { "dr0", NULL, db_dr0 },
+ { "dr1", NULL, db_dr1 },
+ { "dr2", NULL, db_dr2 },
+ { "dr3", NULL, db_dr3 },
+ { "dr4", NULL, db_dr4 },
+ { "dr5", NULL, db_dr5 },
+ { "dr6", NULL, db_dr6 },
+ { "dr7", NULL, db_dr7 },
+};
+struct db_variable *db_eregs = db_regs + DB_N_SHOW_REGS;
+
+#define DB_DRX_FUNC(reg) \
+static int \
+db_ ## reg (struct db_variable *vp, db_expr_t *valuep, int op)	\
+{ \
+ if (op == DB_VAR_GET) \
+ *valuep = r ## reg (); \
+ else \
+ load_ ## reg (*valuep); \
+ return (1); \
+}
+
+DB_DRX_FUNC(dr0)
+DB_DRX_FUNC(dr1)
+DB_DRX_FUNC(dr2)
+DB_DRX_FUNC(dr3)
+DB_DRX_FUNC(dr4)
+DB_DRX_FUNC(dr5)
+DB_DRX_FUNC(dr6)
+DB_DRX_FUNC(dr7)
+
+static __inline long
+get_rsp(struct trapframe *tf)
+{
+ return ((ISPL(tf->tf_cs)) ? tf->tf_rsp :
+ (db_expr_t)tf + offsetof(struct trapframe, tf_rsp));
+}
+
+static int
+db_frame(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+ long *reg;
+
+ if (kdb_frame == NULL)
+ return (0);
+
+ reg = (long *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
+ if (op == DB_VAR_GET)
+ *valuep = *reg;
+ else
+ *reg = *valuep;
+ return (1);
+}
+
+static int
+db_rsp(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+
+ if (kdb_frame == NULL)
+ return (0);
+
+ if (op == DB_VAR_GET)
+ *valuep = get_rsp(kdb_frame);
+ else if (ISPL(kdb_frame->tf_cs))
+ kdb_frame->tf_rsp = *valuep;
+ return (1);
+}
+
+static int
+db_ss(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+
+ if (kdb_frame == NULL)
+ return (0);
+
+ if (op == DB_VAR_GET)
+ *valuep = (ISPL(kdb_frame->tf_cs)) ? kdb_frame->tf_ss : rss();
+ else if (ISPL(kdb_frame->tf_cs))
+ kdb_frame->tf_ss = *valuep;
+ return (1);
+}
+
+#define NORMAL 0
+#define TRAP 1
+#define INTERRUPT 2
+#define SYSCALL 3
+#define TRAP_INTERRUPT 5
+
+static void db_nextframe(struct amd64_frame **, db_addr_t *, struct thread *);
+static int db_numargs(struct amd64_frame *);
+static void db_print_stack_entry(const char *, int, char **, long *, db_addr_t,
+ void *);
+static void decode_syscall(int, struct thread *);
+
+static const char * watchtype_str(int type);
+int amd64_set_watch(int watchnum, unsigned long watchaddr, int size,
+ int access, struct dbreg *d);
+int amd64_clr_watch(int watchnum, struct dbreg *d);
+
+/*
+ * Figure out how many arguments were passed into the frame at "fp".
+ */
+static int
+db_numargs(struct amd64_frame *fp)
+{
+#if 1
+ return (0); /* regparm, needs dwarf2 info */
+#else
+ long *argp;
+ int inst;
+ int args;
+
+ argp = (long *)db_get_value((long)&fp->f_retaddr, 8, FALSE);
+ /*
+ * XXX etext is wrong for LKMs. We should attempt to interpret
+ * the instruction at the return address in all cases. This
+ * may require better fault handling.
+ */
+ if (argp < (long *)btext || argp >= (long *)etext) {
+ args = 5;
+ } else {
+ inst = db_get_value((long)argp, 4, FALSE);
+ if ((inst & 0xff) == 0x59) /* popl %ecx */
+ args = 1;
+ else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */
+ args = ((inst >> 16) & 0xff) / 4;
+ else
+ args = 5;
+ }
+ return (args);
+#endif
+}
+
+static void
+db_print_stack_entry(const char *name, int narg, char **argnp, long *argp,
+    db_addr_t callpc, void *frame)
+{
+ db_printf("%s(", name);
+#if 0
+ while (narg) {
+ if (argnp)
+ db_printf("%s=", *argnp++);
+ db_printf("%lr", (long)db_get_value((long)argp, 8, FALSE));
+ argp++;
+ if (--narg != 0)
+ db_printf(",");
+ }
+#endif
+ db_printf(") at ");
+ db_printsym(callpc, DB_STGY_PROC);
+ if (frame != NULL)
+ db_printf("/frame 0x%lx", (register_t)frame);
+ db_printf("\n");
+}
+
+static void
+decode_syscall(int number, struct thread *td)
+{
+ struct proc *p;
+ c_db_sym_t sym;
+ db_expr_t diff;
+ sy_call_t *f;
+ const char *symname;
+
+ db_printf(" (%d", number);
+ p = (td != NULL) ? td->td_proc : NULL;
+ if (p != NULL && 0 <= number && number < p->p_sysent->sv_size) {
+ f = p->p_sysent->sv_table[number].sy_call;
+ sym = db_search_symbol((db_addr_t)f, DB_STGY_ANY, &diff);
+ if (sym != DB_SYM_NULL && diff == 0) {
+ db_symbol_values(sym, &symname, NULL);
+ db_printf(", %s, %s", p->p_sysent->sv_name, symname);
+ }
+ }
+ db_printf(")");
+}
+
+/*
+ * Figure out the next frame up in the call stack.
+ */
+static void
+db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
+{
+ struct trapframe *tf;
+ int frame_type;
+ long rip, rsp, rbp;
+ db_expr_t offset;
+ c_db_sym_t sym;
+ const char *name;
+
+ rip = db_get_value((long) &(*fp)->f_retaddr, 8, FALSE);
+ rbp = db_get_value((long) &(*fp)->f_frame, 8, FALSE);
+
+ /*
+	 * Figure out the frame type.  We look at the address just before
+	 * the saved instruction pointer because the saved RIP points past
+	 * the call instruction; if the called function is marked as dead
+	 * (such as panic() at the end of dblfault_handler()), the
+	 * instruction at the saved RIP will belong to a different function
+	 * (syscall() in this example) rather than the one that actually
+	 * made the call.
+ */
+ frame_type = NORMAL;
+ sym = db_search_symbol(rip - 1, DB_STGY_ANY, &offset);
+ db_symbol_values(sym, &name, NULL);
+ if (name != NULL) {
+ if (strcmp(name, "calltrap") == 0 ||
+ strcmp(name, "fork_trampoline") == 0 ||
+ strcmp(name, "nmi_calltrap") == 0 ||
+ strcmp(name, "Xdblfault") == 0)
+ frame_type = TRAP;
+		else if (strncmp(name, "Xatpic_intr", 11) == 0 ||
+		    strncmp(name, "Xapic_isr", 9) == 0)
+			frame_type = INTERRUPT;
+ else if (strcmp(name, "Xfast_syscall") == 0)
+ frame_type = SYSCALL;
+#ifdef COMPAT_FREEBSD32
+ else if (strcmp(name, "Xint0x80_syscall") == 0)
+ frame_type = SYSCALL;
+#endif
+ /* XXX: These are interrupts with trap frames. */
+ else if (strcmp(name, "Xtimerint") == 0 ||
+ strcmp(name, "Xcpustop") == 0 ||
+ strcmp(name, "Xcpususpend") == 0 ||
+ strcmp(name, "Xrendezvous") == 0 ||
+ strcmp(name, "Xipi_intr_bitmap_handler") == 0)
+ frame_type = TRAP_INTERRUPT;
+ }
+
+ /*
+ * Normal frames need no special processing.
+ */
+ if (frame_type == NORMAL) {
+ *ip = (db_addr_t) rip;
+ *fp = (struct amd64_frame *) rbp;
+ return;
+ }
+
+ db_print_stack_entry(name, 0, 0, 0, rip, &(*fp)->f_frame);
+
+ /*
+ * Point to base of trapframe which is just above the
+ * current frame.
+ */
+ tf = (struct trapframe *)((long)*fp + 16);
+
+ if (INKERNEL((long) tf)) {
+ rsp = get_rsp(tf);
+ rip = tf->tf_rip;
+ rbp = tf->tf_rbp;
+ switch (frame_type) {
+ case TRAP:
+ db_printf("--- trap %#r", tf->tf_trapno);
+ break;
+ case SYSCALL:
+ db_printf("--- syscall");
+ decode_syscall(tf->tf_rax, td);
+ break;
+ case TRAP_INTERRUPT:
+ case INTERRUPT:
+ db_printf("--- interrupt");
+ break;
+ default:
+ panic("The moon has moved again.");
+ }
+ db_printf(", rip = %#lr, rsp = %#lr, rbp = %#lr ---\n", rip,
+ rsp, rbp);
+ }
+
+ *ip = (db_addr_t) rip;
+ *fp = (struct amd64_frame *) rbp;
+}
+
+static int
+db_backtrace(struct thread *td, struct trapframe *tf,
+ struct amd64_frame *frame, db_addr_t pc, int count)
+{
+ struct amd64_frame *actframe;
+#define MAXNARG 16
+ char *argnames[MAXNARG], **argnp = NULL;
+ const char *name;
+ long *argp;
+ db_expr_t offset;
+ c_db_sym_t sym;
+ int narg;
+ boolean_t first;
+
+ if (count == -1)
+ count = 1024;
+
+ first = TRUE;
+ while (count-- && !db_pager_quit) {
+ sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
+ db_symbol_values(sym, &name, NULL);
+
+ /*
+ * Attempt to determine a (possibly fake) frame that gives
+ * the caller's pc. It may differ from `frame' if the
+ * current function never sets up a standard frame or hasn't
+ * set one up yet or has just discarded one. The last two
+ * cases can be guessed fairly reliably for code generated
+ * by gcc. The first case is too much trouble to handle in
+ * general because the amount of junk on the stack depends
+ * on the pc (the special handling of "calltrap", etc. in
+ * db_nextframe() works because the `next' pc is special).
+ */
+ actframe = frame;
+ if (first) {
+ if (tf != NULL) {
+ int instr;
+
+ instr = db_get_value(pc, 4, FALSE);
+ if ((instr & 0xffffffff) == 0xe5894855) {
+ /* pushq %rbp; movq %rsp, %rbp */
+ actframe = (void *)(get_rsp(tf) - 8);
+ } else if ((instr & 0xffffff) == 0xe58948) {
+ /* movq %rsp, %rbp */
+ actframe = (void *)get_rsp(tf);
+ if (tf->tf_rbp == 0) {
+ /* Fake frame better. */
+ frame = actframe;
+ }
+ } else if ((instr & 0xff) == 0xc3) {
+ /* ret */
+ actframe = (void *)(get_rsp(tf) - 8);
+ } else if (offset == 0) {
+ /* Probably an assembler symbol. */
+ actframe = (void *)(get_rsp(tf) - 8);
+ }
+ } else if (strcmp(name, "fork_trampoline") == 0) {
+ /*
+ * Don't try to walk back on a stack for a
+ * process that hasn't actually been run yet.
+ */
+ db_print_stack_entry(name, 0, 0, 0, pc,
+ actframe);
+ break;
+ }
+ first = FALSE;
+ }
+
+ argp = &actframe->f_arg0;
+ narg = MAXNARG;
+ if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) {
+ argnp = argnames;
+ } else {
+ narg = db_numargs(frame);
+ }
+
+ db_print_stack_entry(name, narg, argnp, argp, pc, actframe);
+
+ if (actframe != frame) {
+ /* `frame' belongs to caller. */
+ pc = (db_addr_t)
+ db_get_value((long)&actframe->f_retaddr, 8, FALSE);
+ continue;
+ }
+
+ db_nextframe(&frame, &pc, td);
+
+ if (INKERNEL((long)pc) && !INKERNEL((long)frame)) {
+ sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
+ db_symbol_values(sym, &name, NULL);
+ db_print_stack_entry(name, 0, 0, 0, pc, frame);
+ break;
+ }
+ if (!INKERNEL((long) frame)) {
+ break;
+ }
+ }
+
+ return (0);
+}
+
+void
+db_trace_self(void)
+{
+ struct amd64_frame *frame;
+ db_addr_t callpc;
+ register_t rbp;
+
+ __asm __volatile("movq %%rbp,%0" : "=r" (rbp));
+ frame = (struct amd64_frame *)rbp;
+ callpc = (db_addr_t)db_get_value((long)&frame->f_retaddr, 8, FALSE);
+ frame = frame->f_frame;
+ db_backtrace(curthread, NULL, frame, callpc, -1);
+}
+
+int
+db_trace_thread(struct thread *thr, int count)
+{
+ struct pcb *ctx;
+
+ ctx = kdb_thr_ctx(thr);
+ return (db_backtrace(thr, NULL, (struct amd64_frame *)ctx->pcb_rbp,
+ ctx->pcb_rip, count));
+}
+
+int
+amd64_set_watch(int watchnum, unsigned long watchaddr, int size, int access,
+    struct dbreg *d)
+{
+ int i, len;
+
+ if (watchnum == -1) {
+ for (i = 0; i < 4; i++)
+ if (!DBREG_DR7_ENABLED(d->dr[7], i))
+ break;
+ if (i < 4)
+ watchnum = i;
+ else
+ return (-1);
+ }
+
+ switch (access) {
+ case DBREG_DR7_EXEC:
+ size = 1; /* size must be 1 for an execution breakpoint */
+ /* fall through */
+ case DBREG_DR7_WRONLY:
+ case DBREG_DR7_RDWR:
+ break;
+ default:
+ return (-1);
+ }
+
+ /*
+ * we can watch a 1, 2, 4, or 8 byte sized location
+ */
+ switch (size) {
+ case 1:
+ len = DBREG_DR7_LEN_1;
+ break;
+ case 2:
+ len = DBREG_DR7_LEN_2;
+ break;
+ case 4:
+ len = DBREG_DR7_LEN_4;
+ break;
+ case 8:
+ len = DBREG_DR7_LEN_8;
+ break;
+ default:
+ return (-1);
+ }
+
+ /* clear the bits we are about to affect */
+ d->dr[7] &= ~DBREG_DR7_MASK(watchnum);
+
+ /* set drN register to the address, N=watchnum */
+ DBREG_DRX(d, watchnum) = watchaddr;
+
+ /* enable the watchpoint */
+ d->dr[7] |= DBREG_DR7_SET(watchnum, len, access,
+ DBREG_DR7_GLOBAL_ENABLE);
+
+ return (watchnum);
+}
+
+int
+amd64_clr_watch(int watchnum, struct dbreg *d)
+{
+
+ if (watchnum < 0 || watchnum >= 4)
+ return (-1);
+
+ d->dr[7] &= ~DBREG_DR7_MASK(watchnum);
+ DBREG_DRX(d, watchnum) = 0;
+
+ return (0);
+}
+
+int
+db_md_set_watchpoint(db_expr_t addr, db_expr_t size)
+{
+ struct dbreg d;
+ int avail, i, wsize;
+
+ fill_dbregs(NULL, &d);
+
+ avail = 0;
+	for (i = 0; i < 4; i++) {
+ if (!DBREG_DR7_ENABLED(d.dr[7], i))
+ avail++;
+ }
+
+ if (avail * 8 < size)
+ return (-1);
+
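+	/*
+	 * Cover the range with up to four watchpoints, preferring
+	 * 8-byte chunks.
+	 */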
+ for (i = 0; i < 4 && (size > 0); i++) {
+ if (!DBREG_DR7_ENABLED(d.dr[7], i)) {
+ if (size >= 8 || (avail == 1 && size > 4))
+ wsize = 8;
+ else if (size > 2)
+ wsize = 4;
+ else
+ wsize = size;
+ amd64_set_watch(i, addr, wsize,
+ DBREG_DR7_WRONLY, &d);
+ addr += wsize;
+ size -= wsize;
+ avail--;
+ }
+ }
+
+ set_dbregs(NULL, &d);
+
+	return (0);
+}
+
+int
+db_md_clr_watchpoint(db_expr_t addr, db_expr_t size)
+{
+ struct dbreg d;
+ int i;
+
+ fill_dbregs(NULL, &d);
+
+	for (i = 0; i < 4; i++) {
+ if (DBREG_DR7_ENABLED(d.dr[7], i)) {
+ if ((DBREG_DRX((&d), i) >= addr) &&
+ (DBREG_DRX((&d), i) < addr+size))
+ amd64_clr_watch(i, &d);
+
+ }
+ }
+
+ set_dbregs(NULL, &d);
+
+	return (0);
+}
+
+static const char *
+watchtype_str(int type)
+{
+	switch (type) {
+	case DBREG_DR7_EXEC:
+		return ("execute");
+	case DBREG_DR7_RDWR:
+		return ("read/write");
+	case DBREG_DR7_WRONLY:
+		return ("write");
+	default:
+		return ("invalid");
+	}
+}
+
+void
+db_md_list_watchpoints(void)
+{
+ struct dbreg d;
+ int i, len, type;
+
+ fill_dbregs(NULL, &d);
+
+ db_printf("\nhardware watchpoints:\n");
+ db_printf(" watch status type len address\n");
+ db_printf(" ----- -------- ---------- --- ------------------\n");
+ for (i = 0; i < 4; i++) {
+ if (DBREG_DR7_ENABLED(d.dr[7], i)) {
+ type = DBREG_DR7_ACCESS(d.dr[7], i);
+ len = DBREG_DR7_LEN(d.dr[7], i);
+ if (len == DBREG_DR7_LEN_8)
+ len = 8;
+ else
+ len++;
+ db_printf(" %-5d %-8s %10s %3d ",
+ i, "enabled", watchtype_str(type), len);
+ db_printsym((db_addr_t)DBREG_DRX((&d), i), DB_STGY_ANY);
+ db_printf("\n");
+ } else {
+ db_printf(" %-5d disabled\n", i);
+ }
+ }
+
+ db_printf("\ndebug register values:\n");
+ for (i = 0; i < 8; i++) {
+ db_printf(" dr%d 0x%016lx\n", i, DBREG_DRX((&d), i));
+ }
+ db_printf("\n");
+}
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
new file mode 100644
index 0000000..fdc4d56
--- /dev/null
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -0,0 +1,278 @@
+/*-
+ * Copyright 1996-1998 John D. Polstra.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/linker.h>
+#include <sys/proc.h>
+#include <sys/sysent.h>
+#include <sys/imgact_elf.h>
+#include <sys/syscall.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_param.h>
+
+#include <machine/elf.h>
+#include <machine/md_var.h>
+
+struct sysentvec elf64_freebsd_sysvec = {
+ .sv_size = SYS_MAXSYSCALL,
+ .sv_table = sysent,
+ .sv_mask = 0,
+ .sv_sigsize = 0,
+ .sv_sigtbl = NULL,
+ .sv_errsize = 0,
+ .sv_errtbl = NULL,
+ .sv_transtrap = NULL,
+ .sv_fixup = __elfN(freebsd_fixup),
+ .sv_sendsig = sendsig,
+ .sv_sigcode = sigcode,
+ .sv_szsigcode = &szsigcode,
+ .sv_prepsyscall = NULL,
+ .sv_name = "FreeBSD ELF64",
+ .sv_coredump = __elfN(coredump),
+ .sv_imgact_try = NULL,
+ .sv_minsigstksz = MINSIGSTKSZ,
+ .sv_pagesize = PAGE_SIZE,
+ .sv_minuser = VM_MIN_ADDRESS,
+ .sv_maxuser = VM_MAXUSER_ADDRESS,
+ .sv_usrstack = USRSTACK,
+ .sv_psstrings = PS_STRINGS,
+ .sv_stackprot = VM_PROT_ALL,
+ .sv_copyout_strings = exec_copyout_strings,
+ .sv_setregs = exec_setregs,
+ .sv_fixlimit = NULL,
+ .sv_maxssiz = NULL,
+ .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP,
+ .sv_set_syscall_retval = cpu_set_syscall_retval,
+ .sv_fetch_syscall_args = cpu_fetch_syscall_args,
+ .sv_syscallnames = syscallnames,
+ .sv_shared_page_base = SHAREDPAGE,
+ .sv_shared_page_len = PAGE_SIZE,
+ .sv_schedtail = NULL,
+};
+INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
+
+static Elf64_Brandinfo freebsd_brand_info = {
+ .brand = ELFOSABI_FREEBSD,
+ .machine = EM_X86_64,
+ .compat_3_brand = "FreeBSD",
+ .emul_path = NULL,
+ .interp_path = "/libexec/ld-elf.so.1",
+ .sysvec = &elf64_freebsd_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &elf64_freebsd_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+};
+
+SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
+ (sysinit_cfunc_t) elf64_insert_brand_entry,
+ &freebsd_brand_info);
+
+static Elf64_Brandinfo freebsd_brand_oinfo = {
+ .brand = ELFOSABI_FREEBSD,
+ .machine = EM_X86_64,
+ .compat_3_brand = "FreeBSD",
+ .emul_path = NULL,
+ .interp_path = "/usr/libexec/ld-elf.so.1",
+ .sysvec = &elf64_freebsd_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &elf64_freebsd_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+};
+
+SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
+ (sysinit_cfunc_t) elf64_insert_brand_entry,
+ &freebsd_brand_oinfo);
+
+static Elf64_Brandinfo kfreebsd_brand_info = {
+ .brand = ELFOSABI_FREEBSD,
+ .machine = EM_X86_64,
+ .compat_3_brand = "FreeBSD",
+ .emul_path = NULL,
+ .interp_path = "/lib/ld-kfreebsd-x86-64.so.1",
+ .sysvec = &elf64_freebsd_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &elf64_kfreebsd_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
+};
+
+SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
+ (sysinit_cfunc_t) elf64_insert_brand_entry,
+ &kfreebsd_brand_info);
+
+void
+elf64_dump_thread(struct thread *td __unused, void *dst __unused,
+ size_t *off __unused)
+{
+}
+
+/* Process one ELF relocation with addend. */
+static int
+elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
+ int type, int local, elf_lookup_fn lookup)
+{
+ Elf64_Addr *where, val;
+ Elf32_Addr *where32, val32;
+ Elf_Addr addr;
+ Elf_Addr addend;
+ Elf_Size rtype, symidx;
+ const Elf_Rel *rel;
+ const Elf_Rela *rela;
+
+ switch (type) {
+ case ELF_RELOC_REL:
+ rel = (const Elf_Rel *)data;
+ where = (Elf_Addr *) (relocbase + rel->r_offset);
+ rtype = ELF_R_TYPE(rel->r_info);
+ symidx = ELF_R_SYM(rel->r_info);
+ /* Addend is 32 bit on 32 bit relocs */
+ switch (rtype) {
+ case R_X86_64_PC32:
+ case R_X86_64_32S:
+ addend = *(Elf32_Addr *)where;
+ break;
+ default:
+ addend = *where;
+ break;
+ }
+ break;
+ case ELF_RELOC_RELA:
+ rela = (const Elf_Rela *)data;
+ where = (Elf_Addr *) (relocbase + rela->r_offset);
+ addend = rela->r_addend;
+ rtype = ELF_R_TYPE(rela->r_info);
+ symidx = ELF_R_SYM(rela->r_info);
+ break;
+ default:
+		panic("unknown reloc type %d", type);
+ }
+
+ switch (rtype) {
+
+ case R_X86_64_NONE: /* none */
+ break;
+
+ case R_X86_64_64: /* S + A */
+ addr = lookup(lf, symidx, 1);
+ val = addr + addend;
+ if (addr == 0)
+			return (-1);
+ if (*where != val)
+ *where = val;
+ break;
+
+ case R_X86_64_PC32: /* S + A - P */
+ addr = lookup(lf, symidx, 1);
+ where32 = (Elf32_Addr *)where;
+ val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where);
+ if (addr == 0)
+			return (-1);
+ if (*where32 != val32)
+ *where32 = val32;
+ break;
+
+ case R_X86_64_32S: /* S + A sign extend */
+ addr = lookup(lf, symidx, 1);
+ val32 = (Elf32_Addr)(addr + addend);
+ where32 = (Elf32_Addr *)where;
+ if (addr == 0)
+			return (-1);
+ if (*where32 != val32)
+ *where32 = val32;
+ break;
+
+ case R_X86_64_COPY: /* none */
+ /*
+ * There shouldn't be copy relocations in kernel
+ * objects.
+ */
+ printf("kldload: unexpected R_COPY relocation\n");
+		return (-1);
+
+ case R_X86_64_GLOB_DAT: /* S */
+ case R_X86_64_JMP_SLOT: /* XXX need addend + offset */
+ addr = lookup(lf, symidx, 1);
+ if (addr == 0)
+			return (-1);
+ if (*where != addr)
+ *where = addr;
+ break;
+
+ case R_X86_64_RELATIVE: /* B + A */
+ addr = relocbase + addend;
+ val = addr;
+ if (*where != val)
+ *where = val;
+ break;
+
+ default:
+ printf("kldload: unexpected relocation type %ld\n",
+ rtype);
+		return (-1);
+ }
+	return (0);
+}
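+
+/*
+ * For illustration (hypothetical numbers): an R_X86_64_RELATIVE entry
+ * with r_addend 0x1000 in a module relocated to base
+ * 0xffffffff82000000 stores 0xffffffff82001000 at
+ * relocbase + r_offset; no symbol lookup is involved.
+ */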
+
+int
+elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
+ elf_lookup_fn lookup)
+{
+
+ return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
+}
+
+int
+elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
+ int type, elf_lookup_fn lookup)
+{
+
+ return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
+}
+
+int
+elf_cpu_load_file(linker_file_t lf __unused)
+{
+
+ return (0);
+}
+
+int
+elf_cpu_unload_file(linker_file_t lf __unused)
+{
+
+ return (0);
+}
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
new file mode 100644
index 0000000..89ad638
--- /dev/null
+++ b/sys/amd64/amd64/exception.S
@@ -0,0 +1,878 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * Copyright (c) 2007 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_atpic.h"
+#include "opt_compat.h"
+#include "opt_hwpmc_hooks.h"
+#include "opt_kdtrace.h"
+
+#include <machine/asmacros.h>
+#include <machine/psl.h>
+#include <machine/trap.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+#ifdef KDTRACE_HOOKS
+ .bss
+ .globl dtrace_invop_jump_addr
+ .align 8
+ .type dtrace_invop_jump_addr,@object
+ .size dtrace_invop_jump_addr,8
+dtrace_invop_jump_addr:
+ .zero 8
+ .globl dtrace_invop_calltrap_addr
+ .align 8
+ .type dtrace_invop_calltrap_addr,@object
+ .size dtrace_invop_calltrap_addr,8
+dtrace_invop_calltrap_addr:
+ .zero 8
+#endif
+ .text
+#ifdef HWPMC_HOOKS
+ ENTRY(start_exceptions)
+#endif
+
+/*****************************************************************************/
+/* Trap handling */
+/*****************************************************************************/
+/*
+ * Trap and fault vector routines.
+ *
+ * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes
+ * state on the stack but also disables interrupts. This is important for
+ * us for the use of the swapgs instruction. We cannot be interrupted
+ * until the GS.base value is correct. For most traps, we automatically
+ * then enable interrupts if the interrupted context had them enabled.
+ * This is equivalent to the i386 port's use of SDT_SYS386TGT.
+ *
+ * The cpu will push a certain amount of state onto the kernel stack for
+ * the current process. See amd64/include/frame.h.
+ * This includes the current RFLAGS (status register, which includes
+ * the interrupt disable state prior to the trap), the code segment
+ * register, and the return instruction pointer.  The cpu will also
+ * push an 'error' code for certain traps.  We push a dummy error code
+ * for those traps where the cpu doesn't, in order to maintain a
+ * consistent frame.  We also push a contrived 'trap number'.
+ *
+ * The cpu does not push the general registers, we must do that, and we
+ * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
+ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
+ * must load them with appropriate values for supervisor mode operation.
+ */
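+
+/*
+ * Schematically, from higher to lower addresses, the completed frame
+ * therefore holds the hardware-pushed %ss, %rsp, %rflags, %cs, %rip
+ * and (for some traps) an error code, followed by the software-pushed
+ * trap number, fault address, segment registers and general
+ * registers; the TF_* offsets used below index into this layout.
+ */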
+
+MCOUNT_LABEL(user)
+MCOUNT_LABEL(btrap)
+
+/* Traps that we leave interrupts disabled for.. */
+#define TRAP_NOEN(a) \
+ subq $TF_RIP,%rsp; \
+ movl $(a),TF_TRAPNO(%rsp) ; \
+ movq $0,TF_ADDR(%rsp) ; \
+ movq $0,TF_ERR(%rsp) ; \
+ jmp alltraps_noen
+IDTVEC(dbg)
+ TRAP_NOEN(T_TRCTRAP)
+IDTVEC(bpt)
+ TRAP_NOEN(T_BPTFLT)
+#ifdef KDTRACE_HOOKS
+IDTVEC(dtrace_ret)
+ TRAP_NOEN(T_DTRACE_RET)
+#endif
+
+/* Regular traps; The cpu does not supply tf_err for these. */
+#define TRAP(a) \
+ subq $TF_RIP,%rsp; \
+ movl $(a),TF_TRAPNO(%rsp) ; \
+ movq $0,TF_ADDR(%rsp) ; \
+ movq $0,TF_ERR(%rsp) ; \
+ jmp alltraps
+IDTVEC(div)
+ TRAP(T_DIVIDE)
+IDTVEC(ofl)
+ TRAP(T_OFLOW)
+IDTVEC(bnd)
+ TRAP(T_BOUND)
+IDTVEC(ill)
+ TRAP(T_PRIVINFLT)
+IDTVEC(dna)
+ TRAP(T_DNA)
+IDTVEC(fpusegm)
+ TRAP(T_FPOPFLT)
+IDTVEC(mchk)
+ TRAP(T_MCHK)
+IDTVEC(rsvd)
+ TRAP(T_RESERVED)
+IDTVEC(fpu)
+ TRAP(T_ARITHTRAP)
+IDTVEC(xmm)
+ TRAP(T_XMMFLT)
+
+/* This group of traps have tf_err already pushed by the cpu */
+#define TRAP_ERR(a) \
+ subq $TF_ERR,%rsp; \
+ movl $(a),TF_TRAPNO(%rsp) ; \
+ movq $0,TF_ADDR(%rsp) ; \
+ jmp alltraps
+IDTVEC(tss)
+ TRAP_ERR(T_TSSFLT)
+IDTVEC(missing)
+ TRAP_ERR(T_SEGNPFLT)
+IDTVEC(stk)
+ TRAP_ERR(T_STKFLT)
+IDTVEC(align)
+ TRAP_ERR(T_ALIGNFLT)
+
+ /*
+ * alltraps entry point. Use swapgs if this is the first time in the
+ * kernel from userland. Reenable interrupts if they were enabled
+ * before the trap. This approximates SDT_SYS386TGT on the i386 port.
+ */
+ SUPERALIGN_TEXT
+ .globl alltraps
+ .type alltraps,@function
+alltraps:
+ movq %rdi,TF_RDI(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz alltraps_testi /* already running with kernel GS.base */
+ swapgs
+ movq PCPU(CURPCB),%rdi
+ andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+alltraps_testi:
+ testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs_no_rdi
+ sti
+alltraps_pushregs_no_rdi:
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ FAKE_MCOUNT(TF_RIP(%rsp))
+#ifdef KDTRACE_HOOKS
+ /*
+ * DTrace Function Boundary Trace (fbt) probes are triggered
+ * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
+ * interrupt. For all other trap types, just handle them in
+ * the usual way.
+ */
+ cmpl $T_BPTFLT,TF_TRAPNO(%rsp)
+ jne calltrap
+
+ /* Check if there is no DTrace hook registered. */
+ cmpq $0,dtrace_invop_jump_addr
+ je calltrap
+
+ /*
+ * Set our jump address for the jump back in the event that
+ * the breakpoint wasn't caused by DTrace at all.
+ */
+ movq $calltrap,dtrace_invop_calltrap_addr(%rip)
+
+ /* Jump to the code hooked in by DTrace. */
+ movq dtrace_invop_jump_addr,%rax
+ jmpq *dtrace_invop_jump_addr
+#endif
+ .globl calltrap
+ .type calltrap,@function
+calltrap:
+ movq %rsp,%rdi
+ call trap
+ MEXITCOUNT
+ jmp doreti /* Handle any pending ASTs */
+
+ /*
+ * alltraps_noen entry point. Unlike alltraps above, we want to
+ * leave the interrupts disabled. This corresponds to
+ * SDT_SYS386IGT on the i386 port.
+ */
+ SUPERALIGN_TEXT
+ .globl alltraps_noen
+ .type alltraps_noen,@function
+alltraps_noen:
+ movq %rdi,TF_RDI(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+ movq PCPU(CURPCB),%rdi
+ andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
+1: movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ jmp alltraps_pushregs_no_rdi
+
+IDTVEC(dblfault)
+ subq $TF_ERR,%rsp
+ movl $T_DOUBLEFLT,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+1:
+ movq %rsp,%rdi
+ call dblfault_handler
+2:
+ hlt
+ jmp 2b
+
+IDTVEC(page)
+ subq $TF_ERR,%rsp
+ movl $T_PAGEFLT,TF_TRAPNO(%rsp)
+ movq %rdi,TF_RDI(%rsp) /* free up a GP register */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+ movq PCPU(CURPCB),%rdi
+ andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
+1: movq %cr2,%rdi /* preserve %cr2 before .. */
+ movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs_no_rdi
+ sti
+ jmp alltraps_pushregs_no_rdi
+
+ /*
+ * We have to special-case this one. If we get a trap in doreti() at
+	 * the iretq stage, we'll reenter with the wrong gs state.  We'll
+	 * have to do a special swapgs in this case even when coming from
+	 * the kernel.
+ * XXX linux has a trap handler for their equivalent of load_gs().
+ */
+IDTVEC(prot)
+ subq $TF_ERR,%rsp
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq %rdi,TF_RDI(%rsp) /* free up a GP register */
+ leaq doreti_iret(%rip),%rdi
+ cmpq %rdi,TF_RIP(%rsp)
+ je 1f /* kernel but with user gsbase!! */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 2f /* already running with kernel GS.base */
+1: swapgs
+2: movq PCPU(CURPCB),%rdi
+ orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs_no_rdi
+ sti
+ jmp alltraps_pushregs_no_rdi
+
+/*
+ * Fast syscall entry point. We enter here with just our new %cs/%ss set,
+ * and the new privilege level.  We are still running on the old user stack
+ * pointer. We have to juggle a few things around to find our stack etc.
+ * swapgs gives us access to our PCPU space only.
+ *
+ * We do not support invoking this from a custom %cs or %ss (e.g. using
+ * entries from an LDT).
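+ *
+ * Note that the syscall instruction itself saves the return %rip in
+ * %rcx and the caller's %rflags in %r11, which is why those two
+ * registers are among the first stored into the emulated trapframe
+ * below.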
+ */
+IDTVEC(fast_syscall)
+ swapgs
+ movq %rsp,PCPU(SCRATCH_RSP)
+ movq PCPU(RSP0),%rsp
+ /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
+ subq $TF_SIZE,%rsp
+ /* defer TF_RSP till we have a spare register */
+ movq %r11,TF_RFLAGS(%rsp)
+ movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
+ movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
+ movq %r11,TF_RSP(%rsp) /* user stack pointer */
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movq PCPU(CURPCB),%r11
+ andl $~PCB_FULL_IRET,PCB_FLAGS(%r11)
+ sti
+ movq $KUDSEL,TF_SS(%rsp)
+ movq $KUCSEL,TF_CS(%rsp)
+ movq $2,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp) /* arg 1 */
+ movq %rsi,TF_RSI(%rsp) /* arg 2 */
+ movq %rdx,TF_RDX(%rsp) /* arg 3 */
+ movq %r10,TF_RCX(%rsp) /* arg 4 */
+ movq %r8,TF_R8(%rsp) /* arg 5 */
+ movq %r9,TF_R9(%rsp) /* arg 6 */
+ movq %rax,TF_RAX(%rsp) /* syscall number */
+ movq %rbx,TF_RBX(%rsp) /* C preserved */
+ movq %rbp,TF_RBP(%rsp) /* C preserved */
+ movq %r12,TF_R12(%rsp) /* C preserved */
+ movq %r13,TF_R13(%rsp) /* C preserved */
+ movq %r14,TF_R14(%rsp) /* C preserved */
+ movq %r15,TF_R15(%rsp) /* C preserved */
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq PCPU(CURTHREAD),%rdi
+ movq %rsp,TD_FRAME(%rdi)
+ movl TF_RFLAGS(%rsp),%esi
+ andl $PSL_T,%esi
+ call amd64_syscall
+1: movq PCPU(CURPCB),%rax
+ /* Disable interrupts before testing PCB_FULL_IRET. */
+ cli
+ testl $PCB_FULL_IRET,PCB_FLAGS(%rax)
+ jnz 3f
+ /* Check for and handle AST's on return to userland. */
+ movq PCPU(CURTHREAD),%rax
+ testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
+ jne 2f
+ /* Restore preserved registers. */
+ MEXITCOUNT
+ movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
+ movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */
+ movq TF_RDX(%rsp),%rdx /* return value 2 */
+ movq TF_RAX(%rsp),%rax /* return value 1 */
+ movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
+ movq TF_RIP(%rsp),%rcx /* original %rip */
+ movq TF_RSP(%rsp),%rsp /* user stack pointer */
+ swapgs
+ sysretq
+
+2: /* AST scheduled. */
+ sti
+ movq %rsp,%rdi
+ call ast
+ jmp 1b
+
+3: /* Requested full context restore, use doreti for that. */
+ MEXITCOUNT
+ jmp doreti
+
+/*
+ * Here for CYA insurance, in case a "syscall" instruction gets
+ * issued from 32 bit compatibility mode.  MSR_CSTAR has to point
+ * to *something* if EFER_SCE is enabled.
+ */
+IDTVEC(fast_syscall32)
+ sysret
+
+/*
+ * NMI handling is special.
+ *
+ * First, NMIs do not respect the state of the processor's RFLAGS.IF
+ * bit. The NMI handler may be entered at any time, including when
+ * the processor is in a critical section with RFLAGS.IF == 0.
+ * The processor's GS.base value could be invalid on entry to the
+ * handler.
+ *
+ * Second, the processor treats NMIs specially, blocking further NMIs
+ * until an 'iretq' instruction is executed. We thus need to execute
+ * the NMI handler with interrupts disabled, to prevent a nested interrupt
+ * from executing an 'iretq' instruction and inadvertently taking the
+ * processor out of NMI mode.
+ *
+ * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
+ * GS.base value for the processor is stored just above the bottom of its
+ * NMI stack. For NMIs taken from kernel mode, the current value in
+ * the processor's GS.base is saved at entry to C-preserved register %r12,
+ * the canonical value for GS.base is then loaded into the processor, and
+ * the saved value is restored at exit time. For NMIs taken from user mode,
+ * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
+ */
+
+IDTVEC(nmi)
+ subq $TF_RIP,%rsp
+ movl $(T_NMI),TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ xorl %ebx,%ebx
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jnz nmi_fromuserspace
+ /*
+ * We've interrupted the kernel. Preserve GS.base in %r12.
+ */
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ movq %rax,%r12
+ shlq $32,%rdx
+ orq %rdx,%r12
+ /* Retrieve and load the canonical value for GS.base. */
+ movq TF_SIZE(%rsp),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ jmp nmi_calltrap
+nmi_fromuserspace:
+ incl %ebx
+ swapgs
+/* Note: this label is also used by ddb and gdb: */
+nmi_calltrap:
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp,%rdi
+ call trap
+ MEXITCOUNT
+#ifdef HWPMC_HOOKS
+ /*
+ * Capture a userspace callchain if needed.
+ *
+ * - Check if the current trap was from user mode.
+ * - Check if the current thread is valid.
+ * - Check if the thread requires a user call chain to be
+ * captured.
+ *
+ * We are still in NMI mode at this point.
+ */
+ testl %ebx,%ebx
+ jz nocallchain /* not from userspace */
+ movq PCPU(CURTHREAD),%rax
+ orq %rax,%rax /* curthread present? */
+ jz nocallchain
+ testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
+ jz nocallchain
+ /*
+ * A user callchain is to be captured, so:
+ * - Move execution to the regular kernel stack, to allow for
+ * nested NMI interrupts.
+ * - Take the processor out of "NMI" mode by faking an "iret".
+ * - Enable interrupts, so that copyin() can work.
+ */
+ movq %rsp,%rsi /* source stack pointer */
+ movq $TF_SIZE,%rcx
+ movq PCPU(RSP0),%rdx
+ subq %rcx,%rdx
+ movq %rdx,%rdi /* destination stack pointer */
+
+ shrq $3,%rcx /* trap frame size in long words */
+ cld
+ rep
+ movsq /* copy trapframe */
+
+ movl %ss,%eax
+ pushq %rax /* tf_ss */
+ pushq %rdx /* tf_rsp (on kernel stack) */
+ pushfq /* tf_rflags */
+ movl %cs,%eax
+ pushq %rax /* tf_cs */
+ pushq $outofnmi /* tf_rip */
+ iretq
+outofnmi:
+ /*
+ * At this point the processor has exited NMI mode and is running
+ * with interrupts turned off on the normal kernel stack.
+ *
+ * If a pending NMI gets recognized at or after this point, it
+ * will cause a kernel callchain to be traced.
+ *
+ * We turn interrupts back on, and call the user callchain capture hook.
+ */
+ movq pmc_hook,%rax
+ orq %rax,%rax
+ jz nocallchain
+ movq PCPU(CURTHREAD),%rdi /* thread */
+ movq $PMC_FN_USER_CALLCHAIN,%rsi /* command */
+ movq %rsp,%rdx /* frame */
+ sti
+ call *%rax
+ cli
+nocallchain:
+#endif
+ testl %ebx,%ebx
+ jnz doreti_exit
+nmi_kernelexit:
+ /*
+ * Put back the preserved MSR_GSBASE value.
+ */
+ movl $MSR_GSBASE,%ecx
+ movq %r12,%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+nmi_restoreregs:
+ movq TF_RDI(%rsp),%rdi
+ movq TF_RSI(%rsp),%rsi
+ movq TF_RDX(%rsp),%rdx
+ movq TF_RCX(%rsp),%rcx
+ movq TF_R8(%rsp),%r8
+ movq TF_R9(%rsp),%r9
+ movq TF_RAX(%rsp),%rax
+ movq TF_RBX(%rsp),%rbx
+ movq TF_RBP(%rsp),%rbp
+ movq TF_R10(%rsp),%r10
+ movq TF_R11(%rsp),%r11
+ movq TF_R12(%rsp),%r12
+ movq TF_R13(%rsp),%r13
+ movq TF_R14(%rsp),%r14
+ movq TF_R15(%rsp),%r15
+ addq $TF_RIP,%rsp
+ jmp doreti_iret
+
+ENTRY(fork_trampoline)
+ movq %r12,%rdi /* function */
+ movq %rbx,%rsi /* arg1 */
+ movq %rsp,%rdx /* trapframe pointer */
+ call fork_exit
+ MEXITCOUNT
+ jmp doreti /* Handle any ASTs */
+
+/*
+ * To efficiently implement classification of trap and interrupt handlers
+ * for profiling, there must be only trap handlers between the labels btrap
+ * and bintr, and only interrupt handlers between the labels bintr and
+ * eintr. This is implemented (partly) by including files that contain
+ * some of the handlers. Before including the files, set up a normal asm
+ * environment so that the included files don't need to know that they are
+ * included.
+ */
+
+#ifdef COMPAT_FREEBSD32
+ .data
+ .p2align 4
+ .text
+ SUPERALIGN_TEXT
+
+#include <amd64/ia32/ia32_exception.S>
+#endif
+
+ .data
+ .p2align 4
+ .text
+ SUPERALIGN_TEXT
+MCOUNT_LABEL(bintr)
+
+#include <amd64/amd64/apic_vector.S>
+
+#ifdef DEV_ATPIC
+ .data
+ .p2align 4
+ .text
+ SUPERALIGN_TEXT
+
+#include <amd64/amd64/atpic_vector.S>
+#endif
+
+ .text
+MCOUNT_LABEL(eintr)
+
+/*
+ * void doreti(struct trapframe)
+ *
+ * Handle return from interrupts, traps and syscalls.
+ */
+ .text
+ SUPERALIGN_TEXT
+ .type doreti,@function
+doreti:
+ FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
+ /*
+ * Check if ASTs can be handled now.
+ */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
+ jz doreti_exit /* can't handle ASTs now if not */
+
+doreti_ast:
+ /*
+ * Check for ASTs atomically with returning. Disabling CPU
+ * interrupts provides sufficient locking even in the SMP case,
+ * since we will be informed of any new ASTs by an IPI.
+ */
+ cli
+ movq PCPU(CURTHREAD),%rax
+ testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
+ je doreti_exit
+ sti
+ movq %rsp,%rdi /* pass a pointer to the trapframe */
+ call ast
+ jmp doreti_ast
+
+ /*
+ * doreti_exit: pop registers, iret.
+ *
+ * The segment register pop is a special case, since it may
+ * fault if (for example) a sigreturn specifies bad segment
+ * registers. The fault is handled in trap.c.
+ */
+doreti_exit:
+ MEXITCOUNT
+ movq PCPU(CURPCB),%r8
+
+ /*
+ * Do not reload segment registers for kernel.
+ * Since we do not reload segments registers with sane
+ * values on kernel entry, descriptors referenced by
+ * segments registers might be not valid. This is fatal
+ * for user mode, but is not a problem for the kernel.
+ */
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz ld_regs
+ testl $PCB_FULL_IRET,PCB_FLAGS(%r8)
+ jz ld_regs
+ testl $TF_HASSEGS,TF_FLAGS(%rsp)
+ je set_segs
+
+do_segs:
+ /* Restore %fs and fsbase */
+ movw TF_FS(%rsp),%ax
+ .globl ld_fs
+ld_fs:
+ movw %ax,%fs
+ cmpw $KUF32SEL,%ax
+ jne 1f
+ movl $MSR_FSBASE,%ecx
+ movl PCB_FSBASE(%r8),%eax
+ movl PCB_FSBASE+4(%r8),%edx
+ .globl ld_fsbase
+ld_fsbase:
+ wrmsr
+1:
+ /* Restore %gs and gsbase */
+ movw TF_GS(%rsp),%si
+ pushfq
+ cli
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ .globl ld_gs
+ld_gs:
+ movw %si,%gs
+ wrmsr
+ popfq
+ cmpw $KUG32SEL,%si
+ jne 1f
+ movl $MSR_KGSBASE,%ecx
+ movl PCB_GSBASE(%r8),%eax
+ movl PCB_GSBASE+4(%r8),%edx
+ .globl ld_gsbase
+ld_gsbase:
+ wrmsr
+1:
+ .globl ld_es
+ld_es:
+ movw TF_ES(%rsp),%es
+ .globl ld_ds
+ld_ds:
+ movw TF_DS(%rsp),%ds
+ld_regs:
+ movq TF_RDI(%rsp),%rdi
+ movq TF_RSI(%rsp),%rsi
+ movq TF_RDX(%rsp),%rdx
+ movq TF_RCX(%rsp),%rcx
+ movq TF_R8(%rsp),%r8
+ movq TF_R9(%rsp),%r9
+ movq TF_RAX(%rsp),%rax
+ movq TF_RBX(%rsp),%rbx
+ movq TF_RBP(%rsp),%rbp
+ movq TF_R10(%rsp),%r10
+ movq TF_R11(%rsp),%r11
+ movq TF_R12(%rsp),%r12
+ movq TF_R13(%rsp),%r13
+ movq TF_R14(%rsp),%r14
+ movq TF_R15(%rsp),%r15
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* keep running with kernel GS.base */
+ cli
+ swapgs
+1:
+ addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
+ .globl doreti_iret
+doreti_iret:
+ iretq
+
+set_segs:
+ movw $KUDSEL,%ax
+ movw %ax,TF_DS(%rsp)
+ movw %ax,TF_ES(%rsp)
+ movw $KUF32SEL,TF_FS(%rsp)
+ movw $KUG32SEL,TF_GS(%rsp)
+ jmp do_segs
+
+ /*
+ * doreti_iret_fault. Alternative return code for
+ * the case where we get a fault in the doreti_exit code
+ * above. trap() (amd64/amd64/trap.c) catches this specific
+ * case, sends the process a signal and continues in the
+ * corresponding place in the code below.
+ */
+ ALIGN_TEXT
+ .globl doreti_iret_fault
+doreti_iret_fault:
+ subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
+ testl $PSL_I,TF_RFLAGS(%rsp)
+ jz 1f
+ sti
+1:
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq $0,TF_ERR(%rsp) /* XXX should be the error code */
+ movq $0,TF_ADDR(%rsp)
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ jmp calltrap
+
+ ALIGN_TEXT
+ .globl ds_load_fault
+ds_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq %rsp,%rdi
+ call trap
+ movw $KUDSEL,TF_DS(%rsp)
+ jmp doreti
+
+ ALIGN_TEXT
+ .globl es_load_fault
+es_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq %rsp,%rdi
+ call trap
+ movw $KUDSEL,TF_ES(%rsp)
+ jmp doreti
+
+ ALIGN_TEXT
+ .globl fs_load_fault
+fs_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq %rsp,%rdi
+ call trap
+ movw $KUF32SEL,TF_FS(%rsp)
+ jmp doreti
+
+ ALIGN_TEXT
+ .globl gs_load_fault
+gs_load_fault:
+ popfq
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq %rsp,%rdi
+ call trap
+ movw $KUG32SEL,TF_GS(%rsp)
+ jmp doreti
+
+ ALIGN_TEXT
+ .globl fsbase_load_fault
+fsbase_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq %rsp,%rdi
+ call trap
+ movq PCPU(CURTHREAD),%r8
+ movq TD_PCB(%r8),%r8
+ movq $0,PCB_FSBASE(%r8)
+ jmp doreti
+
+ ALIGN_TEXT
+ .globl gsbase_load_fault
+gsbase_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movq %rsp,%rdi
+ call trap
+ movq PCPU(CURTHREAD),%r8
+ movq TD_PCB(%r8),%r8
+ movq $0,PCB_GSBASE(%r8)
+ jmp doreti
+
+#ifdef HWPMC_HOOKS
+ ENTRY(end_exceptions)
+#endif
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
new file mode 100644
index 0000000..18130b5
--- /dev/null
+++ b/sys/amd64/amd64/fpu.c
@@ -0,0 +1,1012 @@
+/*-
+ * Copyright (c) 1990 William Jolitz.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <sys/signalvar.h>
+#include <vm/uma.h>
+
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/resource.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/ucontext.h>
+
+/*
+ * Floating point support.
+ */
+
+#if defined(__GNUCLIKE_ASM) && !defined(lint)
+
+#define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
+#define fnclex() __asm __volatile("fnclex")
+#define fninit() __asm __volatile("fninit")
+#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
+#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
+#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
+#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr))
+#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
+
+static __inline void
+xrstor(char *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi));
+}
+
+static __inline void
+xsave(char *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+ "memory");
+}
+
+#else /* !(__GNUCLIKE_ASM && !lint) */
+
+void fldcw(u_short cw);
+void fnclex(void);
+void fninit(void);
+void fnstcw(caddr_t addr);
+void fnstsw(caddr_t addr);
+void fxsave(caddr_t addr);
+void fxrstor(caddr_t addr);
+void ldmxcsr(u_int csr);
+void stmxcsr(u_int *csr);
+void xrstor(char *addr, uint64_t mask);
+void xsave(char *addr, uint64_t mask);
+
+#endif /* __GNUCLIKE_ASM && !lint */
+
+#define start_emulating() load_cr0(rcr0() | CR0_TS)
+#define stop_emulating() clts()
+
+CTASSERT(sizeof(struct savefpu) == 512);
+CTASSERT(sizeof(struct xstate_hdr) == 64);
+CTASSERT(sizeof(struct savefpu_ymm) == 832);
+
+/*
+ * This requirement is to make it easier for asm code to calculate
+ * offset of the fpu save area from the pcb address. FPU save area
+ * must be 64-byte aligned.
+ */
+CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
+
+static void fpu_clean_state(void);
+
+SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
+ NULL, 1, "Floating point instructions executed in hardware");
+
+static int use_xsaveopt;
+int use_xsave; /* non-static for cpu_switch.S */
+uint64_t xsave_mask; /* the same */
+static uma_zone_t fpu_save_area_zone;
+static struct savefpu *fpu_initialstate;
+
+struct xsave_area_elm_descr {
+ u_int offset;
+ u_int size;
+} *xsave_area_desc;
+
+void
+fpusave(void *addr)
+{
+
+ if (use_xsave)
+ xsave((char *)addr, xsave_mask);
+ else
+ fxsave((char *)addr);
+}
+
+void
+fpurestore(void *addr)
+{
+
+ if (use_xsave)
+ xrstor((char *)addr, xsave_mask);
+ else
+ fxrstor((char *)addr);
+}
+
+/*
+ * Enable XSAVE if supported and allowed by user.
+ * Calculate the xsave_mask.
+ */
+static void
+fpuinit_bsp1(void)
+{
+ u_int cp[4];
+ uint64_t xsave_mask_user;
+
+ if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
+ use_xsave = 1;
+ TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
+ }
+ if (!use_xsave)
+ return;
+
+ cpuid_count(0xd, 0x0, cp);
+ xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+ if ((cp[0] & xsave_mask) != xsave_mask)
+ panic("CPU0 does not support X87 or SSE: %x", cp[0]);
+ xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
+ xsave_mask_user = xsave_mask;
+ TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
+ xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+ xsave_mask &= xsave_mask_user;
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+ /*
+ * Patch the XSAVE instruction in the cpu_switch code
+		 * to XSAVEOPT.  We assume that the XSAVE encoding uses
+		 * a REX byte, and set bit 4 of the ModRM byte.
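+		 * (XSAVE is encoded as 0f ae /4 and XSAVEOPT as
+		 * 0f ae /6, so OR-ing 0x10 into the ModRM byte turns
+		 * the /4 reg field into /6.)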
+ */
+ ctx_switch_xsave[3] |= 0x10;
+ use_xsaveopt = 1;
+ }
+}
+
+/*
+ * Calculate the fpu save area size.
+ */
+static void
+fpuinit_bsp2(void)
+{
+ u_int cp[4];
+
+ if (use_xsave) {
+ cpuid_count(0xd, 0x0, cp);
+ cpu_max_ext_state_size = cp[1];
+
+ /*
+ * Reload the cpu_feature2, since we enabled OSXSAVE.
+ */
+ do_cpuid(1, cp);
+ cpu_feature2 = cp[2];
+ } else
+ cpu_max_ext_state_size = sizeof(struct savefpu);
+}
+
+/*
+ * Initialize the floating point unit.
+ */
+void
+fpuinit(void)
+{
+ register_t saveintr;
+ u_int mxcsr;
+ u_short control;
+
+ if (IS_BSP())
+ fpuinit_bsp1();
+
+ if (use_xsave) {
+ load_cr4(rcr4() | CR4_XSAVE);
+ load_xcr(XCR0, xsave_mask);
+ }
+
+ /*
+	 * XCR0 must be set up before the CPU can report the save area size.
+ */
+ if (IS_BSP())
+ fpuinit_bsp2();
+
+ /*
+	 * It is too early for critical_enter() to work on the AP.
+ */
+ saveintr = intr_disable();
+ stop_emulating();
+ fninit();
+ control = __INITIAL_FPUCW__;
+ fldcw(control);
+ mxcsr = __INITIAL_MXCSR__;
+ ldmxcsr(mxcsr);
+ start_emulating();
+ intr_restore(saveintr);
+}
+
+/*
+ * On the boot CPU we generate a clean state that is used to
+ * initialize the floating point unit when it is first used by a
+ * process.
+ */
+static void
+fpuinitstate(void *arg __unused)
+{
+ register_t saveintr;
+ int cp[4], i, max_ext_n;
+
+ fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
+ M_WAITOK | M_ZERO);
+ saveintr = intr_disable();
+ stop_emulating();
+
+ fpusave(fpu_initialstate);
+ if (fpu_initialstate->sv_env.en_mxcsr_mask)
+ cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask;
+ else
+ cpu_mxcsr_mask = 0xFFBF;
+
+ /*
+ * The fninit instruction does not modify XMM registers. The
+	 * fpusave call dumped whatever garbage the registers contained
+	 * after reset into the saved initial state.  Clear the XMM
+	 * register file image to make the startup program state and
+	 * signal handler XMM register content predictable.
+ */
+ bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+
+ /*
+ * Create a table describing the layout of the CPU Extended
+ * Save Area.
+ */
+ if (use_xsaveopt) {
+ max_ext_n = flsl(xsave_mask);
+ xsave_area_desc = malloc(max_ext_n * sizeof(struct
+ xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+ /* x87 state */
+ xsave_area_desc[0].offset = 0;
+ xsave_area_desc[0].size = 160;
+ /* XMM */
+ xsave_area_desc[1].offset = 160;
+ xsave_area_desc[1].size = 288 - 160;
+
+ for (i = 2; i < max_ext_n; i++) {
+ cpuid_count(0xd, i, cp);
+ xsave_area_desc[i].offset = cp[1];
+ xsave_area_desc[i].size = cp[0];
+ }
+ }
+
+ fpu_save_area_zone = uma_zcreate("FPU_save_area",
+ cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
+ XSAVE_AREA_ALIGN - 1, 0);
+
+ start_emulating();
+ intr_restore(saveintr);
+}
+SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL);
+
+/*
+ * Free coprocessor (if we have it).
+ */
+void
+fpuexit(struct thread *td)
+{
+
+ critical_enter();
+ if (curthread == PCPU_GET(fpcurthread)) {
+ stop_emulating();
+ fpusave(curpcb->pcb_save);
+ start_emulating();
+ PCPU_SET(fpcurthread, 0);
+ }
+ critical_exit();
+}
+
+int
+fpuformat(void)
+{
+
+ return (_MC_FPFMT_XMM);
+}
+
+/*
+ * The following mechanism is used to ensure that the FPE_... value
+ * that is passed as a trapcode to the signal handler of the user
+ * process does not have more than one bit set.
+ *
+ * Multiple bits may be set if the user process modifies the control
+ * word while a status word bit is already set. While this is a sign
+ * of bad coding, we have no choice but to narrow them down to one
+ * bit, since we must not send a trapcode that is not exactly one of
+ * the FPE_ macros.
+ *
+ * The mechanism has a static table with 127 entries. Each combination
+ * of the 7 FPU status word exception bits directly translates to a
+ * position in this table, where a single FPE_... value is stored.
+ * This FPE_... value stored there is considered the "most important"
+ * of the exception bits and will be sent as the signal code. The
+ * precedence of the bits is based upon Intel Document "Numerical
+ * Applications", Chapter "Special Computational Situations".
+ *
+ * The macro to choose one of these values does these steps: 1) Throw
+ * away status word bits that cannot be masked. 2) Throw away the bits
+ * currently masked in the control word, assuming the user isn't
+ * interested in them anymore. 3) Reinsert status word bit 7 (stack
+ * fault) if it is set, which cannot be masked but must be preserved.
+ * 4) Use the remaining bits to point into the trapcode table.
+ *
+ * The 6 maskable bits in order of their preference, as stated in the
+ * above referenced Intel manual:
+ * 1 Invalid operation (FP_X_INV)
+ * 1a Stack underflow
+ * 1b Stack overflow
+ * 1c Operand of unsupported format
+ * 1d SNaN operand.
+ * 2 QNaN operand (not an exception, irrelevant here)
+ * 3 Any other invalid-operation not mentioned above or zero divide
+ * (FP_X_INV, FP_X_DZ)
+ * 4 Denormal operand (FP_X_DNML)
+ * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
+ * 6 Inexact result (FP_X_IMP)
+ */
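+
+/*
+ * A worked example (illustrative only): with a control word that
+ * masks everything except invalid-operation (low bits 0x3e), a
+ * status word of 0x41 (IE | SF) yields the index
+ * 0x41 & ((~0x3e & 0x3f) | 0x40) == 0x41, selecting FPE_FLTSUB from
+ * the table below, i.e. the stack fault is reported.
+ */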
+static char fpetable[128] = {
+ 0,
+ FPE_FLTINV, /* 1 - INV */
+ FPE_FLTUND, /* 2 - DNML */
+ FPE_FLTINV, /* 3 - INV | DNML */
+ FPE_FLTDIV, /* 4 - DZ */
+ FPE_FLTINV, /* 5 - INV | DZ */
+ FPE_FLTDIV, /* 6 - DNML | DZ */
+ FPE_FLTINV, /* 7 - INV | DNML | DZ */
+ FPE_FLTOVF, /* 8 - OFL */
+ FPE_FLTINV, /* 9 - INV | OFL */
+ FPE_FLTUND, /* A - DNML | OFL */
+ FPE_FLTINV, /* B - INV | DNML | OFL */
+ FPE_FLTDIV, /* C - DZ | OFL */
+ FPE_FLTINV, /* D - INV | DZ | OFL */
+ FPE_FLTDIV, /* E - DNML | DZ | OFL */
+ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
+ FPE_FLTUND, /* 10 - UFL */
+ FPE_FLTINV, /* 11 - INV | UFL */
+ FPE_FLTUND, /* 12 - DNML | UFL */
+ FPE_FLTINV, /* 13 - INV | DNML | UFL */
+ FPE_FLTDIV, /* 14 - DZ | UFL */
+ FPE_FLTINV, /* 15 - INV | DZ | UFL */
+ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
+ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
+ FPE_FLTOVF, /* 18 - OFL | UFL */
+ FPE_FLTINV, /* 19 - INV | OFL | UFL */
+ FPE_FLTUND, /* 1A - DNML | OFL | UFL */
+ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
+ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
+ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
+ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
+ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
+ FPE_FLTRES, /* 20 - IMP */
+ FPE_FLTINV, /* 21 - INV | IMP */
+ FPE_FLTUND, /* 22 - DNML | IMP */
+ FPE_FLTINV, /* 23 - INV | DNML | IMP */
+ FPE_FLTDIV, /* 24 - DZ | IMP */
+ FPE_FLTINV, /* 25 - INV | DZ | IMP */
+ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
+ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
+ FPE_FLTOVF, /* 28 - OFL | IMP */
+ FPE_FLTINV, /* 29 - INV | OFL | IMP */
+ FPE_FLTUND, /* 2A - DNML | OFL | IMP */
+ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
+ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
+ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
+ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
+ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
+ FPE_FLTUND, /* 30 - UFL | IMP */
+ FPE_FLTINV, /* 31 - INV | UFL | IMP */
+ FPE_FLTUND, /* 32 - DNML | UFL | IMP */
+ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
+ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
+ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
+ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
+ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
+ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
+ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
+ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
+ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
+ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
+ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
+ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
+ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
+ FPE_FLTSUB, /* 40 - STK */
+ FPE_FLTSUB, /* 41 - INV | STK */
+ FPE_FLTUND, /* 42 - DNML | STK */
+ FPE_FLTSUB, /* 43 - INV | DNML | STK */
+ FPE_FLTDIV, /* 44 - DZ | STK */
+ FPE_FLTSUB, /* 45 - INV | DZ | STK */
+ FPE_FLTDIV, /* 46 - DNML | DZ | STK */
+ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
+ FPE_FLTOVF, /* 48 - OFL | STK */
+ FPE_FLTSUB, /* 49 - INV | OFL | STK */
+ FPE_FLTUND, /* 4A - DNML | OFL | STK */
+ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
+ FPE_FLTDIV, /* 4C - DZ | OFL | STK */
+ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
+ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
+ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
+ FPE_FLTUND, /* 50 - UFL | STK */
+ FPE_FLTSUB, /* 51 - INV | UFL | STK */
+ FPE_FLTUND, /* 52 - DNML | UFL | STK */
+ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
+ FPE_FLTDIV, /* 54 - DZ | UFL | STK */
+ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
+ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
+ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
+ FPE_FLTOVF, /* 58 - OFL | UFL | STK */
+ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
+ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
+ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
+ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
+ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
+ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
+ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
+ FPE_FLTRES, /* 60 - IMP | STK */
+ FPE_FLTSUB, /* 61 - INV | IMP | STK */
+ FPE_FLTUND, /* 62 - DNML | IMP | STK */
+ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
+ FPE_FLTDIV, /* 64 - DZ | IMP | STK */
+ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
+ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
+ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
+ FPE_FLTOVF, /* 68 - OFL | IMP | STK */
+ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
+ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
+ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
+ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
+ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
+ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
+ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
+ FPE_FLTUND, /* 70 - UFL | IMP | STK */
+ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
+ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
+ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
+ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
+ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
+ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
+ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
+ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
+ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
+ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
+ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
+ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
+};
+
+/*
+ * Read the FP status and control words, then generate si_code value
+ * for SIGFPE. The error code chosen will be one of the
+ * FPE_... macros. It will be sent as the second argument to old
+ * BSD-style signal handlers and as "siginfo_t->si_code" (second
+ * argument) to SA_SIGINFO signal handlers.
+ *
+ * Some time ago, we cleared the x87 exceptions with FNCLEX there.
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
+ * usermode code which understands the FPU hardware enough to enable
+ * the exceptions can also handle clearing the exception state in the
+ * handler. The only consequence of not clearing the exception is the
+ * rethrow of the SIGFPE on return from the signal handler and
+ * reexecution of the corresponding instruction.
+ *
+ * For XMM traps, the exceptions were never cleared.
+ */
+int
+fputrap_x87(void)
+{
+ struct savefpu *pcb_save;
+ u_short control, status;
+
+ critical_enter();
+
+ /*
+ * Interrupt handling (for another interrupt) may have pushed the
+ * state to memory. Fetch the relevant parts of the state from
+ * wherever they are.
+ */
+ if (PCPU_GET(fpcurthread) != curthread) {
+ pcb_save = curpcb->pcb_save;
+ control = pcb_save->sv_env.en_cw;
+ status = pcb_save->sv_env.en_sw;
+ } else {
+ fnstcw(&control);
+ fnstsw(&status);
+ }
+
+ critical_exit();
+ return (fpetable[status & ((~control & 0x3f) | 0x40)]);
+}
+
+int
+fputrap_sse(void)
+{
+ u_int mxcsr;
+
+ critical_enter();
+ if (PCPU_GET(fpcurthread) != curthread)
+ mxcsr = curpcb->pcb_save->sv_env.en_mxcsr;
+ else
+ stmxcsr(&mxcsr);
+ critical_exit();
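+	/*
+	 * MXCSR keeps the exception flags in bits 0-5 and their mask
+	 * bits in bits 7-12, so ~mxcsr >> 7 aligns the inverted masks
+	 * with the flags and keeps only the unmasked exceptions.
+	 */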
+ return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
+}
+
+/*
+ * Implement device not available (DNA) exception
+ *
+ * It would be better to switch FP context here (if curthread != fpcurthread)
+ * and not necessarily for every context switch, but it is too hard to
+ * access foreign pcb's.
+ */
+
+static int err_count = 0;
+
+void
+fpudna(void)
+{
+
+ critical_enter();
+ if (PCPU_GET(fpcurthread) == curthread) {
+ printf("fpudna: fpcurthread == curthread %d times\n",
+ ++err_count);
+ stop_emulating();
+ critical_exit();
+ return;
+ }
+ if (PCPU_GET(fpcurthread) != NULL) {
+ printf("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+ PCPU_GET(fpcurthread),
+ PCPU_GET(fpcurthread)->td_proc->p_pid,
+ curthread, curthread->td_proc->p_pid);
+ panic("fpudna");
+ }
+ stop_emulating();
+ /*
+ * Record new context early in case frstor causes a trap.
+ */
+ PCPU_SET(fpcurthread, curthread);
+
+ fpu_clean_state();
+
+ if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+ /*
+ * This is the first time this thread has used the FPU or
+ * the PCB doesn't contain a clean FPU state. Explicitly
+ * load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in PCB instead of directly loading from
+ * fpu_initialstate, to ignite the XSAVEOPT
+ * tracking engine.
+ */
+ bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
+ fpurestore(curpcb->pcb_save);
+ if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+ fldcw(curpcb->pcb_initial_fpucw);
+ if (PCB_USER_FPU(curpcb))
+ set_pcb_flags(curpcb,
+ PCB_FPUINITDONE | PCB_USERFPUINITDONE);
+ else
+ set_pcb_flags(curpcb, PCB_FPUINITDONE);
+ } else
+ fpurestore(curpcb->pcb_save);
+ critical_exit();
+}
+
+void
+fpudrop(void)
+{
+ struct thread *td;
+
+ td = PCPU_GET(fpcurthread);
+ KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
+ CRITICAL_ASSERT(td);
+ PCPU_SET(fpcurthread, NULL);
+ clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE);
+ start_emulating();
+}
+
+/*
+ * Get the user state of the FPU into pcb->pcb_user_save without
+ * dropping ownership (if possible). It returns the FPU ownership
+ * status.
+ */
+int
+fpugetregs(struct thread *td)
+{
+ struct pcb *pcb;
+ uint64_t *xstate_bv, bit;
+ char *sa;
+ int max_ext_n, i;
+
+ pcb = td->td_pcb;
+ if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
+ bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb),
+ cpu_max_ext_state_size);
+ get_pcb_user_save_pcb(pcb)->sv_env.en_cw =
+ pcb->pcb_initial_fpucw;
+ fpuuserinited(td);
+ return (_MC_FPOWNED_PCB);
+ }
+ critical_enter();
+ if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
+ fpusave(get_pcb_user_save_pcb(pcb));
+ critical_exit();
+ return (_MC_FPOWNED_FPU);
+ } else {
+ critical_exit();
+ if (use_xsaveopt) {
+ /*
+ * Handle partially saved state.
+ */
+ sa = (char *)get_pcb_user_save_pcb(pcb);
+ xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+ offsetof(struct xstate_hdr, xstate_bv));
+ max_ext_n = flsl(xsave_mask);
+ for (i = 0; i < max_ext_n; i++) {
+				bit = (uint64_t)1 << i;
+ if ((*xstate_bv & bit) != 0)
+ continue;
+ bcopy((char *)fpu_initialstate +
+ xsave_area_desc[i].offset,
+ sa + xsave_area_desc[i].offset,
+ xsave_area_desc[i].size);
+ *xstate_bv |= bit;
+ }
+ }
+ return (_MC_FPOWNED_PCB);
+ }
+}
+
+void
+fpuuserinited(struct thread *td)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ if (PCB_USER_FPU(pcb))
+ set_pcb_flags(pcb,
+ PCB_FPUINITDONE | PCB_USERFPUINITDONE);
+ else
+ set_pcb_flags(pcb, PCB_FPUINITDONE);
+}
+
+int
+fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
+{
+ struct xstate_hdr *hdr, *ehdr;
+ size_t len, max_len;
+ uint64_t bv;
+
+ /* XXXKIB should we clear all extended state in xstate_bv instead ? */
+ if (xfpustate == NULL)
+ return (0);
+ if (!use_xsave)
+ return (EOPNOTSUPP);
+
+ len = xfpustate_size;
+ if (len < sizeof(struct xstate_hdr))
+ return (EINVAL);
+ max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+ if (len > max_len)
+ return (EINVAL);
+
+ ehdr = (struct xstate_hdr *)xfpustate;
+ bv = ehdr->xstate_bv;
+
+ /*
+ * Avoid #gp.
+ */
+ if (bv & ~xsave_mask)
+ return (EINVAL);
+ if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) !=
+ (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE))
+ return (EINVAL);
+
+ hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);
+
+ hdr->xstate_bv = bv;
+ bcopy(xfpustate + sizeof(struct xstate_hdr),
+ (char *)(hdr + 1), len - sizeof(struct xstate_hdr));
+
+ return (0);
+}
+
+/*
+ * Set the state of the FPU.
+ */
+int
+fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
+ size_t xfpustate_size)
+{
+ struct pcb *pcb;
+ int error;
+
+ pcb = td->td_pcb;
+ critical_enter();
+ if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
+ error = fpusetxstate(td, xfpustate, xfpustate_size);
+ if (error != 0) {
+ critical_exit();
+ return (error);
+ }
+ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
+ fpurestore(get_pcb_user_save_td(td));
+ critical_exit();
+ set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE);
+ } else {
+ critical_exit();
+ error = fpusetxstate(td, xfpustate, xfpustate_size);
+ if (error != 0)
+ return (error);
+ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
+ fpuuserinited(td);
+ }
+ return (0);
+}
+
+/*
+ * On AuthenticAMD processors, the fxrstor instruction does not restore
+ * the x87's stored last instruction pointer, last data pointer, and last
+ * opcode values, except in the rare case in which the exception summary
+ * (ES) bit in the x87 status word is set to 1.
+ *
+ * In order to avoid leaking this information across processes, we clean
+ * these values by performing a dummy load before executing fxrstor().
+ */
+static void
+fpu_clean_state(void)
+{
+ static float dummy_variable = 0.0;
+ u_short status;
+
+ /*
+ * Clear the ES bit in the x87 status word if it is currently
+ * set, in order to avoid causing a fault in the upcoming load.
+ */
+ fnstsw(&status);
+ if (status & 0x80)
+ fnclex();
+
+ /*
+ * Load the dummy variable into the x87 stack. This mangles
+ * the x87 stack, but we don't care since we're about to call
+ * fxrstor() anyway.
+ */
+ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
+}
+
+/*
+ * This really sucks. We want the acpi version only, but it requires
+ * the isa_if.h file in order to get the definitions.
+ */
+#include "opt_isa.h"
+#ifdef DEV_ISA
+#include <isa/isavar.h>
+/*
+ * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
+ */
+static struct isa_pnp_id fpupnp_ids[] = {
+ { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
+ { 0 }
+};
+
+static int
+fpupnp_probe(device_t dev)
+{
+ int result;
+
+ result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids);
+ if (result <= 0)
+ device_quiet(dev);
+ return (result);
+}
+
+static int
+fpupnp_attach(device_t dev)
+{
+
+ return (0);
+}
+
+static device_method_t fpupnp_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, fpupnp_probe),
+ DEVMETHOD(device_attach, fpupnp_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ { 0, 0 }
+};
+
+static driver_t fpupnp_driver = {
+ "fpupnp",
+ fpupnp_methods,
+ 1, /* no softc */
+};
+
+static devclass_t fpupnp_devclass;
+
+DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0);
+#endif /* DEV_ISA */
+
+static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
+ "Kernel contexts for FPU state");
+
+#define FPU_KERN_CTX_FPUINITDONE 0x01
+
+struct fpu_kern_ctx {
+ struct savefpu *prev;
+ uint32_t flags;
+ char hwstate1[];
+};
+
+struct fpu_kern_ctx *
+fpu_kern_alloc_ctx(u_int flags)
+{
+ struct fpu_kern_ctx *res;
+ size_t sz;
+
+ sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
+ cpu_max_ext_state_size;
+ res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
+ M_NOWAIT : M_WAITOK) | M_ZERO);
+ return (res);
+}
+
+void
+fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
+{
+
+ /* XXXKIB clear the memory ? */
+ free(ctx, M_FPUKERN_CTX);
+}
+
+static struct savefpu *
+fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
+{
+ vm_offset_t p;
+
+ p = (vm_offset_t)&ctx->hwstate1;
+ p = roundup2(p, XSAVE_AREA_ALIGN);
+ return ((struct savefpu *)p);
+}
+
+int
+fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
+ get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
+ ctx->flags = 0;
+ if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0)
+ ctx->flags |= FPU_KERN_CTX_FPUINITDONE;
+ fpuexit(td);
+ ctx->prev = pcb->pcb_save;
+ pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
+ set_pcb_flags(pcb, PCB_KERNFPU);
+ clear_pcb_flags(pcb, PCB_FPUINITDONE);
+ return (0);
+}
+
+int
+fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ critical_enter();
+ if (curthread == PCPU_GET(fpcurthread))
+ fpudrop();
+ critical_exit();
+ pcb->pcb_save = ctx->prev;
+ if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
+ if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
+ set_pcb_flags(pcb, PCB_FPUINITDONE);
+ clear_pcb_flags(pcb, PCB_KERNFPU);
+ } else
+ clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU);
+ } else {
+ if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0)
+ set_pcb_flags(pcb, PCB_FPUINITDONE);
+ else
+ clear_pcb_flags(pcb, PCB_FPUINITDONE);
+ KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
+ }
+ return (0);
+}
+
+int
+fpu_kern_thread(u_int flags)
+{
+
+ KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
+ ("Only kthread may use fpu_kern_thread"));
+ KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
+ ("mangled pcb_save"));
+ KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
+
+ set_pcb_flags(curpcb, PCB_KERNFPU);
+ return (0);
+}
+
+int
+is_fpu_kern_thread(u_int flags)
+{
+
+ if ((curthread->td_pflags & TDP_KTHREAD) == 0)
+ return (0);
+ return ((curpcb->pcb_flags & PCB_KERNFPU) != 0);
+}
+
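+/*
+ * Illustrative usage sketch (not part of the original sources): how a
+ * kernel consumer brackets FPU-using code with the context API above,
+ * modeled on in-tree users.  The function name is hypothetical.
+ */
+#ifdef notyet
+static void
+example_fpu_consumer(void)
+{
+	struct fpu_kern_ctx *ctx;
+
+	ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL);
+	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
+	/* SSE/AVX computation runs here against a private save area. */
+	fpu_kern_leave(curthread, ctx);
+	fpu_kern_free_ctx(ctx);
+}
+#endif
+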
+/*
+ * FPU save area alloc/free/init utility routines
+ */
+struct savefpu *
+fpu_save_area_alloc(void)
+{
+
+ return (uma_zalloc(fpu_save_area_zone, 0));
+}
+
+void
+fpu_save_area_free(struct savefpu *fsa)
+{
+
+ uma_zfree(fpu_save_area_zone, fsa);
+}
+
+void
+fpu_save_area_reset(struct savefpu *fsa)
+{
+
+ bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size);
+}
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
new file mode 100644
index 0000000..5775c8f
--- /dev/null
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -0,0 +1,117 @@
+/*-
+ * Copyright (c) 2004 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/signal.h>
+
+#include <machine/frame.h>
+#include <machine/gdb_machdep.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/reg.h>
+#include <machine/trap.h>
+#include <machine/endian.h>
+
+#include <gdb/gdb.h>
+
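+/*
+ * Register numbers follow GDB's amd64 ordering: 0-15 map to
+ * rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp and r8-r15, 16 is rip and
+ * 17-19 are rflags, cs and ss.  Caller-saved registers are available
+ * only from the trapframe, and only when the queried thread is the
+ * one that entered the debugger; the callee-saved registers are taken
+ * from the pcb, where cpu_switch() stores them.
+ */
+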
+void *
+gdb_cpu_getreg(int regnum, size_t *regsz)
+{
+
+ *regsz = gdb_cpu_regsz(regnum);
+
+ if (kdb_thread == curthread) {
+ switch (regnum) {
+ case 0: return (&kdb_frame->tf_rax);
+ case 2: return (&kdb_frame->tf_rcx);
+ case 3: return (&kdb_frame->tf_rdx);
+ case 4: return (&kdb_frame->tf_rsi);
+ case 5: return (&kdb_frame->tf_rdi);
+ case 8: return (&kdb_frame->tf_r8);
+ case 9: return (&kdb_frame->tf_r9);
+ case 10: return (&kdb_frame->tf_r10);
+ case 11: return (&kdb_frame->tf_r11);
+ case 17: return (&kdb_frame->tf_rflags);
+ case 18: return (&kdb_frame->tf_cs);
+ case 19: return (&kdb_frame->tf_ss);
+ }
+ }
+ switch (regnum) {
+ case 1: return (&kdb_thrctx->pcb_rbx);
+ case 6: return (&kdb_thrctx->pcb_rbp);
+ case 7: return (&kdb_thrctx->pcb_rsp);
+ case 12: return (&kdb_thrctx->pcb_r12);
+ case 13: return (&kdb_thrctx->pcb_r13);
+ case 14: return (&kdb_thrctx->pcb_r14);
+ case 15: return (&kdb_thrctx->pcb_r15);
+ case 16: return (&kdb_thrctx->pcb_rip);
+ }
+ return (NULL);
+}
+
+void
+gdb_cpu_setreg(int regnum, void *val)
+{
+
+ switch (regnum) {
+ case GDB_REG_PC:
+ kdb_thrctx->pcb_rip = *(register_t *)val;
+ if (kdb_thread == curthread)
+ kdb_frame->tf_rip = *(register_t *)val;
+ }
+}
+
+int
+gdb_cpu_signal(int type, int code)
+{
+
+ switch (type & ~T_USER) {
+ case 0: return (SIGFPE); /* Divide by zero. */
+ case 1: return (SIGTRAP); /* Debug exception. */
+ case 3: return (SIGTRAP); /* Breakpoint. */
+ case 4: return (SIGSEGV); /* into instr. (overflow). */
+ case 5: return (SIGURG); /* bound instruction. */
+ case 6: return (SIGILL); /* Invalid opcode. */
+ case 7: return (SIGFPE); /* Coprocessor not present. */
+ case 8: return (SIGEMT); /* Double fault. */
+ case 9: return (SIGSEGV); /* Coprocessor segment overrun. */
+ case 10: return (SIGTRAP); /* Invalid TSS (also single-step). */
+ case 11: return (SIGSEGV); /* Segment not present. */
+ case 12: return (SIGSEGV); /* Stack exception. */
+ case 13: return (SIGSEGV); /* General protection. */
+ case 14: return (SIGSEGV); /* Page fault. */
+ case 16: return (SIGEMT); /* Coprocessor error. */
+ }
+ return (SIGEMT);
+}
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
new file mode 100644
index 0000000..174927a
--- /dev/null
+++ b/sys/amd64/amd64/genassym.c
@@ -0,0 +1,252 @@
+/*-
+ * Copyright (c) 1982, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+#include "opt_hwpmc_hooks.h"
+#include "opt_kstack_pages.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/assym.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#ifdef HWPMC_HOOKS
+#include <sys/pmckern.h>
+#endif
+#include <sys/errno.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/socket.h>
+#include <sys/resourcevar.h>
+#include <sys/ucontext.h>
+#include <machine/tss.h>
+#include <sys/vmmeter.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <nfs/nfsproto.h>
+#include <nfsclient/nfs.h>
+#include <nfs/nfsdiskless.h>
+#include <x86/apicreg.h>
+#include <machine/cpu.h>
+#include <machine/pcb.h>
+#include <machine/sigframe.h>
+#include <machine/proc.h>
+#include <machine/segments.h>
+
+ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
+ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
+ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+
+ASSYM(P_MD, offsetof(struct proc, p_md));
+ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
+ASSYM(MD_LDT_SD, offsetof(struct mdproc, md_ldt_sd));
+
+ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
+ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
+ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags));
+ASSYM(TD_PROC, offsetof(struct thread, td_proc));
+ASSYM(TD_TID, offsetof(struct thread, td_tid));
+ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
+
+ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
+ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
+
+ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN);
+ASSYM(TDP_KTHREAD, TDP_KTHREAD);
+
+ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
+ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
+ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
+ASSYM(KSTACK_PAGES, KSTACK_PAGES);
+ASSYM(PAGE_SIZE, PAGE_SIZE);
+ASSYM(NPTEPG, NPTEPG);
+ASSYM(NPDEPG, NPDEPG);
+ASSYM(addr_PTmap, addr_PTmap);
+ASSYM(addr_PDmap, addr_PDmap);
+ASSYM(addr_PDPmap, addr_PDPmap);
+ASSYM(addr_PML4map, addr_PML4map);
+ASSYM(addr_PML4pml4e, addr_PML4pml4e);
+ASSYM(PDESIZE, sizeof(pd_entry_t));
+ASSYM(PTESIZE, sizeof(pt_entry_t));
+ASSYM(PTESHIFT, PTESHIFT);
+ASSYM(PAGE_SHIFT, PAGE_SHIFT);
+ASSYM(PAGE_MASK, PAGE_MASK);
+ASSYM(PDRSHIFT, PDRSHIFT);
+ASSYM(PDPSHIFT, PDPSHIFT);
+ASSYM(PML4SHIFT, PML4SHIFT);
+ASSYM(val_KPDPI, KPDPI);
+ASSYM(val_KPML4I, KPML4I);
+ASSYM(val_PML4PML4I, PML4PML4I);
+ASSYM(USRSTACK, USRSTACK);
+ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
+ASSYM(KERNBASE, KERNBASE);
+ASSYM(DMAP_MIN_ADDRESS, DMAP_MIN_ADDRESS);
+ASSYM(DMAP_MAX_ADDRESS, DMAP_MAX_ADDRESS);
+ASSYM(MCLBYTES, MCLBYTES);
+
+ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15));
+ASSYM(PCB_R14, offsetof(struct pcb, pcb_r14));
+ASSYM(PCB_R13, offsetof(struct pcb, pcb_r13));
+ASSYM(PCB_R12, offsetof(struct pcb, pcb_r12));
+ASSYM(PCB_RBP, offsetof(struct pcb, pcb_rbp));
+ASSYM(PCB_RSP, offsetof(struct pcb, pcb_rsp));
+ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx));
+ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip));
+ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
+ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
+ASSYM(PCB_KGSBASE, offsetof(struct pcb, pcb_kgsbase));
+ASSYM(PCB_CR0, offsetof(struct pcb, pcb_cr0));
+ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2));
+ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
+ASSYM(PCB_CR4, offsetof(struct pcb, pcb_cr4));
+ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
+ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
+ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
+ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
+ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
+ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
+ASSYM(PCB_GDT, offsetof(struct pcb, pcb_gdt));
+ASSYM(PCB_IDT, offsetof(struct pcb, pcb_idt));
+ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt));
+ASSYM(PCB_TR, offsetof(struct pcb, pcb_tr));
+ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
+ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
+ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
+ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
+ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
+ASSYM(PCB_USERFPU, sizeof(struct pcb));
+ASSYM(PCB_EFER, offsetof(struct pcb, pcb_efer));
+ASSYM(PCB_STAR, offsetof(struct pcb, pcb_star));
+ASSYM(PCB_LSTAR, offsetof(struct pcb, pcb_lstar));
+ASSYM(PCB_CSTAR, offsetof(struct pcb, pcb_cstar));
+ASSYM(PCB_SFMASK, offsetof(struct pcb, pcb_sfmask));
+ASSYM(PCB_XSMASK, offsetof(struct pcb, pcb_xsmask));
+ASSYM(PCB_FPUSUSPEND, offsetof(struct pcb, pcb_fpususpend));
+ASSYM(PCB_SIZE, sizeof(struct pcb));
+ASSYM(PCB_FULL_IRET, PCB_FULL_IRET);
+ASSYM(PCB_DBREGS, PCB_DBREGS);
+ASSYM(PCB_GS32BIT, PCB_GS32BIT);
+ASSYM(PCB_32BIT, PCB_32BIT);
+
+ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0));
+
+ASSYM(TF_R15, offsetof(struct trapframe, tf_r15));
+ASSYM(TF_R14, offsetof(struct trapframe, tf_r14));
+ASSYM(TF_R13, offsetof(struct trapframe, tf_r13));
+ASSYM(TF_R12, offsetof(struct trapframe, tf_r12));
+ASSYM(TF_R11, offsetof(struct trapframe, tf_r11));
+ASSYM(TF_R10, offsetof(struct trapframe, tf_r10));
+ASSYM(TF_R9, offsetof(struct trapframe, tf_r9));
+ASSYM(TF_R8, offsetof(struct trapframe, tf_r8));
+ASSYM(TF_RDI, offsetof(struct trapframe, tf_rdi));
+ASSYM(TF_RSI, offsetof(struct trapframe, tf_rsi));
+ASSYM(TF_RBP, offsetof(struct trapframe, tf_rbp));
+ASSYM(TF_RBX, offsetof(struct trapframe, tf_rbx));
+ASSYM(TF_RDX, offsetof(struct trapframe, tf_rdx));
+ASSYM(TF_RCX, offsetof(struct trapframe, tf_rcx));
+ASSYM(TF_RAX, offsetof(struct trapframe, tf_rax));
+ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
+ASSYM(TF_ADDR, offsetof(struct trapframe, tf_addr));
+ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
+ASSYM(TF_RIP, offsetof(struct trapframe, tf_rip));
+ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
+ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags));
+ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp));
+ASSYM(TF_SS, offsetof(struct trapframe, tf_ss));
+ASSYM(TF_DS, offsetof(struct trapframe, tf_ds));
+ASSYM(TF_ES, offsetof(struct trapframe, tf_es));
+ASSYM(TF_FS, offsetof(struct trapframe, tf_fs));
+ASSYM(TF_GS, offsetof(struct trapframe, tf_gs));
+ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags));
+ASSYM(TF_SIZE, sizeof(struct trapframe));
+ASSYM(TF_HASSEGS, TF_HASSEGS);
+
+ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
+ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
+ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_rflags));
+ASSYM(ENOENT, ENOENT);
+ASSYM(EFAULT, EFAULT);
+ASSYM(ENAMETOOLONG, ENAMETOOLONG);
+ASSYM(MAXCOMLEN, MAXCOMLEN);
+ASSYM(MAXPATHLEN, MAXPATHLEN);
+ASSYM(PC_SIZEOF, sizeof(struct pcpu));
+ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace));
+ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
+ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread));
+ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
+ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
+ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
+ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp));
+ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
+ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
+ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
+ASSYM(PC_FS32P, offsetof(struct pcpu, pc_fs32p));
+ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
+ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
+ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
+ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
+
+ASSYM(LA_VER, offsetof(struct LAPIC, version));
+ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
+ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
+ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
+ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
+ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
+ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
+
+ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
+ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
+ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
+ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
+ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL));
+ASSYM(KUF32SEL, GSEL(GUFS32_SEL, SEL_UPL));
+ASSYM(KUG32SEL, GSEL(GUGS32_SEL, SEL_UPL));
+ASSYM(TSSSEL, GSEL(GPROC0_SEL, SEL_KPL));
+ASSYM(LDTSEL, GSEL(GUSERLDT_SEL, SEL_KPL));
+ASSYM(SEL_RPL_MASK, SEL_RPL_MASK);
+
+#ifdef HWPMC_HOOKS
+ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
+#endif
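+
+/*
+ * Illustrative note (not part of the original sources): this file is
+ * compiled and post-processed into assym.s, where each ASSYM() entry
+ * becomes a plain assembler constant, roughly:
+ *
+ *	#define	PCB_RBX	0x30
+ *
+ * (the 0x30 offset here is hypothetical; the real value depends on
+ * the pcb layout).  Assembly such as cpu_switch.S then uses the
+ * constant directly, e.g.:
+ *
+ *	movq	%rbx,PCB_RBX(%r8)
+ */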
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
new file mode 100644
index 0000000..2517498
--- /dev/null
+++ b/sys/amd64/amd64/identcpu.c
@@ -0,0 +1,688 @@
+/*-
+ * Copyright (c) 1992 Terrence R. Lambert.
+ * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
+ * Copyright (c) 1997 KATO Takenori.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_cpu.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/power.h>
+
+#include <machine/asmacros.h>
+#include <machine/clock.h>
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/md_var.h>
+
+#include <x86/isa/icu.h>
+
+/* XXX - should be in header file: */
+void printcpuinfo(void);
+void identify_cpu(void);
+void earlysetcpuclass(void);
+void panicifcpuunsupported(void);
+
+static u_int find_cpu_vendor_id(void);
+static void print_AMD_info(void);
+static void print_AMD_assoc(int i);
+static void print_via_padlock_info(void);
+
+int cpu_class;
+char machine[] = "amd64";
+
+#ifdef SCTL_MASK32
+extern int adaptive_machine_arch;
+#endif
+
+static int
+sysctl_hw_machine(SYSCTL_HANDLER_ARGS)
+{
+#ifdef SCTL_MASK32
+ static const char machine32[] = "i386";
+#endif
+ int error;
+
+#ifdef SCTL_MASK32
+ if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch)
+ error = SYSCTL_OUT(req, machine32, sizeof(machine32));
+ else
+#endif
+ error = SYSCTL_OUT(req, machine, sizeof(machine));
+ return (error);
+}
+SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD,
+ NULL, 0, sysctl_hw_machine, "A", "Machine class");
+
+static char cpu_model[128];
+SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
+ cpu_model, 0, "Machine model");
+
+static int hw_clockrate;
+SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD,
+ &hw_clockrate, 0, "CPU instruction clock rate");
+
+static eventhandler_tag tsc_post_tag;
+
+static char cpu_brand[48];
+
+static struct {
+ char *cpu_name;
+ int cpu_class;
+} amd64_cpus[] = {
+ { "Clawhammer", CPUCLASS_K8 }, /* CPU_CLAWHAMMER */
+ { "Sledgehammer", CPUCLASS_K8 }, /* CPU_SLEDGEHAMMER */
+};
+
+static struct {
+ char *vendor;
+ u_int vendor_id;
+} cpu_vendors[] = {
+ { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */
+ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */
+ { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */
+};
+
+void
+printcpuinfo(void)
+{
+ u_int regs[4], i;
+ char *brand;
+
+ cpu_class = amd64_cpus[cpu].cpu_class;
+ printf("CPU: ");
+ strncpy(cpu_model, amd64_cpus[cpu].cpu_name, sizeof (cpu_model));
+
+ /* Check for extended CPUID information and a processor name. */
+ if (cpu_exthigh >= 0x80000004) {
+ brand = cpu_brand;
+ for (i = 0x80000002; i < 0x80000005; i++) {
+ do_cpuid(i, regs);
+ memcpy(brand, regs, sizeof(regs));
+ brand += sizeof(regs);
+ }
+ }
+
+ switch (cpu_vendor_id) {
+ case CPU_VENDOR_INTEL:
+ /* Please make up your mind folks! */
+ strcat(cpu_model, "EM64T");
+ break;
+ case CPU_VENDOR_AMD:
+ /*
+ * Values taken from AMD Processor Recognition
+ * http://www.amd.com/K6/k6docs/pdf/20734g.pdf
+ * (also describes ``Features'' encodings).
+ */
+ strcpy(cpu_model, "AMD ");
+ if ((cpu_id & 0xf00) == 0xf00)
+ strcat(cpu_model, "AMD64 Processor");
+ else
+ strcat(cpu_model, "Unknown");
+ break;
+ case CPU_VENDOR_CENTAUR:
+ strcpy(cpu_model, "VIA ");
+ if ((cpu_id & 0xff0) == 0x6f0)
+ strcat(cpu_model, "Nano Processor");
+ else
+ strcat(cpu_model, "Unknown");
+ break;
+ default:
+ strcat(cpu_model, "Unknown");
+ break;
+ }
+
+ /*
+ * Replace cpu_model with cpu_brand minus leading spaces if
+ * we have one.
+ */
+ brand = cpu_brand;
+ while (*brand == ' ')
+ ++brand;
+ if (*brand != '\0')
+ strcpy(cpu_model, brand);
+
+ printf("%s (", cpu_model);
+ switch(cpu_class) {
+ case CPUCLASS_K8:
+ if (tsc_freq != 0) {
+ hw_clockrate = (tsc_freq + 5000) / 1000000;
+ printf("%jd.%02d-MHz ",
+ (intmax_t)(tsc_freq + 4999) / 1000000,
+ (u_int)((tsc_freq + 4999) / 10000) % 100);
+ }
+ printf("K8");
+ break;
+ default:
+ printf("Unknown"); /* will panic below... */
+ }
+ printf("-class CPU)\n");
+ if (*cpu_vendor)
+ printf(" Origin = \"%s\"", cpu_vendor);
+ if (cpu_id)
+ printf(" Id = 0x%x", cpu_id);
+
+ if (cpu_vendor_id == CPU_VENDOR_INTEL ||
+ cpu_vendor_id == CPU_VENDOR_AMD ||
+ cpu_vendor_id == CPU_VENDOR_CENTAUR) {
+ printf(" Family = 0x%x", CPUID_TO_FAMILY(cpu_id));
+ printf(" Model = 0x%x", CPUID_TO_MODEL(cpu_id));
+ printf(" Stepping = %u", cpu_id & CPUID_STEPPING);
+
+ /*
+ * AMD CPUID Specification
+ * http://support.amd.com/us/Embedded_TechDocs/25481.pdf
+ *
+ * Intel Processor Identification and CPUID Instruction
+ * http://www.intel.com/assets/pdf/appnote/241618.pdf
+ */
+ if (cpu_high > 0) {
+
+ /*
+ * Here we should probably set up flags indicating
+ * whether or not various features are available.
+ * The interesting ones are probably VME, PSE, PAE,
+ * and PGE. The code already assumes without bothering
+ * to check that all CPUs >= Pentium have a TSC and
+ * MSRs.
+ */
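+			/*
+			 * Note on the %b strings below: the leading
+			 * "\020" (decimal 16) selects hex output for
+			 * the value, and each "\<n>NAME" sequence
+			 * prints NAME when the n-th (1-based) bit is
+			 * set.  For example, cpu_feature == 0x3 would
+			 * print "0x3<FPU,VME>".
+			 */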
+ printf("\n Features=0x%b", cpu_feature,
+ "\020"
+ "\001FPU" /* Integral FPU */
+ "\002VME" /* Extended VM86 mode support */
+ "\003DE" /* Debugging Extensions (CR4.DE) */
+ "\004PSE" /* 4MByte page tables */
+ "\005TSC" /* Timestamp counter */
+ "\006MSR" /* Machine specific registers */
+ "\007PAE" /* Physical address extension */
+ "\010MCE" /* Machine Check support */
+ "\011CX8" /* CMPEXCH8 instruction */
+ "\012APIC" /* SMP local APIC */
+ "\013oldMTRR" /* Previous implementation of MTRR */
+ "\014SEP" /* Fast System Call */
+ "\015MTRR" /* Memory Type Range Registers */
+ "\016PGE" /* PG_G (global bit) support */
+ "\017MCA" /* Machine Check Architecture */
+ "\020CMOV" /* CMOV instruction */
+ "\021PAT" /* Page attributes table */
+ "\022PSE36" /* 36 bit address space support */
+ "\023PN" /* Processor Serial number */
+ "\024CLFLUSH" /* Has the CLFLUSH instruction */
+ "\025<b20>"
+ "\026DTS" /* Debug Trace Store */
+ "\027ACPI" /* ACPI support */
+ "\030MMX" /* MMX instructions */
+ "\031FXSR" /* FXSAVE/FXRSTOR */
+ "\032SSE" /* Streaming SIMD Extensions */
+ "\033SSE2" /* Streaming SIMD Extensions #2 */
+ "\034SS" /* Self snoop */
+ "\035HTT" /* Hyperthreading (see EBX bit 16-23) */
+ "\036TM" /* Thermal Monitor clock slowdown */
+ "\037IA64" /* CPU can execute IA64 instructions */
+ "\040PBE" /* Pending Break Enable */
+ );
+
+ if (cpu_feature2 != 0) {
+ printf("\n Features2=0x%b", cpu_feature2,
+ "\020"
+ "\001SSE3" /* SSE3 */
+ "\002PCLMULQDQ" /* Carry-Less Mul Quadword */
+ "\003DTES64" /* 64-bit Debug Trace */
+ "\004MON" /* MONITOR/MWAIT Instructions */
+ "\005DS_CPL" /* CPL Qualified Debug Store */
+ "\006VMX" /* Virtual Machine Extensions */
+ "\007SMX" /* Safer Mode Extensions */
+ "\010EST" /* Enhanced SpeedStep */
+ "\011TM2" /* Thermal Monitor 2 */
+ "\012SSSE3" /* SSSE3 */
+ "\013CNXT-ID" /* L1 context ID available */
+ "\014<b11>"
+ "\015FMA" /* Fused Multiply Add */
+ "\016CX16" /* CMPXCHG16B Instruction */
+ "\017xTPR" /* Send Task Priority Messages*/
+ "\020PDCM" /* Perf/Debug Capability MSR */
+ "\021<b16>"
+ "\022PCID" /* Process-context Identifiers*/
+ "\023DCA" /* Direct Cache Access */
+ "\024SSE4.1" /* SSE 4.1 */
+ "\025SSE4.2" /* SSE 4.2 */
+ "\026x2APIC" /* xAPIC Extensions */
+ "\027MOVBE" /* MOVBE Instruction */
+ "\030POPCNT" /* POPCNT Instruction */
+ "\031TSCDLT" /* TSC-Deadline Timer */
+ "\032AESNI" /* AES Crypto */
+ "\033XSAVE" /* XSAVE/XRSTOR States */
+ "\034OSXSAVE" /* OS-Enabled State Management*/
+ "\035AVX" /* Advanced Vector Extensions */
+ "\036F16C" /* Half-precision conversions */
+ "\037RDRAND" /* RDRAND Instruction */
+ "\040HV" /* Hypervisor */
+ );
+ }
+
+ if (amd_feature != 0) {
+ printf("\n AMD Features=0x%b", amd_feature,
+ "\020" /* in hex */
+ "\001<s0>" /* Same */
+ "\002<s1>" /* Same */
+ "\003<s2>" /* Same */
+ "\004<s3>" /* Same */
+ "\005<s4>" /* Same */
+ "\006<s5>" /* Same */
+ "\007<s6>" /* Same */
+ "\010<s7>" /* Same */
+ "\011<s8>" /* Same */
+ "\012<s9>" /* Same */
+ "\013<b10>" /* Undefined */
+ "\014SYSCALL" /* Have SYSCALL/SYSRET */
+ "\015<s12>" /* Same */
+ "\016<s13>" /* Same */
+ "\017<s14>" /* Same */
+ "\020<s15>" /* Same */
+ "\021<s16>" /* Same */
+ "\022<s17>" /* Same */
+ "\023<b18>" /* Reserved, unknown */
+ "\024MP" /* Multiprocessor Capable */
+ "\025NX" /* Has EFER.NXE, NX */
+ "\026<b21>" /* Undefined */
+ "\027MMX+" /* AMD MMX Extensions */
+ "\030<s23>" /* Same */
+ "\031<s24>" /* Same */
+ "\032FFXSR" /* Fast FXSAVE/FXRSTOR */
+ "\033Page1GB" /* 1-GB large page support */
+ "\034RDTSCP" /* RDTSCP */
+ "\035<b28>" /* Undefined */
+ "\036LM" /* 64 bit long mode */
+ "\0373DNow!+" /* AMD 3DNow! Extensions */
+ "\0403DNow!" /* AMD 3DNow! */
+ );
+ }
+
+ if (amd_feature2 != 0) {
+ printf("\n AMD Features2=0x%b", amd_feature2,
+ "\020"
+ "\001LAHF" /* LAHF/SAHF in long mode */
+ "\002CMP" /* CMP legacy */
+ "\003SVM" /* Secure Virtual Mode */
+ "\004ExtAPIC" /* Extended APIC register */
+ "\005CR8" /* CR8 in legacy mode */
+ "\006ABM" /* LZCNT instruction */
+ "\007SSE4A" /* SSE4A */
+ "\010MAS" /* Misaligned SSE mode */
+ "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */
+ "\012OSVW" /* OS visible workaround */
+ "\013IBS" /* Instruction based sampling */
+ "\014XOP" /* XOP extended instructions */
+ "\015SKINIT" /* SKINIT/STGI */
+ "\016WDT" /* Watchdog timer */
+ "\017<b14>"
+ "\020LWP" /* Lightweight Profiling */
+ "\021FMA4" /* 4-operand FMA instructions */
+ "\022<b17>"
+ "\023<b18>"
+ "\024NodeId" /* NodeId MSR support */
+ "\025<b20>"
+ "\026TBM" /* Trailing Bit Manipulation */
+ "\027Topology" /* Topology Extensions */
+ "\030<b23>"
+ "\031<b24>"
+ "\032<b25>"
+ "\033<b26>"
+ "\034<b27>"
+ "\035<b28>"
+ "\036<b29>"
+ "\037<b30>"
+ "\040<b31>"
+ );
+ }
+
+ if (cpu_stdext_feature != 0) {
+ printf("\n Standard Extended Features=0x%b",
+ cpu_stdext_feature,
+ "\020"
+ "\001GSFSBASE"
+ "\002TSCADJ"
+ "\010SMEP"
+ "\012ENHMOVSB"
+ "\013INVPCID"
+ );
+ }
+
+ if (via_feature_rng != 0 || via_feature_xcrypt != 0)
+ print_via_padlock_info();
+
+ if ((cpu_feature & CPUID_HTT) &&
+ cpu_vendor_id == CPU_VENDOR_AMD)
+ cpu_feature &= ~CPUID_HTT;
+
+ /*
+ * If this CPU supports P-state invariant TSC then
+ * mention the capability.
+ */
+ if (tsc_is_invariant) {
+ printf("\n TSC: P-state invariant");
+ if (tsc_perf_stat)
+ printf(", performance statistics");
+ }
+
+ }
+ }
+ /* Avoid ugly blank lines: only print newline when we have to. */
+ if (*cpu_vendor || cpu_id)
+ printf("\n");
+
+ if (!bootverbose)
+ return;
+
+ if (cpu_vendor_id == CPU_VENDOR_AMD)
+ print_AMD_info();
+}
+
+void
+panicifcpuunsupported(void)
+{
+
+#ifndef HAMMER
+#error "You need to specify a cpu type"
+#endif
+ /*
+ * Now that we have told the user what they have,
+ * let them know if that machine type isn't configured.
+ */
+ switch (cpu_class) {
+ case CPUCLASS_X86:
+#ifndef HAMMER
+ case CPUCLASS_K8:
+#endif
+ panic("CPU class not configured");
+ default:
+ break;
+ }
+}
+
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status)
+{
+
+ /* If there was an error during the transition, don't do anything. */
+ if (status != 0)
+ return;
+
+ /* Total setting for this level gives the new frequency in MHz. */
+ hw_clockrate = level->total_set.freq;
+}
+
+static void
+hook_tsc_freq(void *arg __unused)
+{
+
+ if (tsc_is_invariant)
+ return;
+
+ tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
+ tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL);
+
+/*
+ * Final stage of CPU identification.
+ */
+void
+identify_cpu(void)
+{
+ u_int regs[4], cpu_stdext_disable;
+
+ do_cpuid(0, regs);
+ cpu_high = regs[0];
+ ((u_int *)&cpu_vendor)[0] = regs[1];
+ ((u_int *)&cpu_vendor)[1] = regs[3];
+ ((u_int *)&cpu_vendor)[2] = regs[2];
+ cpu_vendor[12] = '\0';
+ cpu_vendor_id = find_cpu_vendor_id();
+
+ do_cpuid(1, regs);
+ cpu_id = regs[0];
+ cpu_procinfo = regs[1];
+ cpu_feature = regs[3];
+ cpu_feature2 = regs[2];
+
+ /*
+ * Clear "Limit CPUID Maxval" bit and get the largest standard CPUID
+ * function number again if it is set from BIOS. It is necessary
+ * for probing correct CPU topology later.
+ * XXX This is only done on the BSP package.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high > 0 && cpu_high < 4) {
+ uint64_t msr;
+ msr = rdmsr(MSR_IA32_MISC_ENABLE);
+ if ((msr & 0x400000ULL) != 0) {
+ wrmsr(MSR_IA32_MISC_ENABLE, msr & ~0x400000ULL);
+ do_cpuid(0, regs);
+ cpu_high = regs[0];
+ }
+ }
+
+ if (cpu_high >= 7) {
+ cpuid_count(7, 0, regs);
+ cpu_stdext_feature = regs[1];
+
+ /*
+ * Some hypervisors fail to filter out unsupported
+ * extended features. For now, disable the
+ * extensions, activation of which requires setting a
+ * bit in CR4, and which VM monitors do not support.
+ */
+ if (cpu_feature2 & CPUID2_HV) {
+ cpu_stdext_disable = CPUID_STDEXT_FSGSBASE |
+ CPUID_STDEXT_SMEP;
+ } else
+ cpu_stdext_disable = 0;
+ TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable);
+ cpu_stdext_feature &= ~cpu_stdext_disable;
+ }
+
+ if (cpu_vendor_id == CPU_VENDOR_INTEL ||
+ cpu_vendor_id == CPU_VENDOR_AMD ||
+ cpu_vendor_id == CPU_VENDOR_CENTAUR) {
+ do_cpuid(0x80000000, regs);
+ cpu_exthigh = regs[0];
+ }
+ if (cpu_exthigh >= 0x80000001) {
+ do_cpuid(0x80000001, regs);
+ amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff);
+ amd_feature2 = regs[2];
+ }
+ if (cpu_exthigh >= 0x80000007) {
+ do_cpuid(0x80000007, regs);
+ amd_pminfo = regs[3];
+ }
+ if (cpu_exthigh >= 0x80000008) {
+ do_cpuid(0x80000008, regs);
+ cpu_procinfo2 = regs[2];
+ }
+
+ /* XXX */
+ cpu = CPU_CLAWHAMMER;
+}
+
+static u_int
+find_cpu_vendor_id(void)
+{
+ int i;
+
+ for (i = 0; i < sizeof(cpu_vendors) / sizeof(cpu_vendors[0]); i++)
+ if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0)
+ return (cpu_vendors[i].vendor_id);
+ return (0);
+}
+
+static void
+print_AMD_assoc(int i)
+{
+ if (i == 255)
+ printf(", fully associative\n");
+ else
+ printf(", %d-way associative\n", i);
+}
+
+static void
+print_AMD_l2_assoc(int i)
+{
+ switch (i & 0x0f) {
+ case 0: printf(", disabled/not present\n"); break;
+ case 1: printf(", direct mapped\n"); break;
+ case 2: printf(", 2-way associative\n"); break;
+ case 4: printf(", 4-way associative\n"); break;
+ case 6: printf(", 8-way associative\n"); break;
+ case 8: printf(", 16-way associative\n"); break;
+ case 15: printf(", fully associative\n"); break;
+ default: printf(", reserved configuration\n"); break;
+ }
+}
+
+static void
+print_AMD_info(void)
+{
+ u_int regs[4];
+
+ if (cpu_exthigh < 0x80000005)
+ return;
+
+ do_cpuid(0x80000005, regs);
+ printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff);
+ print_AMD_assoc(regs[0] >> 24);
+
+ printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff);
+ print_AMD_assoc((regs[0] >> 8) & 0xff);
+
+ printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff);
+ print_AMD_assoc(regs[1] >> 24);
+
+ printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff);
+ print_AMD_assoc((regs[1] >> 8) & 0xff);
+
+ printf("L1 data cache: %d kbytes", regs[2] >> 24);
+ printf(", %d bytes/line", regs[2] & 0xff);
+ printf(", %d lines/tag", (regs[2] >> 8) & 0xff);
+ print_AMD_assoc((regs[2] >> 16) & 0xff);
+
+ printf("L1 instruction cache: %d kbytes", regs[3] >> 24);
+ printf(", %d bytes/line", regs[3] & 0xff);
+ printf(", %d lines/tag", (regs[3] >> 8) & 0xff);
+ print_AMD_assoc((regs[3] >> 16) & 0xff);
+
+ if (cpu_exthigh >= 0x80000006) {
+ do_cpuid(0x80000006, regs);
+ if ((regs[0] >> 16) != 0) {
+ printf("L2 2MB data TLB: %d entries",
+ (regs[0] >> 16) & 0xfff);
+ print_AMD_l2_assoc(regs[0] >> 28);
+ printf("L2 2MB instruction TLB: %d entries",
+ regs[0] & 0xfff);
+ print_AMD_l2_assoc((regs[0] >> 28) & 0xf);
+ } else {
+ printf("L2 2MB unified TLB: %d entries",
+ regs[0] & 0xfff);
+ print_AMD_l2_assoc((regs[0] >> 28) & 0xf);
+ }
+ if ((regs[1] >> 16) != 0) {
+ printf("L2 4KB data TLB: %d entries",
+ (regs[1] >> 16) & 0xfff);
+ print_AMD_l2_assoc(regs[1] >> 28);
+
+ printf("L2 4KB instruction TLB: %d entries",
+ regs[1] & 0xfff);
+ print_AMD_l2_assoc((regs[1] >> 28) & 0xf);
+ } else {
+ printf("L2 4KB unified TLB: %d entries",
+ regs[1] & 0xfff);
+ print_AMD_l2_assoc((regs[1] >> 28) & 0xf);
+ }
+ printf("L2 unified cache: %d kbytes", regs[2] >> 16);
+ printf(", %d bytes/line", regs[2] & 0xff);
+ printf(", %d lines/tag", (regs[2] >> 8) & 0x0f);
+ print_AMD_l2_assoc((regs[2] >> 12) & 0x0f);
+ }
+
+ /*
+ * Opteron Rev E has a bug that, on very rare occasions, causes a read
+ * memory barrier not to be performed as expected if it is followed by
+ * a non-atomic read-modify-write instruction.
+ * As long as that bug surfaces only rarely (intensive machine usage
+ * on other operating systems generally produces one unexplainable
+ * crash about every 2 months) and as long as a model-specific fix
+ * would be impractical at this stage, print a warning if the broken
+ * model and family are identified.
+ */
+ if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 &&
+ CPUID_TO_MODEL(cpu_id) <= 0x3f)
+ printf("WARNING: This architecture revision has known SMP "
+ "hardware bugs which may cause random instability\n");
+}
+
+static void
+print_via_padlock_info(void)
+{
+ u_int regs[4];
+
+ do_cpuid(0xc0000001, regs);
+ printf("\n VIA Padlock Features=0x%b", regs[3],
+ "\020"
+ "\003RNG" /* RNG */
+ "\007AES" /* ACE */
+ "\011AES-CTR" /* ACE2 */
+ "\013SHA1,SHA256" /* PHE */
+ "\015RSA" /* PMM */
+ );
+}
diff --git a/sys/amd64/amd64/in_cksum.c b/sys/amd64/amd64/in_cksum.c
new file mode 100644
index 0000000..ae02e91
--- /dev/null
+++ b/sys/amd64/amd64/in_cksum.c
@@ -0,0 +1,241 @@
+/* $NetBSD: in_cksum.c,v 1.7 1997/09/02 13:18:15 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1996
+ * Matt Thomas <matt@3am-software.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers
+ * (Portable Alpha version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE32 \
+ { \
+ q_util.q = sum; \
+ sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+ }
+#define REDUCE16 \
+ { \
+ q_util.q = sum; \
+ l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+ sum = l_util.s[0] + l_util.s[1]; \
+ ADDCARRY(sum); \
+ }
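+
+/*
+ * Worked example (illustrative): folding the 64-bit accumulator
+ * 0x123456789 with REDUCE16 on a little-endian machine:
+ *
+ *	q_util.s[] = { 0x6789, 0x2345, 0x0001, 0x0000 }
+ *	l_util.l   = 0x6789 + 0x2345 + 0x0001 + 0x0000 = 0x8acf
+ *	sum        = 0x8acf + 0x0000 = 0x8acf
+ *
+ * No carry remains, so ADDCARRY() leaves 0x8acf as the folded
+ * 16-bit sum.
+ */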
+
+static const u_int32_t in_masks[] = {
+ /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
+ 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
+ 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
+ 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
+ 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
+};
+
+union l_util {
+ u_int16_t s[2];
+ u_int32_t l;
+};
+union q_util {
+ u_int16_t s[4];
+ u_int32_t l[2];
+ u_int64_t q;
+};
+
+static u_int64_t
+in_cksumdata(const void *buf, int len)
+{
+ const u_int32_t *lw = (const u_int32_t *) buf;
+ u_int64_t sum = 0;
+ u_int64_t prefilled;
+ int offset;
+ union q_util q_util;
+
+ if ((3 & (long) lw) == 0 && len == 20) {
+ sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
+ REDUCE32;
+ return sum;
+ }
+
+ if ((offset = 3 & (long) lw) != 0) {
+ const u_int32_t *masks = in_masks + (offset << 2);
+ lw = (u_int32_t *) (((long) lw) - offset);
+ sum = *lw++ & masks[len >= 3 ? 3 : len];
+ len -= 4 - offset;
+ if (len <= 0) {
+ REDUCE32;
+ return sum;
+ }
+ }
+#if 0
+ /*
+ * Force to cache line boundary.
+ */
+ offset = 32 - (0x1f & (long) lw);
+ if (offset < 32 && len > offset) {
+ len -= offset;
+ if (4 & offset) {
+ sum += (u_int64_t) lw[0];
+ lw += 1;
+ }
+ if (8 & offset) {
+ sum += (u_int64_t) lw[0] + lw[1];
+ lw += 2;
+ }
+ if (16 & offset) {
+ sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+ lw += 4;
+ }
+ }
+#endif
+ /*
+ * Touch the next cache line early ("prefill") so its load starts
+ * while the current cache line is being summed, and carry the
+ * prefetched word into the next loop iteration.
+ */
+ prefilled = lw[0];
+ while ((len -= 32) >= 4) {
+ u_int64_t prefilling = lw[8];
+ sum += prefilled + lw[1] + lw[2] + lw[3]
+ + lw[4] + lw[5] + lw[6] + lw[7];
+ lw += 8;
+ prefilled = prefilling;
+ }
+ if (len >= 0) {
+ sum += prefilled + lw[1] + lw[2] + lw[3]
+ + lw[4] + lw[5] + lw[6] + lw[7];
+ lw += 8;
+ } else {
+ len += 32;
+ }
+ while ((len -= 16) >= 0) {
+ sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
+ lw += 4;
+ }
+ len += 16;
+ while ((len -= 4) >= 0) {
+ sum += (u_int64_t) *lw++;
+ }
+ len += 4;
+ if (len > 0)
+ sum += (u_int64_t) (in_masks[len] & *lw);
+ REDUCE32;
+ return sum;
+}
+
+u_short
+in_addword(u_short a, u_short b)
+{
+ u_int64_t sum = a + b;
+
+ ADDCARRY(sum);
+ return (sum);
+}
+
+u_short
+in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
+{
+ u_int64_t sum;
+ union q_util q_util;
+ union l_util l_util;
+
+ sum = (u_int64_t) a + b + c;
+ REDUCE16;
+ return (sum);
+}
+
+u_short
+in_cksum_skip(struct mbuf *m, int len, int skip)
+{
+ u_int64_t sum = 0;
+ int mlen = 0;
+ int clen = 0;
+ caddr_t addr;
+ union q_util q_util;
+ union l_util l_util;
+
+ len -= skip;
+ for (; skip && m; m = m->m_next) {
+ if (m->m_len > skip) {
+ mlen = m->m_len - skip;
+ addr = mtod(m, caddr_t) + skip;
+ goto skip_start;
+ } else {
+ skip -= m->m_len;
+ }
+ }
+
+ for (; m && len; m = m->m_next) {
+ if (m->m_len == 0)
+ continue;
+ mlen = m->m_len;
+ addr = mtod(m, caddr_t);
+skip_start:
+ if (len < mlen)
+ mlen = len;
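+		/*
+		 * in_cksumdata() sums 32-bit words from an aligned
+		 * base.  When the parity of the byte count summed so
+		 * far differs from the parity of this buffer's
+		 * address, the partial sum must be byte-swapped; in
+		 * one's-complement arithmetic, shifting the folded
+		 * sum left by 8 does exactly that.
+		 */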
+ if ((clen ^ (long) addr) & 1)
+ sum += in_cksumdata(addr, mlen) << 8;
+ else
+ sum += in_cksumdata(addr, mlen);
+
+ clen += mlen;
+ len -= mlen;
+ }
+ REDUCE16;
+ return (~sum & 0xffff);
+}
+
+u_int
+in_cksum_hdr(const struct ip *ip)
+{
+ u_int64_t sum = in_cksumdata(ip, sizeof(struct ip));
+ union q_util q_util;
+ union l_util l_util;
+ REDUCE16;
+ return (~sum & 0xffff);
+}
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
new file mode 100644
index 0000000..4abed4c
--- /dev/null
+++ b/sys/amd64/amd64/initcpu.c
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) KATO Takenori, 1997, 1998.
+ *
+ * All rights reserved. Unpublished rights reserved under the copyright
+ * laws of Japan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer as
+ * the first lines of this file unmodified.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_cpu.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+static int hw_instruction_sse;
+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
+ &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
+/*
+ * -1: automatic (default)
+ * 0: keep CLFLUSH enabled
+ * 1: force CLFLUSH disabled
+ */
+static int hw_clflush_disable = -1;
+
+int cpu; /* Are we 386, 386sx, 486, etc? */
+u_int cpu_feature; /* Feature flags */
+u_int cpu_feature2; /* Feature flags */
+u_int amd_feature; /* AMD feature flags */
+u_int amd_feature2; /* AMD feature flags */
+u_int amd_pminfo; /* AMD advanced power management info */
+u_int via_feature_rng; /* VIA RNG features */
+u_int via_feature_xcrypt; /* VIA ACE features */
+u_int cpu_high; /* Highest arg to CPUID */
+u_int cpu_exthigh; /* Highest arg to extended CPUID */
+u_int cpu_id; /* Stepping ID */
+u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFLUSH */
+u_int cpu_procinfo2; /* Multicore info */
+char cpu_vendor[20]; /* CPU Origin code */
+u_int cpu_vendor_id; /* CPU vendor ID */
+u_int cpu_fxsr; /* SSE enabled */
+u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
+u_int cpu_clflush_line_size = 32;
+u_int cpu_stdext_feature;
+u_int cpu_max_ext_state_size;
+
+SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
+ &via_feature_rng, 0, "VIA RNG feature available in CPU");
+SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
+ &via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU");
+
+static void
+init_amd(void)
+{
+
+ /*
+ * Work around Erratum 721 for Family 10h and 12h processors.
+ * These processors may incorrectly update the stack pointer
+ * after a long series of push and/or near-call instructions,
+ * or a long series of pop and/or near-return instructions.
+ *
+ * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
+ * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
+ *
+ * Hypervisors do not provide access to the errata MSR,
+ * causing #GP exception on attempt to apply the errata. The
+ * MSR write shall be done on host and persist globally
+ * anyway, so do not try to do it when under virtualization.
+ */
+ switch (CPUID_TO_FAMILY(cpu_id)) {
+ case 0x10:
+ case 0x12:
+ if ((cpu_feature2 & CPUID2_HV) == 0)
+ wrmsr(0xc0011029, rdmsr(0xc0011029) | 1);
+ break;
+ }
+}
+
+/*
+ * Initialize special VIA features
+ */
+static void
+init_via(void)
+{
+ u_int regs[4], val;
+
+ /*
+ * Check extended CPUID for PadLock features.
+ *
+ * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf
+ */
+ do_cpuid(0xc0000000, regs);
+ if (regs[0] >= 0xc0000001) {
+ do_cpuid(0xc0000001, regs);
+ val = regs[3];
+ } else
+ return;
+
+ /* Enable RNG if present. */
+ if ((val & VIA_CPUID_HAS_RNG) != 0) {
+ via_feature_rng = VIA_HAS_RNG;
+ wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG);
+ }
+
+ /* Enable PadLock if present. */
+ if ((val & VIA_CPUID_HAS_ACE) != 0)
+ via_feature_xcrypt |= VIA_HAS_AES;
+ if ((val & VIA_CPUID_HAS_ACE2) != 0)
+ via_feature_xcrypt |= VIA_HAS_AESCTR;
+ if ((val & VIA_CPUID_HAS_PHE) != 0)
+ via_feature_xcrypt |= VIA_HAS_SHA;
+ if ((val & VIA_CPUID_HAS_PMM) != 0)
+ via_feature_xcrypt |= VIA_HAS_MM;
+ if (via_feature_xcrypt != 0)
+ wrmsr(0x1107, rdmsr(0x1107) | (1 << 28));
+}
+
+/*
+ * Initialize CPU control registers
+ */
+void
+initializecpu(void)
+{
+ uint64_t msr;
+ uint32_t cr4;
+
+ cr4 = rcr4();
+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
+ cr4 |= CR4_FXSR | CR4_XMM;
+ cpu_fxsr = hw_instruction_sse = 1;
+ }
+ if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
+ cr4 |= CR4_FSGSBASE;
+
+ /*
+ * Postpone enabling the SMEP on the boot CPU until the page
+ * tables are switched from the boot loader identity mapping
+ * to the kernel tables. The boot loader enables the U bit in
+ * its tables.
+ */
+ if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
+ cr4 |= CR4_SMEP;
+ load_cr4(cr4);
+ if ((amd_feature & AMDID_NX) != 0) {
+ msr = rdmsr(MSR_EFER) | EFER_NXE;
+ wrmsr(MSR_EFER, msr);
+ pg_nx = PG_NX;
+ }
+ switch (cpu_vendor_id) {
+ case CPU_VENDOR_AMD:
+ init_amd();
+ break;
+ case CPU_VENDOR_CENTAUR:
+ init_via();
+ break;
+ }
+}
+
+void
+initializecpucache(void)
+{
+
+ /*
+ * CPUID with %eax = 1, %ebx returns
+ * Bits 15-8: CLFLUSH line size
+ * (Value * 8 = cache line size in bytes)
+ */
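+	/*
+	 * Example: a value of 0x08 in that byte yields the common
+	 * 8 * 8 = 64 byte cache line.
+	 */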
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
+ /*
+ * XXXKIB: (temporary) hack to work around traps generated
+ * when CLFLUSHing APIC register window under virtualization
+ * environments. These environments tend to disable the
+ * CPUID_SS feature even though the native CPU supports it.
+ */
+ TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
+ if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+ cpu_feature &= ~CPUID_CLFSH;
+ /*
+ * Allow to disable CLFLUSH feature manually by
+ * hw.clflush_disable tunable.
+ */
+ if (hw_clflush_disable == 1)
+ cpu_feature &= ~CPUID_CLFSH;
+}
diff --git a/sys/amd64/amd64/io.c b/sys/amd64/amd64/io.c
new file mode 100644
index 0000000..c2d0d51
--- /dev/null
+++ b/sys/amd64/amd64/io.c
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2004 Mark R V Murray
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/proc.h>
+
+#include <machine/frame.h>
+#include <machine/iodev.h>
+#include <machine/psl.h>
+
+int
+iodev_open(struct thread *td)
+{
+
+ td->td_frame->tf_rflags |= PSL_IOPL;
+ return (0);
+}
+
+int
+iodev_close(struct thread *td)
+{
+
+ td->td_frame->tf_rflags &= ~PSL_IOPL;
+ return (0);
+}
+
+/* ARGSUSED */
+int
+iodev_ioctl(u_long cmd __unused, caddr_t data __unused)
+{
+
+ return (ENOIOCTL);
+}
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
new file mode 100644
index 0000000..55cda3a
--- /dev/null
+++ b/sys/amd64/amd64/locore.S
@@ -0,0 +1,88 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm <peter@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/psl.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * Compiled KERNBASE location
+ */
+ .globl kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend
+ .set kernbase,KERNBASE
+ .set loc_PTmap,addr_PTmap
+ .set loc_PDmap,addr_PDmap
+ .set loc_PDPmap,addr_PDPmap
+ .set loc_PML4map,addr_PML4map
+ .set loc_PML4pml4e,addr_PML4pml4e
+ .set dmapbase,DMAP_MIN_ADDRESS
+ .set dmapend,DMAP_MAX_ADDRESS
+
+ .text
+/**********************************************************************
+ *
+ * This is where the loader trampoline starts us; set the ball rolling...
+ *
+ * We are called with the stack looking like this:
+ * 0(%rsp) = 32 bit return address (cannot be used)
+ * 4(%rsp) = 32 bit modulep
+ * 8(%rsp) = 32 bit kernend
+ *
+ * We are already in long mode, on a 64 bit %cs and running at KERNBASE.
+ */
+NON_GPROF_ENTRY(btext)
+
+ /* Tell the bios to warmboot next time */
+ movw $0x1234,0x472
+
+ /* Don't trust what the loader gives for rflags. */
+ pushq $PSL_KERNEL
+ popfq
+
+ /* Find the metadata pointers before we lose them */
+ movq %rsp, %rbp
+ movl 4(%rbp),%edi /* modulep (arg 1) */
+ movl 8(%rbp),%esi /* kernend (arg 2) */
+
+ /* Get onto a stack that we can trust - there is no going back now. */
+ movq $bootstack,%rsp
+ xorl %ebp, %ebp
+
+ call hammer_time /* set up cpu for unix operation */
+ movq %rax,%rsp /* set up kstack for mi_startup() */
+ call mi_startup /* autoconfiguration, mountroot etc */
+0: hlt
+ jmp 0b
+
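+/*
+ * In C terms the handoff above is roughly (sketch only; see the extern
+ * declaration and the return value of hammer_time() in machdep.c):
+ *
+ *	u_int64_t hammer_time(u_int64_t modulep, u_int64_t kernend);
+ *
+ *	%rsp = hammer_time(modulep, kernend);	-- returns thread0's kstack
+ *	mi_startup();				-- never returns
+ */
+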
+ .bss
+ ALIGN_DATA /* just to be sure */
+ .space 0x1000 /* space for bootstack - temporary stack */
+bootstack:
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
new file mode 100644
index 0000000..f5e1437
--- /dev/null
+++ b/sys/amd64/amd64/machdep.c
@@ -0,0 +1,2556 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1992 Terrence R. Lambert.
+ * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_atalk.h"
+#include "opt_atpic.h"
+#include "opt_compat.h"
+#include "opt_cpu.h"
+#include "opt_ddb.h"
+#include "opt_inet.h"
+#include "opt_ipx.h"
+#include "opt_isa.h"
+#include "opt_kstack_pages.h"
+#include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
+#include "opt_perfmon.h"
+#include "opt_sched.h"
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/cons.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/msgbuf.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/ptrace.h>
+#include <sys/reboot.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/signalvar.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/ucontext.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_param.h>
+
+#ifdef DDB
+#ifndef KDB
+#error KDB must be enabled in order for DDB to work!
+#endif
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+#endif
+
+#include <net/netisr.h>
+
+#include <machine/clock.h>
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/intr_machdep.h>
+#include <x86/mca.h>
+#include <machine/md_var.h>
+#include <machine/metadata.h>
+#include <machine/mp_watchdog.h>
+#include <machine/pc/bios.h>
+#include <machine/pcb.h>
+#include <machine/proc.h>
+#include <machine/reg.h>
+#include <machine/sigframe.h>
+#include <machine/specialreg.h>
+#ifdef PERFMON
+#include <machine/perfmon.h>
+#endif
+#include <machine/tss.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+
+#ifdef DEV_ATPIC
+#include <x86/isa/icu.h>
+#else
+#include <machine/apicvar.h>
+#endif
+
+#include <isa/isareg.h>
+#include <isa/rtc.h>
+
+/* Sanity check for __curthread() */
+CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
+
+extern u_int64_t hammer_time(u_int64_t, u_int64_t);
+
+extern void printcpuinfo(void); /* XXX header file */
+extern void identify_cpu(void);
+extern void panicifcpuunsupported(void);
+
+#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
+#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+
+static void cpu_startup(void *);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+ char *xfpusave, size_t xfpusave_len);
+static int set_fpcontext(struct thread *td, const mcontext_t *mcp,
+ char *xfpustate, size_t xfpustate_len);
+SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
+
+/*
+ * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is
+ * the physical address at which the kernel is loaded.
+ */
+extern char kernphys[];
+#ifdef DDB
+extern vm_offset_t ksym_start, ksym_end;
+#endif
+
+struct msgbuf *msgbufp;
+
+/* Intel ICH registers */
+#define ICH_PMBASE 0x400
+#define ICH_SMI_EN	(ICH_PMBASE + 0x30)
+
+int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;
+
+int cold = 1;
+
+long Maxmem = 0;
+long realmem = 0;
+
+/*
+ * The number of PHYSMAP entries must be one less than the number of
+ * PHYSSEG entries because the PHYSMAP entry that spans the largest
+ * physical address that is accessible by ISA DMA is split into two
+ * PHYSSEG entries.
+ */
+#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
+
+/* Must be 2 less so that a 0,0 pair can signal the end of the chunks. */
+#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
+#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
+
+struct kva_md_info kmi;
+
+static struct trapframe proc0_tf;
+struct region_descriptor r_gdt, r_idt;
+
+struct pcpu __pcpu[MAXCPU];
+
+struct mtx icu_lock;
+
+struct mem_range_softc mem_range_softc;
+
+struct mtx dt_lock; /* lock for GDT and LDT */
+
+static void
+cpu_startup(void *dummy)
+{
+ uintmax_t memsize;
+ char *sysenv;
+
+	/*
+	 * On MacBooks, we need to prevent the legacy USB circuit from
+	 * generating an SMI#, because this can cause several problems,
+	 * namely: incorrect CPU frequency detection and failure to
+	 * start the APs.
+	 * We do this by disabling a bit in the SMI_EN (SMI Control and
+	 * Enable) register of the Intel ICH LPC Interface Bridge.
+	 */
+ sysenv = getenv("smbios.system.product");
+ if (sysenv != NULL) {
+ if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
+ strncmp(sysenv, "MacBook3,1", 10) == 0 ||
+ strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
+ strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
+ strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
+ strncmp(sysenv, "Macmini1,1", 10) == 0) {
+ if (bootverbose)
+ printf("Disabling LEGACY_USB_EN bit on "
+ "Intel ICH.\n");
+ outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
+ }
+ freeenv(sysenv);
+ }
+
+ /*
+ * Good {morning,afternoon,evening,night}.
+ */
+ startrtclock();
+ printcpuinfo();
+ panicifcpuunsupported();
+#ifdef PERFMON
+ perfmon_init();
+#endif
+ realmem = Maxmem;
+
+ /*
+ * Display physical memory if SMBIOS reports reasonable amount.
+ */
+ memsize = 0;
+ sysenv = getenv("smbios.memory.enabled");
+ if (sysenv != NULL) {
+ memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
+ freeenv(sysenv);
+ }
+ if (memsize < ptoa((uintmax_t)cnt.v_free_count))
+ memsize = ptoa((uintmax_t)Maxmem);
+ printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
+
+ /*
+ * Display any holes after the first chunk of extended memory.
+ */
+ if (bootverbose) {
+ int indx;
+
+ printf("Physical memory chunk(s):\n");
+ for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
+ vm_paddr_t size;
+
+ size = phys_avail[indx + 1] - phys_avail[indx];
+ printf(
+ "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
+ (uintmax_t)phys_avail[indx],
+ (uintmax_t)phys_avail[indx + 1] - 1,
+ (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
+ }
+ }
+
+ vm_ksubmap_init(&kmi);
+
+ printf("avail memory = %ju (%ju MB)\n",
+ ptoa((uintmax_t)cnt.v_free_count),
+ ptoa((uintmax_t)cnt.v_free_count) / 1048576);
+
+ /*
+ * Set up buffers, so they can be used to read disk labels.
+ */
+ bufinit();
+ vm_pager_bufferinit();
+
+ cpu_setregs();
+}
+
+/*
+ * Send an interrupt to a process.
+ *
+ * The stack is set up so that the sigcode stored at its top calls the
+ * handler routine, followed by a call to the sigreturn routine below.
+ * After sigreturn resets the signal mask, the stack, and the frame
+ * pointer, it returns to the user-specified pc and psl.
+ */
+void
+sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct sigframe sf, *sfp;
+ struct pcb *pcb;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ char *sp;
+ struct trapframe *regs;
+ char *xfpusave;
+ size_t xfpusave_len;
+ int sig;
+ int oonstack;
+
+ td = curthread;
+ pcb = td->td_pcb;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = ksi->ksi_signo;
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_rsp);
+
+ if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
+ xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+ xfpusave = __builtin_alloca(xfpusave_len);
+ } else {
+ xfpusave_len = 0;
+ xfpusave = NULL;
+ }
+
+ /* Save user context. */
+ bzero(&sf, sizeof(sf));
+ sf.sf_uc.uc_sigmask = *mask;
+ sf.sf_uc.uc_stack = td->td_sigstk;
+ sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
+ sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
+ bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
+ sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
+ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
+ fpstate_drop(td);
+ sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
+ sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
+ bzero(sf.sf_uc.uc_mcontext.mc_spare,
+ sizeof(sf.sf_uc.uc_mcontext.mc_spare));
+ bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
+#if defined(COMPAT_43)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+#endif
+ } else
+ sp = (char *)regs->tf_rsp - 128;
+ if (xfpusave != NULL) {
+ sp -= xfpusave_len;
+ sp = (char *)((unsigned long)sp & ~0x3Ful);
+ sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+ }
+ sp -= sizeof(struct sigframe);
+ /* Align to 16 bytes. */
+ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ regs->tf_rdi = sig; /* arg 1 in %rdi */
+ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
+ bzero(&sf.sf_si, sizeof(sf.sf_si));
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
+ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
+
+ /* Fill in POSIX parts */
+ sf.sf_si = ksi->ksi_info;
+ sf.sf_si.si_signo = sig; /* maybe a translated signal */
+ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
+ } else {
+ /* Old FreeBSD-style arguments. */
+ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */
+ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
+ sf.sf_ahu.sf_handler = catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(p);
+
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+ (xfpusave != NULL && copyout(xfpusave,
+ (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+ != 0)) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_rsp = (long)sfp;
+ regs->tf_rip = p->p_sysent->sv_sigcode_base;
+ regs->tf_rflags &= ~(PSL_T | PSL_D);
+ regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
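+
+/*
+ * Resulting user stack layout (sketch; the exact layout is defined by
+ * <machine/sigframe.h>):
+ *
+ *	[ extended FPU state ]	<- mc_xfpustate, 64-byte aligned (optional)
+ *	[ struct sigframe    ]	<- sfp, 16-byte aligned, new %rsp
+ *
+ * with handler arguments %rdi = signal number, %rsi = &sfp->sf_si (or the
+ * signal code for old-style handlers), %rdx = &sfp->sf_uc and
+ * %rcx = fault address.
+ */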
+
+/*
+ * System call to clean up state after a signal
+ * has been taken. Reset signal mask and
+ * stack state from context left by sendsig (above).
+ * Return to previous pc and psl as specified by
+ * context left by sendsig. Check carefully to
+ * make sure that the user has not modified the
+ * state to gain improper privileges.
+ *
+ * MPSAFE
+ */
+int
+sys_sigreturn(td, uap)
+ struct thread *td;
+ struct sigreturn_args /* {
+ const struct __ucontext *sigcntxp;
+ } */ *uap;
+{
+ ucontext_t uc;
+ struct pcb *pcb;
+ struct proc *p;
+ struct trapframe *regs;
+ ucontext_t *ucp;
+ char *xfpustate;
+ size_t xfpustate_len;
+ long rflags;
+ int cs, error, ret;
+ ksiginfo_t ksi;
+
+ pcb = td->td_pcb;
+ p = td->td_proc;
+
+ error = copyin(uap->sigcntxp, &uc, sizeof(uc));
+ if (error != 0) {
+ uprintf("pid %d (%s): sigreturn copyin failed\n",
+ p->p_pid, td->td_name);
+ return (error);
+ }
+ ucp = &uc;
+ if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
+ uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
+ td->td_name, ucp->uc_mcontext.mc_flags);
+ return (EINVAL);
+ }
+ regs = td->td_frame;
+ rflags = ucp->uc_mcontext.mc_rflags;
+ /*
+ * Don't allow users to change privileged or reserved flags.
+ */
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF.
+ * The cpu sets PSL_RF in tf_rflags for faults. Debuggers
+ * should sometimes set it there too. tf_rflags is kept in
+ * the signal context during signal handling and there is no
+ * other place to remember it, so the PSL_RF bit may be
+ * corrupted by the signal handler without us knowing.
+ * Corruption of the PSL_RF bit at worst causes one more or
+ * one less debugger trap, so allowing it is fairly harmless.
+ */
+ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
+ uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
+ td->td_name, rflags);
+ return (EINVAL);
+ }
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+ cs = ucp->uc_mcontext.mc_cs;
+ if (!CS_SECURE(cs)) {
+ uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
+ td->td_name, cs);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_rip;
+ trapsignal(td, &ksi);
+ return (EINVAL);
+ }
+
+ if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+ xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+ if (xfpustate_len > cpu_max_ext_state_size -
+ sizeof(struct savefpu)) {
+ uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+ p->p_pid, td->td_name, xfpustate_len);
+ return (EINVAL);
+ }
+ xfpustate = __builtin_alloca(xfpustate_len);
+ error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+ xfpustate, xfpustate_len);
+ if (error != 0) {
+ uprintf(
+ "pid %d (%s): sigreturn copying xfpustate failed\n",
+ p->p_pid, td->td_name);
+ return (error);
+ }
+ } else {
+ xfpustate = NULL;
+ xfpustate_len = 0;
+ }
+ ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
+ if (ret != 0) {
+ uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
+ p->p_pid, td->td_name, ret);
+ return (ret);
+ }
+ bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
+ pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
+ pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
+
+#if defined(COMPAT_43)
+ if (ucp->uc_mcontext.mc_onstack & 1)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+ else
+ td->td_sigstk.ss_flags &= ~SS_ONSTACK;
+#endif
+
+ kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ return (EJUSTRETURN);
+}
+
+#ifdef COMPAT_FREEBSD4
+int
+freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
+{
+
+	return (sys_sigreturn(td, (struct sigreturn_args *)uap));
+}
+#endif
+
+/*
+ * Machine-dependent boot() routine.
+ *
+ * I haven't seen anything to put here yet; possibly some stuff might be
+ * grafted back here from boot().
+ */
+void
+cpu_boot(int howto)
+{
+}
+
+/*
+ * Flush the D-cache for non-DMA I/O so that the I-cache can
+ * be made coherent later.
+ */
+void
+cpu_flush_dcache(void *ptr, size_t len)
+{
+ /* Not applicable */
+}
+
+/* Get current clock frequency for the given cpu id. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+ uint64_t tsc1, tsc2;
+ uint64_t acnt, mcnt, perf;
+ register_t reg;
+
+ if (pcpu_find(cpu_id) == NULL || rate == NULL)
+ return (EINVAL);
+
+ /*
+	 * If the TSC is P-state invariant and the APERF/MPERF MSRs do not
+	 * exist, the DELAY(9)-based calibration below cannot work.
+ */
+ if (tsc_is_invariant && !tsc_perf_stat)
+ return (EOPNOTSUPP);
+
+#ifdef SMP
+ if (smp_cpus > 1) {
+ /* Schedule ourselves on the indicated cpu. */
+ thread_lock(curthread);
+ sched_bind(curthread, cpu_id);
+ thread_unlock(curthread);
+ }
+#endif
+
+ /* Calibrate by measuring a short delay. */
+ reg = intr_disable();
+ if (tsc_is_invariant) {
+ wrmsr(MSR_MPERF, 0);
+ wrmsr(MSR_APERF, 0);
+ tsc1 = rdtsc();
+ DELAY(1000);
+ mcnt = rdmsr(MSR_MPERF);
+ acnt = rdmsr(MSR_APERF);
+ tsc2 = rdtsc();
+ intr_restore(reg);
+ perf = 1000 * acnt / mcnt;
+ *rate = (tsc2 - tsc1) * perf;
+ } else {
+ tsc1 = rdtsc();
+ DELAY(1000);
+ tsc2 = rdtsc();
+ intr_restore(reg);
+ *rate = (tsc2 - tsc1) * 1000;
+ }
+
+#ifdef SMP
+ if (smp_cpus > 1) {
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+ }
+#endif
+
+ return (0);
+}
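+
+/*
+ * Unit check for the calibration above (sketch): DELAY(1000) waits about
+ * one millisecond, so (tsc2 - tsc1) is TSC ticks per millisecond and the
+ * factor of 1000 converts that to ticks per second.  With APERF/MPERF,
+ * acnt/mcnt scales the invariant TSC rate by the ratio of actual to
+ * reference cycles over the same interval:
+ *
+ *	rate = (tsc2 - tsc1) * (1000 * acnt / mcnt)	[Hz]
+ */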
+
+/*
+ * Shutdown the CPU as much as possible
+ */
+void
+cpu_halt(void)
+{
+ for (;;)
+ halt();
+}
+
+void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
+static void
+cpu_idle_acpi(sbintime_t sbt)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook(sbt);
+ else
+ __asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
+}
+
+static void
+cpu_idle_hlt(sbintime_t sbt)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+
+ /*
+ * Since we may be in a critical section from cpu_idle(), if
+ * an interrupt fires during that critical section we may have
+ * a pending preemption. If the CPU halts, then that thread
+ * may not execute until a later interrupt awakens the CPU.
+ * To handle this race, check for a runnable thread after
+ * disabling interrupts and immediately return if one is
+	 * found.  Also, we must absolutely guarantee that hlt is
+	 * the next instruction after sti.  This ensures that any
+	 * interrupt that fires after the call to disable_intr() will
+	 * immediately awaken the CPU from hlt.  Finally, note that this
+	 * works on x86 because sti sets IF immediately, while interrupt
+	 * delivery is inhibited until the instruction following sti has
+	 * completed; no interrupt can be taken between sti and hlt, and
+	 * a pending interrupt instead wakes the CPU from hlt.
+ */
+ disable_intr();
+ if (sched_runnable())
+ enable_intr();
+ else
+ __asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
+}
+
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
+
+static void
+cpu_idle_mwait(sbintime_t sbt)
+{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+
+ /* See comments in cpu_idle_hlt(). */
+ disable_intr();
+ if (sched_runnable()) {
+ enable_intr();
+ *state = STATE_RUNNING;
+ return;
+ }
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
+ else
+ enable_intr();
+ *state = STATE_RUNNING;
+}
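+
+/*
+ * Wakeup protocol sketch for the MWAIT path above: cpu_idle_wakeup()
+ * (below) stores STATE_RUNNING into the monitored word.  That store both
+ * satisfies an armed MONITOR, aborting a pending mwait, and is caught by
+ * the "*state == STATE_MWAIT" re-check between cpu_monitor() and
+ * "sti; mwait", closing the race window:
+ *
+ *	idle CPU			waking CPU
+ *	*state = STATE_MWAIT;
+ *	cpu_monitor(state, 0, 0);
+ *					*state = STATE_RUNNING;
+ *	if (*state == STATE_MWAIT)	(re-check fails, mwait is skipped)
+ */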
+
+static void
+cpu_idle_spin(sbintime_t sbt)
+{
+ int *state;
+ int i;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+
+	/*
+	 * The sched_runnable() call is racy, but since this runs in a
+	 * loop, missing the race once has little impact, if any (and it
+	 * is much better than not checking at all).
+	 */
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
+ }
+}
+
+/*
+ * C1E renders the local APIC timer dead, so we disable it by
+ * reading the Interrupt Pending Message register and clearing
+ * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
+ *
+ * Reference:
+ * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
+ * #32559 revision 3.00+
+ */
+#define MSR_AMDK8_IPM 0xc0010055
+#define AMDK8_SMIONCMPHALT (1ULL << 27)
+#define AMDK8_C1EONCMPHALT (1ULL << 28)
+#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
+
+static void
+cpu_probe_amdc1e(void)
+{
+
+ /*
+	 * Detect the presence of the C1E capability, found mostly on
+	 * recent dual-core (and later) K8-family CPUs.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
+ }
+}
+
+void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
+
+void
+cpu_idle(int busy)
+{
+ uint64_t msr;
+ sbintime_t sbt = -1;
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
+#ifdef MP_WATCHDOG
+ ap_watchdog(PCPU_GET(cpuid));
+#endif
+	/* If we are busy, try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
+
+	/* If we have time, switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ sbt = cpu_idleclock();
+ }
+
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+ }
+
+ /* Call main idle method. */
+ cpu_idle_fn(sbt);
+
+	/* Switch timers back into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
+}
+
+int
+cpu_idle_wakeup(int cpu)
+{
+ struct pcpu *pcpu;
+ int *state;
+
+ pcpu = pcpu_find(cpu);
+ state = (int *)pcpu->pc_monitorbuf;
+ /*
+ * This doesn't need to be atomic since missing the race will
+ * simply result in unnecessary IPIs.
+ */
+ if (*state == STATE_SLEEPING)
+ return (0);
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
+ return (1);
+}
+
+/*
+ * Ordered by speed/power consumption.
+ */
+struct {
+	void	(*id_fn)(sbintime_t);
+	char	*id_name;
+} idle_tbl[] = {
+ { cpu_idle_spin, "spin" },
+ { cpu_idle_mwait, "mwait" },
+ { cpu_idle_hlt, "hlt" },
+ { cpu_idle_acpi, "acpi" },
+ { NULL, NULL }
+};
+
+static int
+idle_sysctl_available(SYSCTL_HANDLER_ARGS)
+{
+ char *avail, *p;
+ int error;
+ int i;
+
+ avail = malloc(256, M_TEMP, M_WAITOK);
+ p = avail;
+ for (i = 0; idle_tbl[i].id_name != NULL; i++) {
+ if (strstr(idle_tbl[i].id_name, "mwait") &&
+ (cpu_feature2 & CPUID2_MON) == 0)
+ continue;
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
+ continue;
+ p += sprintf(p, "%s%s", p != avail ? ", " : "",
+ idle_tbl[i].id_name);
+ }
+ error = sysctl_handle_string(oidp, avail, 0, req);
+ free(avail, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
+static int
+idle_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ char buf[16];
+ int error;
+ char *p;
+ int i;
+
+ p = "unknown";
+ for (i = 0; idle_tbl[i].id_name != NULL; i++) {
+ if (idle_tbl[i].id_fn == cpu_idle_fn) {
+ p = idle_tbl[i].id_name;
+ break;
+ }
+ }
+ strncpy(buf, p, sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ for (i = 0; idle_tbl[i].id_name != NULL; i++) {
+ if (strstr(idle_tbl[i].id_name, "mwait") &&
+ (cpu_feature2 & CPUID2_MON) == 0)
+ continue;
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
+ continue;
+ if (strcmp(idle_tbl[i].id_name, buf))
+ continue;
+ cpu_idle_fn = idle_tbl[i].id_fn;
+ return (0);
+ }
+ return (EINVAL);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
+ idle_sysctl, "A", "currently selected idle function");
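+
+/*
+ * Example (from userland; the output is illustrative and depends on the
+ * CPU's feature set):
+ *
+ *	# sysctl machdep.idle_available
+ *	machdep.idle_available: spin, mwait, hlt, acpi
+ *	# sysctl machdep.idle=hlt
+ *	machdep.idle: acpi -> hlt
+ */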
+
+/*
+ * Reset registers to default values on exec.
+ */
+void
+exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
+{
+ struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+
+ pcb->pcb_fsbase = 0;
+ pcb->pcb_gsbase = 0;
+ clear_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT);
+ pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+
+ bzero((char *)regs, sizeof(struct trapframe));
+ regs->tf_rip = imgp->entry_addr;
+ regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
+ regs->tf_rdi = stack; /* argv */
+ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
+ regs->tf_ss = _udatasel;
+ regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+ td->td_retval[1] = 0;
+
+ /*
+ * Reset the hardware debug registers if they were in use.
+ * They won't have any meaning for the newly exec'd process.
+ */
+ if (pcb->pcb_flags & PCB_DBREGS) {
+ pcb->pcb_dr0 = 0;
+ pcb->pcb_dr1 = 0;
+ pcb->pcb_dr2 = 0;
+ pcb->pcb_dr3 = 0;
+ pcb->pcb_dr6 = 0;
+ pcb->pcb_dr7 = 0;
+ if (pcb == curpcb) {
+ /*
+ * Clear the debug registers on the running
+ * CPU, otherwise they will end up affecting
+ * the next process we switch to.
+ */
+ reset_dbregs();
+ }
+ clear_pcb_flags(pcb, PCB_DBREGS);
+ }
+
+ /*
+ * Drop the FP state if we hold it, so that the process gets a
+ * clean FP state if it uses the FPU again.
+ */
+ fpstate_drop(td);
+}
+
+void
+cpu_setregs(void)
+{
+ register_t cr0;
+
+ cr0 = rcr0();
+ /*
+ * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
+ * BSP. See the comments there about why we set them.
+ */
+ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
+ load_cr0(cr0);
+}
+
+/*
+ * Initialize amd64 and configure to run kernel
+ */
+
+/*
+ * Initialize segments & interrupt table
+ */
+
+struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */
+static struct gate_descriptor idt0[NIDT];
+struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
+
+static char dblfault_stack[PAGE_SIZE] __aligned(16);
+
+static char nmi0_stack[PAGE_SIZE] __aligned(16);
+CTASSERT(sizeof(struct nmi_pcpu) == 16);
+
+struct amd64tss common_tss[MAXCPU];
+
+/*
+ * Software prototypes -- in more palatable form.
+ *
+ * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
+ * slots as corresponding segments for i386 kernel.
+ */
+struct soft_segment_descriptor gdt_segs[] = {
+/* GNULL_SEL 0 Null Descriptor */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0x0,
+ .ssd_type = 0,
+ .ssd_dpl = 0,
+ .ssd_p = 0,
+ .ssd_long = 0,
+ .ssd_def32 = 0,
+ .ssd_gran = 0 },
+/* GNULL2_SEL 1 Null Descriptor */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0x0,
+ .ssd_type = 0,
+ .ssd_dpl = 0,
+ .ssd_p = 0,
+ .ssd_long = 0,
+ .ssd_def32 = 0,
+ .ssd_gran = 0 },
+/* GUFS32_SEL	2 32 bit %fs Descriptor for user */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMRWA,
+ .ssd_dpl = SEL_UPL,
+ .ssd_p = 1,
+ .ssd_long = 0,
+ .ssd_def32 = 1,
+ .ssd_gran = 1 },
+/* GUGS32_SEL	3 32 bit %gs Descriptor for user */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMRWA,
+ .ssd_dpl = SEL_UPL,
+ .ssd_p = 1,
+ .ssd_long = 0,
+ .ssd_def32 = 1,
+ .ssd_gran = 1 },
+/* GCODE_SEL 4 Code Descriptor for kernel */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMERA,
+ .ssd_dpl = SEL_KPL,
+ .ssd_p = 1,
+ .ssd_long = 1,
+ .ssd_def32 = 0,
+ .ssd_gran = 1 },
+/* GDATA_SEL 5 Data Descriptor for kernel */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMRWA,
+ .ssd_dpl = SEL_KPL,
+ .ssd_p = 1,
+ .ssd_long = 1,
+ .ssd_def32 = 0,
+ .ssd_gran = 1 },
+/* GUCODE32_SEL 6 32 bit Code Descriptor for user */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMERA,
+ .ssd_dpl = SEL_UPL,
+ .ssd_p = 1,
+ .ssd_long = 0,
+ .ssd_def32 = 1,
+ .ssd_gran = 1 },
+/* GUDATA_SEL 7 32/64 bit Data Descriptor for user */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMRWA,
+ .ssd_dpl = SEL_UPL,
+ .ssd_p = 1,
+ .ssd_long = 0,
+ .ssd_def32 = 1,
+ .ssd_gran = 1 },
+/* GUCODE_SEL 8 64 bit Code Descriptor for user */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0xfffff,
+ .ssd_type = SDT_MEMERA,
+ .ssd_dpl = SEL_UPL,
+ .ssd_p = 1,
+ .ssd_long = 1,
+ .ssd_def32 = 0,
+ .ssd_gran = 1 },
+/* GPROC0_SEL 9 Proc 0 Tss Descriptor */
+{ .ssd_base = 0x0,
+ .ssd_limit = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE - 1,
+ .ssd_type = SDT_SYSTSS,
+ .ssd_dpl = SEL_KPL,
+ .ssd_p = 1,
+ .ssd_long = 0,
+ .ssd_def32 = 0,
+ .ssd_gran = 0 },
+/* The TSS is a double-size system descriptor; slot 10 is its upper half */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0x0,
+ .ssd_type = 0,
+ .ssd_dpl = 0,
+ .ssd_p = 0,
+ .ssd_long = 0,
+ .ssd_def32 = 0,
+ .ssd_gran = 0 },
+/* GUSERLDT_SEL 11 LDT Descriptor */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0x0,
+ .ssd_type = 0,
+ .ssd_dpl = 0,
+ .ssd_p = 0,
+ .ssd_long = 0,
+ .ssd_def32 = 0,
+ .ssd_gran = 0 },
+/* GUSERLDT_SEL 12 LDT Descriptor, double size */
+{ .ssd_base = 0x0,
+ .ssd_limit = 0x0,
+ .ssd_type = 0,
+ .ssd_dpl = 0,
+ .ssd_p = 0,
+ .ssd_long = 0,
+ .ssd_def32 = 0,
+ .ssd_gran = 0 },
+};
+
+void
+setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
+{
+ struct gate_descriptor *ip;
+
+ ip = idt + idx;
+ ip->gd_looffset = (uintptr_t)func;
+ ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
+ ip->gd_ist = ist;
+ ip->gd_xx = 0;
+ ip->gd_type = typ;
+ ip->gd_dpl = dpl;
+ ip->gd_p = 1;
+	ip->gd_hioffset = ((uintptr_t)func) >> 16;
+}
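+
+/*
+ * Usage sketch (see hammer_time() below): for example,
+ *
+ *	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
+ *
+ * installs the double fault handler as a 64-bit interrupt gate with
+ * DPL 0, running on the dedicated IST1 stack that hammer_time() points
+ * at dblfault_stack via common_tss[0].tss_ist1.
+ */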
+
+extern inthand_t
+ IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
+ IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
+ IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
+ IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
+ IDTVEC(xmm), IDTVEC(dblfault),
+#ifdef KDTRACE_HOOKS
+ IDTVEC(dtrace_ret),
+#endif
+ IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
+
+#ifdef DDB
+/*
+ * Display the index and function name of any IDT entries that don't use
+ * the default 'rsvd' entry point.
+ */
+DB_SHOW_COMMAND(idt, db_show_idt)
+{
+ struct gate_descriptor *ip;
+ int idx;
+ uintptr_t func;
+
+ ip = idt;
+ for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
+ func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+ if (func != (uintptr_t)&IDTVEC(rsvd)) {
+ db_printf("%3d\t", idx);
+ db_printsym(func, DB_STGY_PROC);
+ db_printf("\n");
+ }
+ ip++;
+ }
+}
+#endif
+
+void
+sdtossd(struct user_segment_descriptor *sd,
+    struct soft_segment_descriptor *ssd)
+{
+
+ ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
+ ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
+ ssd->ssd_type = sd->sd_type;
+ ssd->ssd_dpl = sd->sd_dpl;
+ ssd->ssd_p = sd->sd_p;
+ ssd->ssd_long = sd->sd_long;
+ ssd->ssd_def32 = sd->sd_def32;
+ ssd->ssd_gran = sd->sd_gran;
+}
+
+void
+ssdtosd(struct soft_segment_descriptor *ssd,
+    struct user_segment_descriptor *sd)
+{
+
+ sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
+ sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
+ sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
+ sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
+ sd->sd_type = ssd->ssd_type;
+ sd->sd_dpl = ssd->ssd_dpl;
+ sd->sd_p = ssd->ssd_p;
+ sd->sd_long = ssd->ssd_long;
+ sd->sd_def32 = ssd->ssd_def32;
+ sd->sd_gran = ssd->ssd_gran;
+}
+
+void
+ssdtosyssd(struct soft_segment_descriptor *ssd,
+    struct system_segment_descriptor *sd)
+{
+
+ sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
+ sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
+ sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
+ sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
+ sd->sd_type = ssd->ssd_type;
+ sd->sd_dpl = ssd->ssd_dpl;
+ sd->sd_p = ssd->ssd_p;
+ sd->sd_gran = ssd->ssd_gran;
+}
+
+#if !defined(DEV_ATPIC) && defined(DEV_ISA)
+#include <isa/isavar.h>
+#include <isa/isareg.h>
+/*
+ * Return a bitmap of the current interrupt requests. This is 8259-specific
+ * and is only suitable for use at probe time.
+ * This is only here to pacify sio. It is NOT FATAL if this doesn't work.
+ * It shouldn't be here.  There should probably be an APIC-centric
+ * implementation in the apic driver code, if at all.
+ */
+intrmask_t
+isa_irq_pending(void)
+{
+ u_char irr1;
+ u_char irr2;
+
+ irr1 = inb(IO_ICU1);
+ irr2 = inb(IO_ICU2);
+ return ((irr2 << 8) | irr1);
+}
+#endif
+
+u_int basemem;
+
+static int
+add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
+{
+ int i, insert_idx, physmap_idx;
+
+ physmap_idx = *physmap_idxp;
+
+ if (boothowto & RB_VERBOSE)
+ printf("SMAP type=%02x base=%016lx len=%016lx\n",
+ smap->type, smap->base, smap->length);
+
+ if (smap->type != SMAP_TYPE_MEMORY)
+ return (1);
+
+ if (smap->length == 0)
+ return (0);
+
+ /*
+ * Find insertion point while checking for overlap. Start off by
+ * assuming the new entry will be added to the end.
+ */
+ insert_idx = physmap_idx + 2;
+ for (i = 0; i <= physmap_idx; i += 2) {
+ if (smap->base < physmap[i + 1]) {
+ if (smap->base + smap->length <= physmap[i]) {
+ insert_idx = i;
+ break;
+ }
+ if (boothowto & RB_VERBOSE)
+ printf(
+ "Overlapping memory regions, ignoring second region\n");
+ return (1);
+ }
+ }
+
+ /* See if we can prepend to the next entry. */
+ if (insert_idx <= physmap_idx &&
+ smap->base + smap->length == physmap[insert_idx]) {
+ physmap[insert_idx] = smap->base;
+ return (1);
+ }
+
+ /* See if we can append to the previous entry. */
+ if (insert_idx > 0 && smap->base == physmap[insert_idx - 1]) {
+ physmap[insert_idx - 1] += smap->length;
+ return (1);
+ }
+
+ physmap_idx += 2;
+ *physmap_idxp = physmap_idx;
+ if (physmap_idx == PHYSMAP_SIZE) {
+ printf(
+ "Too many segments in the physical address map, giving up\n");
+ return (0);
+ }
+
+ /*
+ * Move the last 'N' entries down to make room for the new
+ * entry if needed.
+ */
+ for (i = physmap_idx; i > insert_idx; i -= 2) {
+ physmap[i] = physmap[i - 2];
+ physmap[i + 1] = physmap[i - 1];
+ }
+
+ /* Insert the new entry. */
+ physmap[insert_idx] = smap->base;
+ physmap[insert_idx + 1] = smap->base + smap->length;
+ return (1);
+}
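+
+/*
+ * physmap layout sketch: even indices hold segment bases and odd indices
+ * hold segment bounds, so after two typical SMAP entries (values are
+ * illustrative only):
+ *
+ *	physmap[0] = 0x0000000000000000	physmap[1] = 0x000000000009f000
+ *	physmap[2] = 0x0000000000100000	physmap[3] = 0x00000000bfe90000
+ *
+ * physmap_idx always indexes the base of the last segment.
+ */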
+
+/*
+ * Populate the (physmap) array with base/bound pairs describing the
+ * available physical memory in the system, then test this memory and
+ * build the phys_avail array describing the actually-available memory.
+ *
+ * Total memory size may be set by the kernel environment variable
+ * hw.physmem or the compile-time define MAXMEM.
+ *
+ * XXX first should be vm_paddr_t.
+ */
+static void
+getmemsize(caddr_t kmdp, u_int64_t first)
+{
+ int i, physmap_idx, pa_indx, da_indx;
+ vm_paddr_t pa, physmap[PHYSMAP_SIZE];
+ u_long physmem_start, physmem_tunable, memtest;
+ pt_entry_t *pte;
+ struct bios_smap *smapbase, *smap, *smapend;
+ u_int32_t smapsize;
+ quad_t dcons_addr, dcons_size;
+
+ bzero(physmap, sizeof(physmap));
+ basemem = 0;
+ physmap_idx = 0;
+
+ /*
+	 * Get the memory map from INT 15:E820, kindly supplied by the loader.
+	 *
+	 * subr_module.c says:
+	 * "Consumer may safely assume that size value precedes data."
+	 * i.e., an int32_t immediately precedes the smap.
+ */
+ smapbase = (struct bios_smap *)preload_search_info(kmdp,
+ MODINFO_METADATA | MODINFOMD_SMAP);
+ if (smapbase == NULL)
+ panic("No BIOS smap info from loader!");
+
+ smapsize = *((u_int32_t *)smapbase - 1);
+ smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
+
+ for (smap = smapbase; smap < smapend; smap++)
+ if (!add_smap_entry(smap, physmap, &physmap_idx))
+ break;
+
+ /*
+ * Find the 'base memory' segment for SMP
+ */
+ basemem = 0;
+ for (i = 0; i <= physmap_idx; i += 2) {
+ if (physmap[i] == 0x00000000) {
+ basemem = physmap[i + 1] / 1024;
+ break;
+ }
+ }
+ if (basemem == 0)
+ panic("BIOS smap did not include a basemem segment!");
+
+#ifdef SMP
+ /* make hole for AP bootstrap code */
+ physmap[1] = mp_bootaddress(physmap[1] / 1024);
+#endif
+
+ /*
+	 * Maxmem isn't the "maximum memory"; it's one larger than the
+ * highest page of the physical address space. It should be
+ * called something like "Maxphyspage". We may adjust this
+ * based on ``hw.physmem'' and the results of the memory test.
+ */
+ Maxmem = atop(physmap[physmap_idx + 1]);
+
+#ifdef MAXMEM
+ Maxmem = MAXMEM / 4;
+#endif
+
+ if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
+ Maxmem = atop(physmem_tunable);
+
+ /*
+ * By default enable the memory test on real hardware, and disable
+ * it if we appear to be running in a VM. This avoids touching all
+ * pages unnecessarily, which doesn't matter on real hardware but is
+ * bad for shared VM hosts. Use a general name so that
+ * one could eventually do more with the code than just disable it.
+ */
+ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1;
+ TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
+
+ /*
+ * Don't allow MAXMEM or hw.physmem to extend the amount of memory
+ * in the system.
+ */
+ if (Maxmem > atop(physmap[physmap_idx + 1]))
+ Maxmem = atop(physmap[physmap_idx + 1]);
+
+ if (atop(physmap[physmap_idx + 1]) != Maxmem &&
+ (boothowto & RB_VERBOSE))
+ printf("Physical memory use set to %ldK\n", Maxmem * 4);
+
+ /* call pmap initialization to make new kernel address space */
+ pmap_bootstrap(&first);
+
+ /*
+ * Size up each available chunk of physical memory.
+ *
+ * XXX Some BIOSes corrupt low 64KB between suspend and resume.
+ * By default, mask off the first 16 pages unless we appear to be
+ * running in a VM.
+ */
+ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
+ TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
+ if (physmem_start < PAGE_SIZE)
+ physmap[0] = PAGE_SIZE;
+ else if (physmem_start >= physmap[1])
+ physmap[0] = round_page(physmap[1] - PAGE_SIZE);
+ else
+ physmap[0] = round_page(physmem_start);
+ pa_indx = 0;
+ da_indx = 1;
+ phys_avail[pa_indx++] = physmap[0];
+ phys_avail[pa_indx] = physmap[0];
+ dump_avail[da_indx] = physmap[0];
+ pte = CMAP1;
+
+ /*
+ * Get dcons buffer address
+ */
+ if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
+ getenv_quad("dcons.size", &dcons_size) == 0)
+ dcons_addr = 0;
+
+ /*
+ * physmap is in bytes, so when converting to page boundaries,
+ * round up the start address and round down the end address.
+ */
+ for (i = 0; i <= physmap_idx; i += 2) {
+ vm_paddr_t end;
+
+ end = ptoa((vm_paddr_t)Maxmem);
+ if (physmap[i + 1] < end)
+ end = trunc_page(physmap[i + 1]);
+ for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
+ int tmp, page_bad, full;
+ int *ptr = (int *)CADDR1;
+
+ full = FALSE;
+ /*
+ * block out kernel memory as not available.
+ */
+ if (pa >= (vm_paddr_t)kernphys && pa < first)
+ goto do_dump_avail;
+
+ /*
+ * block out dcons buffer
+ */
+ if (dcons_addr > 0
+ && pa >= trunc_page(dcons_addr)
+ && pa < dcons_addr + dcons_size)
+ goto do_dump_avail;
+
+ page_bad = FALSE;
+ if (memtest == 0)
+ goto skip_memtest;
+
+			/*
+			 * Map the page into the kernel: valid, read/write,
+			 * non-cacheable.
+			 */
+ *pte = pa | PG_V | PG_RW | PG_N;
+ invltlb();
+
+ tmp = *(int *)ptr;
+ /*
+ * Test for alternating 1's and 0's
+ */
+ *(volatile int *)ptr = 0xaaaaaaaa;
+ if (*(volatile int *)ptr != 0xaaaaaaaa)
+ page_bad = TRUE;
+ /*
+ * Test for alternating 0's and 1's
+ */
+ *(volatile int *)ptr = 0x55555555;
+ if (*(volatile int *)ptr != 0x55555555)
+ page_bad = TRUE;
+ /*
+ * Test for all 1's
+ */
+ *(volatile int *)ptr = 0xffffffff;
+ if (*(volatile int *)ptr != 0xffffffff)
+ page_bad = TRUE;
+ /*
+ * Test for all 0's
+ */
+ *(volatile int *)ptr = 0x0;
+ if (*(volatile int *)ptr != 0x0)
+ page_bad = TRUE;
+ /*
+ * Restore original value.
+ */
+ *(int *)ptr = tmp;
+
+skip_memtest:
+ /*
+ * Adjust array of valid/good pages.
+ */
+ if (page_bad == TRUE)
+ continue;
+			/*
+			 * If this good page is a continuation of the
+			 * previous set of good pages, then just increase
+			 * the end pointer.  Otherwise start a new chunk.
+			 * Note that "end" points one page past the last
+			 * valid page, making the range >= start and < end.
+			 * If we're also doing a speculative memory test and
+			 * we are at or past the end, bump up Maxmem so that
+			 * we keep going.  The first bad page will terminate
+			 * the loop.
+			 */
+ if (phys_avail[pa_indx] == pa) {
+ phys_avail[pa_indx] += PAGE_SIZE;
+ } else {
+ pa_indx++;
+ if (pa_indx == PHYS_AVAIL_ARRAY_END) {
+ printf(
+ "Too many holes in the physical address space, giving up\n");
+ pa_indx--;
+ full = TRUE;
+ goto do_dump_avail;
+ }
+ phys_avail[pa_indx++] = pa; /* start */
+ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
+ }
+ physmem++;
+do_dump_avail:
+ if (dump_avail[da_indx] == pa) {
+ dump_avail[da_indx] += PAGE_SIZE;
+ } else {
+ da_indx++;
+ if (da_indx == DUMP_AVAIL_ARRAY_END) {
+ da_indx--;
+ goto do_next;
+ }
+ dump_avail[da_indx++] = pa; /* start */
+ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
+ }
+do_next:
+ if (full)
+ break;
+ }
+ }
+ *pte = 0;
+ invltlb();
+
+ /*
+ * XXX
+ * The last chunk must contain at least one page plus the message
+ * buffer to avoid complicating other code (message buffer address
+ * calculation, etc.).
+ */
+ while (phys_avail[pa_indx - 1] + PAGE_SIZE +
+ round_page(msgbufsize) >= phys_avail[pa_indx]) {
+ physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
+ phys_avail[pa_indx--] = 0;
+ phys_avail[pa_indx--] = 0;
+ }
+
+ Maxmem = atop(phys_avail[pa_indx]);
+
+ /* Trim off space for the message buffer. */
+ phys_avail[pa_indx] -= round_page(msgbufsize);
+
+ /* Map the message buffer. */
+ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
+}
+
+u_int64_t
+hammer_time(u_int64_t modulep, u_int64_t physfree)
+{
+ caddr_t kmdp;
+ int gsel_tss, x;
+ struct pcpu *pc;
+ struct nmi_pcpu *np;
+ struct xstate_hdr *xhdr;
+ u_int64_t msr;
+ char *env;
+ size_t kstack0_sz;
+
+ thread0.td_kstack = physfree + KERNBASE;
+ thread0.td_kstack_pages = KSTACK_PAGES;
+ kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
+ bzero((void *)thread0.td_kstack, kstack0_sz);
+ physfree += kstack0_sz;
+
+ /*
+	 * This may be done better later if it gets more high-level
+	 * components in it.  If so, just link td->td_proc here.
+ */
+ proc_linkup0(&proc0, &thread0);
+
+ preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
+ preload_bootstrap_relocate(KERNBASE);
+ kmdp = preload_search_by_type("elf kernel");
+ if (kmdp == NULL)
+ kmdp = preload_search_by_type("elf64 kernel");
+ boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
+ kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE;
+#ifdef DDB
+ ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
+ ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
+#endif
+
+ /* Init basic tunables, hz etc */
+ init_param1();
+
+ /*
+	 * Make the GDT memory segments.
+ */
+ for (x = 0; x < NGDT; x++) {
+ if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
+ x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
+ ssdtosd(&gdt_segs[x], &gdt[x]);
+ }
+ gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
+ ssdtosyssd(&gdt_segs[GPROC0_SEL],
+ (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
+
+ r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
+ r_gdt.rd_base = (long) gdt;
+ lgdt(&r_gdt);
+ pc = &__pcpu[0];
+
+ wrmsr(MSR_FSBASE, 0); /* User value */
+ wrmsr(MSR_GSBASE, (u_int64_t)pc);
+ wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */
+
+ pcpu_init(pc, 0, sizeof(struct pcpu));
+ dpcpu_init((void *)(physfree + KERNBASE), 0);
+ physfree += DPCPU_SIZE;
+ PCPU_SET(prvspace, pc);
+ PCPU_SET(curthread, &thread0);
+ PCPU_SET(tssp, &common_tss[0]);
+ PCPU_SET(commontssp, &common_tss[0]);
+ PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
+ PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
+ PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
+ PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
+
+ /*
+ * Initialize mutexes.
+ *
+ * icu_lock: in order to allow an interrupt to occur in a critical
+ * section, to set pcpu->ipending (etc...) properly, we
+ * must be able to get the icu lock, so it can't be
+ * under witness.
+ */
+ mutex_init();
+ mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
+ mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
+
+ /* exceptions */
+ for (x = 0; x < NIDT; x++)
+ setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
+ setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
+ setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
+ setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
+#ifdef KDTRACE_HOOKS
+ setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
+#endif
+
+ r_idt.rd_limit = sizeof(idt0) - 1;
+ r_idt.rd_base = (long) idt;
+ lidt(&r_idt);
+
+ /*
+ * Initialize the i8254 before the console so that console
+ * initialization can use DELAY().
+ */
+ i8254_init();
+
+ /*
+ * Initialize the console before we print anything out.
+ */
+ cninit();
+
+#ifdef DEV_ISA
+#ifdef DEV_ATPIC
+ elcr_probe();
+ atpic_startup();
+#else
+ /* Reset and mask the atpics and leave them shut down. */
+ atpic_reset();
+
+ /*
+ * Point the ICU spurious interrupt vectors at the APIC spurious
+ * interrupt handler.
+ */
+ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
+#endif
+#else
+#error "have you forgotten the isa device?"
+#endif
+
+ kdb_init();
+
+#ifdef KDB
+ if (boothowto & RB_KDB)
+ kdb_enter(KDB_WHY_BOOTFLAGS,
+ "Boot flags requested debugger");
+#endif
+
+ identify_cpu(); /* Final stage of CPU initialization */
+ initializecpu(); /* Initialize CPU registers */
+ initializecpucache();
+
+ /* doublefault stack space, runs on ist1 */
+ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
+
+ /*
+ * NMI stack, runs on ist2. The pcpu pointer is stored just
+ * above the start of the ist2 stack.
+ */
+ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
+ np->np_pcpu = (register_t) pc;
+ common_tss[0].tss_ist2 = (long) np;
+
+ /* Set the IO permission bitmap (empty due to tss seg limit) */
+ common_tss[0].tss_iobase = sizeof(struct amd64tss) +
+ IOPAGES * PAGE_SIZE;
+
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ ltr(gsel_tss);
+
+ /* Set up the fast syscall stuff */
+ msr = rdmsr(MSR_EFER) | EFER_SCE;
+ wrmsr(MSR_EFER, msr);
+ wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
+ wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
+ msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
+ ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
+ wrmsr(MSR_STAR, msr);
+ wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
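+
+	/*
+	 * MSR layout sketch for the syscall setup above: LSTAR/CSTAR hold
+	 * the 64- and 32-bit entry points; STAR bits 47:32 select the
+	 * kernel CS/SS pair loaded on syscall and bits 63:48 the selector
+	 * base used by sysret (the 64-bit user selectors are derived from
+	 * it at architecturally fixed offsets); SF_MASK lists the rflags
+	 * bits cleared on entry (here NT, T, I, C and D).
+	 */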
+
+ getmemsize(kmdp, physfree);
+ init_param2(physmem);
+
+	/* Now running on new page tables, configured, and u/iom is accessible. */
+
+ msgbufinit(msgbufp, msgbufsize);
+ fpuinit();
+
+ /*
+	 * Set up thread0's pcb after fpuinit has calculated the pcb +
+	 * FPU save area size.  Zero out the extended state header in
+	 * the FPU save area.
+ */
+ thread0.td_pcb = get_pcb_td(&thread0);
+ bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+ 1);
+ xhdr->xstate_bv = xsave_mask;
+ }
+ /* make an initial tss so cpu can get interrupt stack on syscall! */
+ common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb;
+ /* Ensure the stack is aligned to 16 bytes */
+ common_tss[0].tss_rsp0 &= ~0xFul;
+ PCPU_SET(rsp0, common_tss[0].tss_rsp0);
+ PCPU_SET(curpcb, thread0.td_pcb);
+
+ /* transfer to user mode */
+
+ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
+ _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
+ _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
+ _ufssel = GSEL(GUFS32_SEL, SEL_UPL);
+ _ugssel = GSEL(GUGS32_SEL, SEL_UPL);
+
+ load_ds(_udatasel);
+ load_es(_udatasel);
+ load_fs(_ufssel);
+
+ /* setup proc 0's pcb */
+ thread0.td_pcb->pcb_flags = 0;
+ thread0.td_pcb->pcb_cr3 = KPML4phys;
+ thread0.td_frame = &proc0_tf;
+
+ env = getenv("kernelname");
+ if (env != NULL)
+ strlcpy(kernelname, env, sizeof(kernelname));
+
+#ifdef XENHVM
+ if (inw(0x10) == 0x49d2) {
+ if (bootverbose)
+ printf("Xen detected: disabling emulated block and network devices\n");
+ outw(0x10, 3);
+ }
+#endif
+
+ cpu_probe_amdc1e();
+
+ /* Location of kernel stack for locore */
+ return ((u_int64_t)thread0.td_pcb);
+}
+
+void
+cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
+{
+
+ pcpu->pc_acpi_id = 0xffffffff;
+}
+
+void
+spinlock_enter(void)
+{
+ struct thread *td;
+ register_t flags;
+
+ td = curthread;
+ if (td->td_md.md_spinlock_count == 0) {
+ flags = intr_disable();
+ td->td_md.md_spinlock_count = 1;
+ td->td_md.md_saved_flags = flags;
+ } else
+ td->td_md.md_spinlock_count++;
+ critical_enter();
+}
+
+void
+spinlock_exit(void)
+{
+ struct thread *td;
+ register_t flags;
+
+ td = curthread;
+ critical_exit();
+ flags = td->td_md.md_saved_flags;
+ td->td_md.md_spinlock_count--;
+ if (td->td_md.md_spinlock_count == 0)
+ intr_restore(flags);
+}
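+
+/*
+ * Nesting sketch: spinlock_enter()/spinlock_exit() pairs may nest; only
+ * the outermost enter disables interrupts and saves the flags, and only
+ * the outermost exit restores them:
+ *
+ *	spinlock_enter();	md_spinlock_count 0 -> 1, interrupts off
+ *	spinlock_enter();	md_spinlock_count 1 -> 2
+ *	spinlock_exit();	md_spinlock_count 2 -> 1
+ *	spinlock_exit();	md_spinlock_count 1 -> 0, flags restored
+ */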
+
+/*
+ * Construct a PCB from a trapframe. This is called from kdb_trap() where
+ * we want to start a backtrace from the function that caused us to enter
+ * the debugger. We have the context in the trapframe, but base the trace
+ * on the PCB. The PCB doesn't have to be perfect, as long as it contains
+ * enough for a backtrace.
+ */
+void
+makectx(struct trapframe *tf, struct pcb *pcb)
+{
+
+ pcb->pcb_r12 = tf->tf_r12;
+ pcb->pcb_r13 = tf->tf_r13;
+ pcb->pcb_r14 = tf->tf_r14;
+ pcb->pcb_r15 = tf->tf_r15;
+ pcb->pcb_rbp = tf->tf_rbp;
+ pcb->pcb_rbx = tf->tf_rbx;
+ pcb->pcb_rip = tf->tf_rip;
+ pcb->pcb_rsp = tf->tf_rsp;
+}
+
+int
+ptrace_set_pc(struct thread *td, unsigned long addr)
+{
+ td->td_frame->tf_rip = addr;
+ return (0);
+}
+
+int
+ptrace_single_step(struct thread *td)
+{
+ td->td_frame->tf_rflags |= PSL_T;
+ return (0);
+}
+
+int
+ptrace_clear_single_step(struct thread *td)
+{
+ td->td_frame->tf_rflags &= ~PSL_T;
+ return (0);
+}
+
+int
+fill_regs(struct thread *td, struct reg *regs)
+{
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+ return (fill_frame_regs(tp, regs));
+}
+
+int
+fill_frame_regs(struct trapframe *tp, struct reg *regs)
+{
+ regs->r_r15 = tp->tf_r15;
+ regs->r_r14 = tp->tf_r14;
+ regs->r_r13 = tp->tf_r13;
+ regs->r_r12 = tp->tf_r12;
+ regs->r_r11 = tp->tf_r11;
+ regs->r_r10 = tp->tf_r10;
+ regs->r_r9 = tp->tf_r9;
+ regs->r_r8 = tp->tf_r8;
+ regs->r_rdi = tp->tf_rdi;
+ regs->r_rsi = tp->tf_rsi;
+ regs->r_rbp = tp->tf_rbp;
+ regs->r_rbx = tp->tf_rbx;
+ regs->r_rdx = tp->tf_rdx;
+ regs->r_rcx = tp->tf_rcx;
+ regs->r_rax = tp->tf_rax;
+ regs->r_rip = tp->tf_rip;
+ regs->r_cs = tp->tf_cs;
+ regs->r_rflags = tp->tf_rflags;
+ regs->r_rsp = tp->tf_rsp;
+ regs->r_ss = tp->tf_ss;
+ if (tp->tf_flags & TF_HASSEGS) {
+ regs->r_ds = tp->tf_ds;
+ regs->r_es = tp->tf_es;
+ regs->r_fs = tp->tf_fs;
+ regs->r_gs = tp->tf_gs;
+ } else {
+ regs->r_ds = 0;
+ regs->r_es = 0;
+ regs->r_fs = 0;
+ regs->r_gs = 0;
+ }
+ return (0);
+}
+
+int
+set_regs(struct thread *td, struct reg *regs)
+{
+ struct trapframe *tp;
+ register_t rflags;
+
+ tp = td->td_frame;
+ rflags = regs->r_rflags & 0xffffffff;
+ if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
+ return (EINVAL);
+ tp->tf_r15 = regs->r_r15;
+ tp->tf_r14 = regs->r_r14;
+ tp->tf_r13 = regs->r_r13;
+ tp->tf_r12 = regs->r_r12;
+ tp->tf_r11 = regs->r_r11;
+ tp->tf_r10 = regs->r_r10;
+ tp->tf_r9 = regs->r_r9;
+ tp->tf_r8 = regs->r_r8;
+ tp->tf_rdi = regs->r_rdi;
+ tp->tf_rsi = regs->r_rsi;
+ tp->tf_rbp = regs->r_rbp;
+ tp->tf_rbx = regs->r_rbx;
+ tp->tf_rdx = regs->r_rdx;
+ tp->tf_rcx = regs->r_rcx;
+ tp->tf_rax = regs->r_rax;
+ tp->tf_rip = regs->r_rip;
+ tp->tf_cs = regs->r_cs;
+ tp->tf_rflags = rflags;
+ tp->tf_rsp = regs->r_rsp;
+ tp->tf_ss = regs->r_ss;
+ if (0) { /* XXXKIB */
+ tp->tf_ds = regs->r_ds;
+ tp->tf_es = regs->r_es;
+ tp->tf_fs = regs->r_fs;
+ tp->tf_gs = regs->r_gs;
+ tp->tf_flags = TF_HASSEGS;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ }
+ return (0);
+}
+
+/* XXX check all this stuff! */
+/* externalize from sv_xmm */
+static void
+fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
+{
+ struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
+ struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+ /* pcb -> fpregs */
+ bzero(fpregs, sizeof(*fpregs));
+
+ /* FPU control/status */
+ penv_fpreg->en_cw = penv_xmm->en_cw;
+ penv_fpreg->en_sw = penv_xmm->en_sw;
+ penv_fpreg->en_tw = penv_xmm->en_tw;
+ penv_fpreg->en_opcode = penv_xmm->en_opcode;
+ penv_fpreg->en_rip = penv_xmm->en_rip;
+ penv_fpreg->en_rdp = penv_xmm->en_rdp;
+ penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
+ penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
+
+ /* SSE registers */
+ for (i = 0; i < 16; ++i)
+ bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
+}
+
+/* internalize from fpregs into sv_xmm */
+static void
+set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
+{
+ struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
+ int i;
+
+ /* fpregs -> pcb */
+ /* FPU control/status */
+ penv_xmm->en_cw = penv_fpreg->en_cw;
+ penv_xmm->en_sw = penv_fpreg->en_sw;
+ penv_xmm->en_tw = penv_fpreg->en_tw;
+ penv_xmm->en_opcode = penv_fpreg->en_opcode;
+ penv_xmm->en_rip = penv_fpreg->en_rip;
+ penv_xmm->en_rdp = penv_fpreg->en_rdp;
+ penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
+ penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
+
+ /* SSE registers */
+ for (i = 0; i < 16; ++i)
+ bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
+}
+
+/* externalize from td->pcb */
+int
+fill_fpregs(struct thread *td, struct fpreg *fpregs)
+{
+
+ KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
+ P_SHOULDSTOP(td->td_proc),
+ ("not suspended thread %p", td));
+ fpugetregs(td);
+ fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs);
+ return (0);
+}
+
+/* internalize to td->pcb */
+int
+set_fpregs(struct thread *td, struct fpreg *fpregs)
+{
+
+ set_fpregs_xmm(fpregs, get_pcb_user_save_td(td));
+ fpuuserinited(td);
+ return (0);
+}
+
+/*
+ * Get machine context.
+ */
+int
+get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+
+ pcb = td->td_pcb;
+ tp = td->td_frame;
+ PROC_LOCK(curthread->td_proc);
+ mcp->mc_onstack = sigonstack(tp->tf_rsp);
+ PROC_UNLOCK(curthread->td_proc);
+ mcp->mc_r15 = tp->tf_r15;
+ mcp->mc_r14 = tp->tf_r14;
+ mcp->mc_r13 = tp->tf_r13;
+ mcp->mc_r12 = tp->tf_r12;
+ mcp->mc_r11 = tp->tf_r11;
+ mcp->mc_r10 = tp->tf_r10;
+ mcp->mc_r9 = tp->tf_r9;
+ mcp->mc_r8 = tp->tf_r8;
+ mcp->mc_rdi = tp->tf_rdi;
+ mcp->mc_rsi = tp->tf_rsi;
+ mcp->mc_rbp = tp->tf_rbp;
+ mcp->mc_rbx = tp->tf_rbx;
+ mcp->mc_rcx = tp->tf_rcx;
+ mcp->mc_rflags = tp->tf_rflags;
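+	/*
+	 * GET_MC_CLEAR_RET is used by getcontext(2)-style callers so that
+	 * the saved context, when later resumed, appears to return
+	 * success: %rax/%rdx are zeroed and the carry flag (the syscall
+	 * error indication) is cleared.
+	 */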
+ if (flags & GET_MC_CLEAR_RET) {
+ mcp->mc_rax = 0;
+ mcp->mc_rdx = 0;
+ mcp->mc_rflags &= ~PSL_C;
+ } else {
+ mcp->mc_rax = tp->tf_rax;
+ mcp->mc_rdx = tp->tf_rdx;
+ }
+ mcp->mc_rip = tp->tf_rip;
+ mcp->mc_cs = tp->tf_cs;
+ mcp->mc_rsp = tp->tf_rsp;
+ mcp->mc_ss = tp->tf_ss;
+ mcp->mc_ds = tp->tf_ds;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_gs = tp->tf_gs;
+ mcp->mc_flags = tp->tf_flags;
+ mcp->mc_len = sizeof(*mcp);
+ get_fpcontext(td, mcp, NULL, 0);
+ mcp->mc_fsbase = pcb->pcb_fsbase;
+ mcp->mc_gsbase = pcb->pcb_gsbase;
+ mcp->mc_xfpustate = 0;
+ mcp->mc_xfpustate_len = 0;
+ bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
+ return (0);
+}
+
+/*
+ * Set machine context.
+ *
+ * Note that we only set the user-modifiable flags, and we won't
+ * touch the %cs selector.
+ */
+int
+set_mcontext(struct thread *td, const mcontext_t *mcp)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+ char *xfpustate;
+ long rflags;
+ int ret;
+
+ pcb = td->td_pcb;
+ tp = td->td_frame;
+ if (mcp->mc_len != sizeof(*mcp) ||
+ (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
+ return (EINVAL);
+ rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
+ (tp->tf_rflags & ~PSL_USERCHANGE);
+ if (mcp->mc_flags & _MC_HASFPXSTATE) {
+ if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+ sizeof(struct savefpu))
+ return (EINVAL);
+ xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+ ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
+ mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ } else
+ xfpustate = NULL;
+ ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ tp->tf_r15 = mcp->mc_r15;
+ tp->tf_r14 = mcp->mc_r14;
+ tp->tf_r13 = mcp->mc_r13;
+ tp->tf_r12 = mcp->mc_r12;
+ tp->tf_r11 = mcp->mc_r11;
+ tp->tf_r10 = mcp->mc_r10;
+ tp->tf_r9 = mcp->mc_r9;
+ tp->tf_r8 = mcp->mc_r8;
+ tp->tf_rdi = mcp->mc_rdi;
+ tp->tf_rsi = mcp->mc_rsi;
+ tp->tf_rbp = mcp->mc_rbp;
+ tp->tf_rbx = mcp->mc_rbx;
+ tp->tf_rdx = mcp->mc_rdx;
+ tp->tf_rcx = mcp->mc_rcx;
+ tp->tf_rax = mcp->mc_rax;
+ tp->tf_rip = mcp->mc_rip;
+ tp->tf_rflags = rflags;
+ tp->tf_rsp = mcp->mc_rsp;
+ tp->tf_ss = mcp->mc_ss;
+ tp->tf_flags = mcp->mc_flags;
+ if (tp->tf_flags & TF_HASSEGS) {
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_gs = mcp->mc_gs;
+ }
+ if (mcp->mc_flags & _MC_HASBASES) {
+ pcb->pcb_fsbase = mcp->mc_fsbase;
+ pcb->pcb_gsbase = mcp->mc_gsbase;
+ }
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ return (0);
+}
+
+static void
+get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
+ size_t xfpusave_len)
+{
+ size_t max_len, len;
+
+ mcp->mc_ownedfp = fpugetregs(td);
+ bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate,
+ sizeof(mcp->mc_fpstate));
+ mcp->mc_fpformat = fpuformat();
+ if (!use_xsave || xfpusave_len == 0)
+ return;
+ max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+ len = xfpusave_len;
+ if (len > max_len) {
+ len = max_len;
+		bzero(xfpusave + max_len, xfpusave_len - max_len);
+ }
+ mcp->mc_flags |= _MC_HASFPXSTATE;
+ mcp->mc_xfpustate_len = len;
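+	/* The extended state follows the legacy savefpu area, hence "+ 1". */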
+ bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
+}
+
+static int
+set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate,
+ size_t xfpustate_len)
+{
+ struct savefpu *fpstate;
+ int error;
+
+ if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
+ return (0);
+ else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
+ return (EINVAL);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
+ /* We don't care what state is left in the FPU or PCB. */
+ fpstate_drop(td);
+ error = 0;
+ } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
+ fpstate = (struct savefpu *)&mcp->mc_fpstate;
+ fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
+ error = fpusetregs(td, fpstate, xfpustate, xfpustate_len);
+ } else
+ return (EINVAL);
+ return (error);
+}
+
+void
+fpstate_drop(struct thread *td)
+{
+
+ KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
+ critical_enter();
+ if (PCPU_GET(fpcurthread) == td)
+ fpudrop();
+ /*
+ * XXX force a full drop of the fpu. The above only drops it if we
+ * owned it.
+ *
+ * XXX I don't much like fpugetuserregs()'s semantics of doing a full
+ * drop. Dropping only to the pcb matches fnsave's behaviour.
+ * We only need to drop to !PCB_INITDONE in sendsig(). But
+ * sendsig() is the only caller of fpugetuserregs()... perhaps we just
+ * have too many layers.
+ */
+ clear_pcb_flags(curthread->td_pcb,
+ PCB_FPUINITDONE | PCB_USERFPUINITDONE);
+ critical_exit();
+}
+
+int
+fill_dbregs(struct thread *td, struct dbreg *dbregs)
+{
+ struct pcb *pcb;
+
+ if (td == NULL) {
+ dbregs->dr[0] = rdr0();
+ dbregs->dr[1] = rdr1();
+ dbregs->dr[2] = rdr2();
+ dbregs->dr[3] = rdr3();
+ dbregs->dr[6] = rdr6();
+ dbregs->dr[7] = rdr7();
+ } else {
+ pcb = td->td_pcb;
+ dbregs->dr[0] = pcb->pcb_dr0;
+ dbregs->dr[1] = pcb->pcb_dr1;
+ dbregs->dr[2] = pcb->pcb_dr2;
+ dbregs->dr[3] = pcb->pcb_dr3;
+ dbregs->dr[6] = pcb->pcb_dr6;
+ dbregs->dr[7] = pcb->pcb_dr7;
+ }
+ dbregs->dr[4] = 0;
+ dbregs->dr[5] = 0;
+ dbregs->dr[8] = 0;
+ dbregs->dr[9] = 0;
+ dbregs->dr[10] = 0;
+ dbregs->dr[11] = 0;
+ dbregs->dr[12] = 0;
+ dbregs->dr[13] = 0;
+ dbregs->dr[14] = 0;
+ dbregs->dr[15] = 0;
+ return (0);
+}
+
+int
+set_dbregs(struct thread *td, struct dbreg *dbregs)
+{
+ struct pcb *pcb;
+ int i;
+
+ if (td == NULL) {
+ load_dr0(dbregs->dr[0]);
+ load_dr1(dbregs->dr[1]);
+ load_dr2(dbregs->dr[2]);
+ load_dr3(dbregs->dr[3]);
+ load_dr6(dbregs->dr[6]);
+ load_dr7(dbregs->dr[7]);
+ } else {
+ /*
+		 * Don't let an illegal value for dr7 get set.  Specifically,
+		 * check for undefined settings.  Setting these bit patterns
+		 * results in undefined behaviour and can lead to an unexpected
+		 * TRCTRAP or a general protection fault right here.  The 0x02
+		 * access type (break on I/O) is only defined when CR4.DE is
+		 * set.  Upper bits of dr6 and dr7 must not be set.
+ */
+ for (i = 0; i < 4; i++) {
+ if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
+ return (EINVAL);
+ if (td->td_frame->tf_cs == _ucode32sel &&
+ DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
+ return (EINVAL);
+ }
+ if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
+ (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
+ return (EINVAL);
+
+ pcb = td->td_pcb;
+
+ /*
+ * Don't let a process set a breakpoint that is not within the
+ * process's address space. If a process could do this, it
+ * could halt the system by setting a breakpoint in the kernel
+ * (if ddb was enabled). Thus, we need to check to make sure
+ * that no breakpoints are being enabled for addresses outside
+		 * the process's address space.
+ *
+ * XXX - what about when the watched area of the user's
+ * address space is written into from within the kernel
+ * ... wouldn't that still cause a breakpoint to be generated
+ * from within kernel mode?
+ */
+
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
+ /* dr0 is enabled */
+ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
+ /* dr1 is enabled */
+ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
+ /* dr2 is enabled */
+ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+ if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
+ /* dr3 is enabled */
+ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ }
+
+ pcb->pcb_dr0 = dbregs->dr[0];
+ pcb->pcb_dr1 = dbregs->dr[1];
+ pcb->pcb_dr2 = dbregs->dr[2];
+ pcb->pcb_dr3 = dbregs->dr[3];
+ pcb->pcb_dr6 = dbregs->dr[6];
+ pcb->pcb_dr7 = dbregs->dr[7];
+
+ set_pcb_flags(pcb, PCB_DBREGS);
+ }
+
+ return (0);
+}
+
+void
+reset_dbregs(void)
+{
+
+ load_dr7(0); /* Turn off the control bits first */
+ load_dr0(0);
+ load_dr1(0);
+ load_dr2(0);
+ load_dr3(0);
+ load_dr6(0);
+}
+
+/*
+ * Return > 0 if a hardware breakpoint has been hit, and the
+ * breakpoint was in user space. Return 0, otherwise.
+ */
+int
+user_dbreg_trap(void)
+{
+ u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */
+ u_int64_t bp; /* breakpoint bits extracted from dr6 */
+ int nbp; /* number of breakpoints that triggered */
+ caddr_t addr[4]; /* breakpoint addresses */
+ int i;
+
+ dr7 = rdr7();
+ if ((dr7 & 0x000000ff) == 0) {
+ /*
+		 * None of the breakpoint enable bits (L0-L3, G0-G3) in
+		 * dr7 are set, so the trap couldn't have been caused by
+		 * the hardware debug registers.
+ */
+ return 0;
+ }
+
+ nbp = 0;
+ dr6 = rdr6();
+ bp = dr6 & 0x0000000f;
+
+ if (!bp) {
+ /*
+		 * None of the breakpoint bits are set, meaning this
+		 * trap was not caused by any of the debug registers.
+ */
+ return 0;
+ }
+
+ /*
+	 * At least one of the breakpoints was hit; check which ones,
+	 * and whether any of them are user-space addresses.
+ */
+
+ if (bp & 0x01) {
+ addr[nbp++] = (caddr_t)rdr0();
+ }
+ if (bp & 0x02) {
+ addr[nbp++] = (caddr_t)rdr1();
+ }
+ if (bp & 0x04) {
+ addr[nbp++] = (caddr_t)rdr2();
+ }
+ if (bp & 0x08) {
+ addr[nbp++] = (caddr_t)rdr3();
+ }
+
+ for (i = 0; i < nbp; i++) {
+ if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
+ /*
+ * addr[i] is in user space
+ */
+ return nbp;
+ }
+ }
+
+ /*
+ * None of the breakpoints are in user space.
+ */
+ return 0;
+}
+
+#ifdef KDB
+
+/*
+ * Provide inb() and outb() as functions.  They are normally only available
+ * as inline functions and thus cannot be called from the debugger.
+ */
+
+/* silence compiler warnings */
+u_char inb_(u_short);
+void outb_(u_short, u_char);
+
+u_char
+inb_(u_short port)
+{
+ return inb(port);
+}
+
+void
+outb_(u_short port, u_char data)
+{
+ outb(port, data);
+}
+
+#endif /* KDB */
diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
new file mode 100644
index 0000000..abbbb21
--- /dev/null
+++ b/sys/amd64/amd64/mem.c
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department, and code derived from software contributed to
+ * Berkeley by William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: mem.c 1.13 89/10/08$
+ * from: @(#)mem.c 7.2 (Berkeley) 5/9/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Memory special file
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+
+#include <machine/specialreg.h>
+#include <machine/vmparam.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+
+#include <machine/memdev.h>
+
+/*
+ * Used in /dev/mem drivers and elsewhere
+ */
+MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
+
+/* ARGSUSED */
+int
+memrw(struct cdev *dev, struct uio *uio, int flags)
+{
+ int o;
+ u_long c = 0, v;
+ struct iovec *iov;
+ int error = 0;
+ vm_offset_t addr, eaddr;
+
+ GIANT_REQUIRED;
+
+ while (uio->uio_resid > 0 && error == 0) {
+ iov = uio->uio_iov;
+ if (iov->iov_len == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ if (uio->uio_iovcnt < 0)
+ panic("memrw");
+ continue;
+ }
+ if (dev2unit(dev) == CDEV_MINOR_MEM) {
+ v = uio->uio_offset;
+kmemphys:
+ o = v & PAGE_MASK;
+ c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
+ error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
+ continue;
+ }
+ else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
+ v = uio->uio_offset;
+
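+			/*
+			 * Direct-map addresses can be served through the
+			 * physical-memory path.
+			 */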
+ if (v >= DMAP_MIN_ADDRESS && v < DMAP_MAX_ADDRESS) {
+ v = DMAP_TO_PHYS(v);
+ goto kmemphys;
+ }
+
+ c = iov->iov_len;
+
+ /*
+ * Make sure that all of the pages are currently
+ * resident so that we don't create any zero-fill
+ * pages.
+ */
+ addr = trunc_page(v);
+ eaddr = round_page(v + c);
+
+ if (addr < VM_MIN_KERNEL_ADDRESS)
+ return (EFAULT);
+ for (; addr < eaddr; addr += PAGE_SIZE)
+ if (pmap_extract(kernel_pmap, addr) == 0)
+ return (EFAULT);
+
+ if (!kernacc((caddr_t)(long)v, c,
+ uio->uio_rw == UIO_READ ?
+ VM_PROT_READ : VM_PROT_WRITE))
+ return (EFAULT);
+
+ error = uiomove((caddr_t)(long)v, (int)c, uio);
+ continue;
+ }
+ /* else panic! */
+ }
+ return (error);
+}
+
+/*
+ * Allow user processes to mmap(2) some memory sections
+ * instead of going through read/write.
+ */
+/* ARGSUSED */
+int
+memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int prot __unused, vm_memattr_t *memattr __unused)
+{
+ if (dev2unit(dev) == CDEV_MINOR_MEM)
+ *paddr = offset;
+ else if (dev2unit(dev) == CDEV_MINOR_KMEM)
+ *paddr = vtophys(offset);
+ /* else panic! */
+ return (0);
+}
+
+/*
+ * Operations for changing memory attributes.
+ *
+ * This is basically just an ioctl shim for mem_range_attr_get
+ * and mem_range_attr_set.
+ */
+/* ARGSUSED */
+int
+memioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags,
+ struct thread *td)
+{
+ int nd, error = 0;
+ struct mem_range_op *mo = (struct mem_range_op *)data;
+ struct mem_range_desc *md;
+
+ /* is this for us? */
+ if ((cmd != MEMRANGE_GET) &&
+ (cmd != MEMRANGE_SET))
+ return (ENOTTY);
+
+ /* any chance we can handle this? */
+ if (mem_range_softc.mr_op == NULL)
+ return (EOPNOTSUPP);
+
+ /* do we have any descriptors? */
+ if (mem_range_softc.mr_ndesc == 0)
+ return (ENXIO);
+
+ switch (cmd) {
+ case MEMRANGE_GET:
+ nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
+ if (nd > 0) {
+ md = (struct mem_range_desc *)
+ malloc(nd * sizeof(struct mem_range_desc),
+ M_MEMDESC, M_WAITOK);
+ error = mem_range_attr_get(md, &nd);
+ if (!error)
+ error = copyout(md, mo->mo_desc,
+ nd * sizeof(struct mem_range_desc));
+ free(md, M_MEMDESC);
+ }
+ else
+ nd = mem_range_softc.mr_ndesc;
+ mo->mo_arg[0] = nd;
+ break;
+
+ case MEMRANGE_SET:
+ md = (struct mem_range_desc *)malloc(sizeof(struct mem_range_desc),
+ M_MEMDESC, M_WAITOK);
+ error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
+ /* clamp description string */
+ md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
+ if (error == 0)
+ error = mem_range_attr_set(md, &mo->mo_arg[0]);
+ free(md, M_MEMDESC);
+ break;
+ }
+ return (error);
+}
diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c
new file mode 100644
index 0000000..79d8bde
--- /dev/null
+++ b/sys/amd64/amd64/minidump_machdep.c
@@ -0,0 +1,479 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_pmap.h"
+#include "opt_watchdog.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/kernel.h>
+#include <sys/kerneldump.h>
+#include <sys/msgbuf.h>
+#include <sys/watchdog.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+#include <vm/pmap.h>
+#include <machine/atomic.h>
+#include <machine/elf.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+#include <machine/minidump.h>
+
+CTASSERT(sizeof(struct kerneldumpheader) == 512);
+
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define SIZEOF_METADATA (64*1024)
+
+#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+uint64_t *vm_page_dump;
+int vm_page_dump_size;
+
+static struct kerneldumpheader kdh;
+static off_t dumplo;
+
+/* Handle chunked writes. */
+static size_t fragsz;
+static void *dump_va;
+static size_t counter, progress, dumpsize;
+
+CTASSERT(sizeof(*vm_page_dump) == 8);
+
+static int
+is_dumpable(vm_paddr_t pa)
+{
+ vm_page_t m;
+ int i;
+
+ if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
+ return ((m->flags & PG_NODUMP) == 0);
+ for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
+ if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
+ return (1);
+ }
+ return (0);
+}
+
+#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
+
+static int
+blk_flush(struct dumperinfo *di)
+{
+ int error;
+
+ if (fragsz == 0)
+ return (0);
+
+ error = dump_write(di, dump_va, 0, dumplo, fragsz);
+ dumplo += fragsz;
+ fragsz = 0;
+ return (error);
+}
+
+static struct {
+ int min_per;
+ int max_per;
+ int visited;
+} progress_track[10] = {
+ { 0, 10, 0},
+ { 10, 20, 0},
+ { 20, 30, 0},
+ { 30, 40, 0},
+ { 40, 50, 0},
+ { 50, 60, 0},
+ { 60, 70, 0},
+ { 70, 80, 0},
+ { 80, 90, 0},
+ { 90, 100, 0}
+};
+
+static void
+report_progress(size_t progress, size_t dumpsize)
+{
+ int sofar, i;
+
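+	/* 'progress' counts bytes remaining, so invert it for percent done. */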
+ sofar = 100 - ((progress * 100) / dumpsize);
+ for (i = 0; i < 10; i++) {
+ if (sofar < progress_track[i].min_per || sofar > progress_track[i].max_per)
+ continue;
+ if (progress_track[i].visited)
+ return;
+ progress_track[i].visited = 1;
+ printf("..%d%%", sofar);
+ return;
+ }
+}
+
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+ size_t len;
+ int error, i, c;
+ u_int maxdumpsz;
+
+ maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
+ if (maxdumpsz == 0) /* seatbelt */
+ maxdumpsz = PAGE_SIZE;
+ error = 0;
+ if ((sz % PAGE_SIZE) != 0) {
+ printf("size not page aligned\n");
+ return (EINVAL);
+ }
+ if (ptr != NULL && pa != 0) {
+		printf("can't have both va and pa!\n");
+ return (EINVAL);
+ }
+ if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
+ printf("address not page aligned\n");
+ return (EINVAL);
+ }
+ if (ptr != NULL) {
+		/*
+		 * If we're doing a virtual dump, flush any pre-existing
+		 * pa pages.
+		 */
+ error = blk_flush(di);
+ if (error)
+ return (error);
+ }
+ while (sz) {
+ len = maxdumpsz - fragsz;
+ if (len > sz)
+ len = sz;
+ counter += len;
+ progress -= len;
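+		/* Report progress roughly every 16MB (2^24 bytes) written. */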
+ if (counter >> 24) {
+ report_progress(progress, dumpsize);
+ counter &= (1<<24) - 1;
+ }
+
+ wdog_kern_pat(WD_LASTVAL);
+
+ if (ptr) {
+ error = dump_write(di, ptr, 0, dumplo, len);
+ if (error)
+ return (error);
+ dumplo += len;
+ ptr += len;
+ sz -= len;
+ } else {
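+			/*
+			 * Gather physical pages into the crashdump map;
+			 * pmap_kenter_temporary() returns the base of
+			 * that map.
+			 */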
+ for (i = 0; i < len; i += PAGE_SIZE)
+ dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
+ fragsz += len;
+ pa += len;
+ sz -= len;
+ if (fragsz == maxdumpsz) {
+ error = blk_flush(di);
+ if (error)
+ return (error);
+ }
+ }
+
+ /* Check for user abort. */
+ c = cncheckc();
+ if (c == 0x03)
+ return (ECANCELED);
+ if (c != -1)
+ printf(" (CTRL-C to abort) ");
+ }
+
+ return (0);
+}
+
+/* A fake page table page, to avoid having to handle both 4K and 2M pages */
+static pd_entry_t fakepd[NPDEPG];
+
+void
+minidumpsys(struct dumperinfo *di)
+{
+ uint32_t pmapsize;
+ vm_offset_t va;
+ int error;
+ uint64_t bits;
+ uint64_t *pdp, *pd, *pt, pa;
+ int i, j, k, n, bit;
+ int retry_count;
+ struct minidumphdr mdhdr;
+
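+	/*
+	 * Two passes: first walk the kernel page tables and mark every
+	 * dumpable page in vm_page_dump; then write the headers, msgbuf,
+	 * bitmap, page tables and finally the marked pages themselves.
+	 * If the dump map grows while we write, retry a few times.
+	 */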
+ retry_count = 0;
+ retry:
+ retry_count++;
+ counter = 0;
+ /* Walk page table pages, set bits in vm_page_dump */
+ pmapsize = 0;
+ pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+ for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
+ kernel_vm_end); ) {
+ /*
+ * We always write a page, even if it is zero. Each
+ * page written corresponds to 1GB of space
+ */
+ pmapsize += PAGE_SIZE;
+ i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+ if ((pdp[i] & PG_V) == 0) {
+ va += NBPDP;
+ continue;
+ }
+
+ /*
+ * 1GB page is represented as 512 2MB pages in a dump.
+ */
+ if ((pdp[i] & PG_PS) != 0) {
+ va += NBPDP;
+ pa = pdp[i] & PG_PS_FRAME;
+ for (n = 0; n < NPDEPG * NPTEPG; n++) {
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ pa += PAGE_SIZE;
+ }
+ continue;
+ }
+
+ pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+ for (n = 0; n < NPDEPG; n++, va += NBPDR) {
+ j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1);
+
+ if ((pd[j] & PG_V) == 0)
+ continue;
+
+ if ((pd[j] & PG_PS) != 0) {
+ /* This is an entire 2M page. */
+ pa = pd[j] & PG_PS_FRAME;
+ for (k = 0; k < NPTEPG; k++) {
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ pa += PAGE_SIZE;
+ }
+ continue;
+ }
+
+ pa = pd[j] & PG_FRAME;
+ /* set bit for this PTE page */
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ /* and for each valid page in this 2MB block */
+ pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+ for (k = 0; k < NPTEPG; k++) {
+ if ((pt[k] & PG_V) == 0)
+ continue;
+ pa = pt[k] & PG_FRAME;
+ if (is_dumpable(pa))
+ dump_add_page(pa);
+ }
+ }
+ }
+
+ /* Calculate dump size. */
+ dumpsize = pmapsize;
+ dumpsize += round_page(msgbufp->msg_size);
+ dumpsize += round_page(vm_page_dump_size);
+ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i];
+ while (bits) {
+ bit = bsfq(bits);
+ pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+ /* Clear out undumpable pages now if needed */
+ if (is_dumpable(pa)) {
+ dumpsize += PAGE_SIZE;
+ } else {
+ dump_drop_page(pa);
+ }
+ bits &= ~(1ul << bit);
+ }
+ }
+ dumpsize += PAGE_SIZE;
+
+ /* Determine dump offset on device. */
+ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+ error = E2BIG;
+ goto fail;
+ }
+ dumplo = di->mediaoffset + di->mediasize - dumpsize;
+ dumplo -= sizeof(kdh) * 2;
+ progress = dumpsize;
+
+ /* Initialize mdhdr */
+ bzero(&mdhdr, sizeof(mdhdr));
+ strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+ mdhdr.version = MINIDUMP_VERSION;
+ mdhdr.msgbufsize = msgbufp->msg_size;
+ mdhdr.bitmapsize = vm_page_dump_size;
+ mdhdr.pmapsize = pmapsize;
+ mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
+ mdhdr.dmapbase = DMAP_MIN_ADDRESS;
+ mdhdr.dmapend = DMAP_MAX_ADDRESS;
+
+ mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
+
+	printf("Dumping %llu out of %ju MB:", (unsigned long long)dumpsize >> 20,
+ ptoa((uintmax_t)physmem) / 1048576);
+
+ /* Dump leader */
+ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
+ if (error)
+ goto fail;
+ dumplo += sizeof(kdh);
+
+ /* Dump my header */
+ bzero(&fakepd, sizeof(fakepd));
+ bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
+ error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+
+ /* Dump msgbuf up front */
+ error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
+ if (error)
+ goto fail;
+
+ /* Dump bitmap */
+ error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
+ if (error)
+ goto fail;
+
+ /* Dump kernel page directory pages */
+ bzero(fakepd, sizeof(fakepd));
+ pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
+ for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
+ kernel_vm_end); va += NBPDP) {
+ i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
+
+ /* We always write a page, even if it is zero */
+ if ((pdp[i] & PG_V) == 0) {
+ error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ /* flush, in case we reuse fakepd in the same block */
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ continue;
+ }
+
+ /* 1GB page is represented as 512 2MB pages in a dump */
+ if ((pdp[i] & PG_PS) != 0) {
+ /* PDPE and PDP have identical layout in this case */
+ fakepd[0] = pdp[i];
+ for (j = 1; j < NPDEPG; j++)
+ fakepd[j] = fakepd[j - 1] + NBPDR;
+ error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ /* flush, in case we reuse fakepd in the same block */
+ error = blk_flush(di);
+ if (error)
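+	/*
+	 * Every level 4 and level 3 slot aliases the same low 1GB, so the
+	 * trampoline stays mapped both at its physical address and at its
+	 * KERNBASE alias until the AP switches to the real kernel page
+	 * tables.
+	 */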
+ goto fail;
+ bzero(fakepd, sizeof(fakepd));
+ continue;
+ }
+
+ pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+ error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
+ if (error)
+ goto fail;
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+ }
+
+ /* Dump memory chunks */
+ /* XXX cluster it up and use blk_dump() */
+ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+ bits = vm_page_dump[i];
+ while (bits) {
+ bit = bsfq(bits);
+ pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
+ error = blk_write(di, 0, pa, PAGE_SIZE);
+ if (error)
+ goto fail;
+ bits &= ~(1ul << bit);
+ }
+ }
+
+ error = blk_flush(di);
+ if (error)
+ goto fail;
+
+ /* Dump trailer */
+ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
+ if (error)
+ goto fail;
+ dumplo += sizeof(kdh);
+
+ /* Signal completion, signoff and exit stage left. */
+ dump_write(di, NULL, 0, 0, 0);
+ printf("\nDump complete\n");
+ return;
+
+ fail:
+ if (error < 0)
+ error = -error;
+
+ printf("\n");
+ if (error == ENOSPC) {
+ printf("Dump map grown while dumping. ");
+ if (retry_count < 5) {
+ printf("Retrying...\n");
+ goto retry;
+ }
+ printf("Dump failed.\n");
+ }
+ else if (error == ECANCELED)
+ printf("Dump aborted\n");
+ else if (error == E2BIG)
+ printf("Dump failed. Partition too small.\n");
+ else
+ printf("** DUMP FAILED (ERROR %d) **\n", error);
+}
+
+void
+dump_add_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 6; /* 2^6 = 64 */
+ bit = pa & 63;
+ atomic_set_long(&vm_page_dump[idx], 1ul << bit);
+}
+
+void
+dump_drop_page(vm_paddr_t pa)
+{
+ int idx, bit;
+
+ pa >>= PAGE_SHIFT;
+ idx = pa >> 6; /* 2^6 = 64 */
+ bit = pa & 63;
+ atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
+}
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
new file mode 100644
index 0000000..31dbb3f
--- /dev/null
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -0,0 +1,1488 @@
+/*-
+ * Copyright (c) 1996, by Steve Passe
+ * Copyright (c) 2003, by Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_cpu.h"
+#include "opt_kstack_pages.h"
+#include "opt_sched.h"
+#include "opt_smp.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpuset.h>
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+
+#include <x86/apicreg.h>
+#include <machine/clock.h>
+#include <machine/cputypes.h>
+#include <machine/cpufunc.h>
+#include <x86/mca.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/smp.h>
+#include <machine/specialreg.h>
+#include <machine/tss.h>
+
+#define WARMBOOT_TARGET 0
+#define WARMBOOT_OFF (KERNBASE + 0x0467)
+#define WARMBOOT_SEG (KERNBASE + 0x0469)
+
+#define CMOS_REG (0x70)
+#define CMOS_DATA (0x71)
+#define BIOS_RESET (0x0f)
+#define BIOS_WARM (0x0a)
+
+/* lock region used by kernel profiling */
+int mcount_lock;
+
+int	mp_naps;		/* # of Application Processors */
+int boot_cpu_id = -1; /* designated BSP */
+
+extern struct pcpu __pcpu[];
+
+/* AP uses this during bootstrap. Do not staticize. */
+char *bootSTK;
+static int bootAP;
+
+/* Free these after use */
+void *bootstacks[MAXCPU];
+
+/* Temporary variables for init_secondary() */
+char *doublefault_stack;
+char *nmi_stack;
+void *dpcpu;
+
+struct pcb stoppcbs[MAXCPU];
+struct pcb **susppcbs;
+
+/* Variables needed for SMP tlb shootdown. */
+vm_offset_t smp_tlb_addr1;
+vm_offset_t smp_tlb_addr2;
+volatile int smp_tlb_wait;
+
+#ifdef COUNT_IPIS
+/* Interrupt counts. */
+static u_long *ipi_preempt_counts[MAXCPU];
+static u_long *ipi_ast_counts[MAXCPU];
+u_long *ipi_invltlb_counts[MAXCPU];
+u_long *ipi_invlrng_counts[MAXCPU];
+u_long *ipi_invlpg_counts[MAXCPU];
+u_long *ipi_invlcache_counts[MAXCPU];
+u_long *ipi_rendezvous_counts[MAXCPU];
+static u_long *ipi_hardclock_counts[MAXCPU];
+#endif
+
+extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
+
+/*
+ * Local data and functions.
+ */
+
+static volatile cpuset_t ipi_nmi_pending;
+
+/* Used to hold the APs until we are ready to release them. */
+static struct mtx ap_boot_mtx;
+
+/* Set to 1 once we're ready to let the APs out of the pen. */
+static volatile int aps_ready = 0;
+
+/*
+ * Store data from cpu_add() until later in the boot when we actually setup
+ * the APs.
+ */
+struct cpu_info {
+ int cpu_present:1;
+ int cpu_bsp:1;
+ int cpu_disabled:1;
+ int cpu_hyperthread:1;
+} static cpu_info[MAX_APIC_ID + 1];
+int cpu_apic_ids[MAXCPU];
+int apic_cpuids[MAX_APIC_ID + 1];
+
+/* Holds pending bitmap based IPIs per CPU */
+static volatile u_int cpu_ipi_pending[MAXCPU];
+
+static u_int boot_address;
+static int cpu_logical; /* logical cpus per core */
+static int cpu_cores; /* cores per package */
+
+static void assign_cpu_ids(void);
+static void set_interrupt_apic_ids(void);
+static int start_all_aps(void);
+static int start_ap(int apic_id);
+static void release_aps(void *dummy);
+
+static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
+static int hyperthreading_allowed = 1;
+static u_int bootMP_size;
+
+static void
+mem_range_AP_init(void)
+{
+ if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
+ mem_range_softc.mr_op->initAP(&mem_range_softc);
+}
+
+static void
+topo_probe_amd(void)
+{
+ int core_id_bits;
+ int id;
+
+ /* AMD processors do not support HTT. */
+ cpu_logical = 1;
+
+ if ((amd_feature2 & AMDID2_CMP) == 0) {
+ cpu_cores = 1;
+ return;
+ }
+
+ core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
+ AMDID_COREID_SIZE_SHIFT;
+ if (core_id_bits == 0) {
+ cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
+ return;
+ }
+
+ /* Fam 10h and newer should get here. */
+ for (id = 0; id <= MAX_APIC_ID; id++) {
+ /* Check logical CPU availability. */
+ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
+ continue;
+ /* Check if logical CPU has the same package ID. */
+ if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
+ continue;
+ cpu_cores++;
+ }
+}
+
+/*
+ * Round up to the next power of two, if necessary, and then
+ * take log2.
+ * Returns -1 if argument is zero.
+ */
+static __inline int
+mask_width(u_int x)
+{
+
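+	/*
+	 * powerof2(x) is true for powers of two (and zero), so the value
+	 * is shifted by 0 in that case and by 1 otherwise; e.g.
+	 * mask_width(6) = fls(12) - 1 = 3 = ceil(log2(6)).
+	 */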
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
+
+static void
+topo_probe_0x4(void)
+{
+ u_int p[4];
+ int pkg_id_bits;
+ int core_id_bits;
+ int max_cores;
+ int max_logical;
+ int id;
+
+ /* Both zero and one here mean one logical processor per package. */
+ max_logical = (cpu_feature & CPUID_HTT) != 0 ?
+ (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
+ if (max_logical <= 1)
+ return;
+
+ /*
+ * Because of uniformity assumption we examine only
+ * those logical processors that belong to the same
+ * package as BSP. Further, we count number of
+ * logical processors that belong to the same core
+ * as BSP thus deducing number of threads per core.
+ */
+ if (cpu_high >= 0x4) {
+ cpuid_count(0x04, 0, p);
+ max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ } else
+ max_cores = 1;
+ core_id_bits = mask_width(max_logical/max_cores);
+ if (core_id_bits < 0)
+ return;
+ pkg_id_bits = core_id_bits + mask_width(max_cores);
+
+ for (id = 0; id <= MAX_APIC_ID; id++) {
+ /* Check logical CPU availability. */
+ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
+ continue;
+ /* Check if logical CPU has the same package ID. */
+ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
+ continue;
+ cpu_cores++;
+ /* Check if logical CPU has the same package and core IDs. */
+ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
+ cpu_logical++;
+ }
+
+ KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
+ ("topo_probe_0x4 couldn't find BSP"));
+
+ cpu_cores /= cpu_logical;
+ hyperthreading_cpus = cpu_logical;
+}
+
+static void
+topo_probe_0xb(void)
+{
+ u_int p[4];
+ int bits;
+ int cnt;
+ int i;
+ int logical;
+ int type;
+ int x;
+
+ /* We only support three levels for now. */
+ for (i = 0; i < 3; i++) {
+ cpuid_count(0x0b, i, p);
+
+ /* Fall back if CPU leaf 11 doesn't really exist. */
+ if (i == 0 && p[1] == 0) {
+ topo_probe_0x4();
+ return;
+ }
+
+ bits = p[0] & 0x1f;
+ logical = p[1] &= 0xffff;
+ type = (p[2] >> 8) & 0xff;
+ if (type == 0 || logical == 0)
+ break;
+ /*
+ * Because of uniformity assumption we examine only
+ * those logical processors that belong to the same
+ * package as BSP.
+ */
+ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
+ if (!cpu_info[x].cpu_present ||
+ cpu_info[x].cpu_disabled)
+ continue;
+ if (x >> bits == boot_cpu_id >> bits)
+ cnt++;
+ }
+ if (type == CPUID_TYPE_SMT)
+ cpu_logical = cnt;
+ else if (type == CPUID_TYPE_CORE)
+ cpu_cores = cnt;
+ }
+ if (cpu_logical == 0)
+ cpu_logical = 1;
+ cpu_cores /= cpu_logical;
+}
+
+/*
+ * Both topology discovery code and code that consumes topology
+ * information assume top-down uniformity of the topology.
+ * That is, all physical packages must be identical and each
+ * core in a package must have the same number of threads.
+ * Topology information is queried only on BSP, on which this
+ * code runs and for which it can query CPUID information.
+ * Then topology is extrapolated on all packages using the
+ * uniformity assumption.
+ */
+static void
+topo_probe(void)
+{
+ static int cpu_topo_probed = 0;
+
+ if (cpu_topo_probed)
+ return;
+
+ CPU_ZERO(&logical_cpus_mask);
+ if (mp_ncpus <= 1)
+ cpu_cores = cpu_logical = 1;
+ else if (cpu_vendor_id == CPU_VENDOR_AMD)
+ topo_probe_amd();
+ else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
+ /*
+ * See Intel(R) 64 Architecture Processor
+ * Topology Enumeration article for details.
+ *
+ * Note that 0x1 <= cpu_high < 4 case should be
+ * compatible with topo_probe_0x4() logic when
+ * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
+ * or it should trigger the fallback otherwise.
+ */
+ if (cpu_high >= 0xb)
+ topo_probe_0xb();
+ else if (cpu_high >= 0x1)
+ topo_probe_0x4();
+ }
+
+ /*
+ * Fallback: assume each logical CPU is in separate
+ * physical package. That is, no multi-core, no SMT.
+ */
+ if (cpu_cores == 0 || cpu_logical == 0)
+ cpu_cores = cpu_logical = 1;
+ cpu_topo_probed = 1;
+}
+
+struct cpu_group *
+cpu_topo(void)
+{
+ int cg_flags;
+
+ /*
+	 * Determine whether any threading flags are
+	 * necessary.
+ */
+ topo_probe();
+ if (cpu_logical > 1 && hyperthreading_cpus)
+ cg_flags = CG_FLAG_HTT;
+ else if (cpu_logical > 1)
+ cg_flags = CG_FLAG_SMT;
+ else
+ cg_flags = 0;
+ if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
+ printf("WARNING: Non-uniform processors.\n");
+ printf("WARNING: Using suboptimal topology.\n");
+ return (smp_topo_none());
+ }
+ /*
+ * No multi-core or hyper-threaded.
+ */
+ if (cpu_logical * cpu_cores == 1)
+ return (smp_topo_none());
+ /*
+ * Only HTT no multi-core.
+ */
+ if (cpu_logical > 1 && cpu_cores == 1)
+ return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
+ /*
+ * Only multi-core no HTT.
+ */
+ if (cpu_cores > 1 && cpu_logical == 1)
+ return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
+ /*
+ * Both HTT and multi-core.
+ */
+ return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
+ CG_SHARE_L1, cpu_logical, cg_flags));
+}
+
+/*
+ * Calculate usable address in base memory for AP trampoline code.
+ */
+u_int
+mp_bootaddress(u_int basemem)
+{
+
+ bootMP_size = mptramp_end - mptramp_start;
+ boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
+ if (((basemem * 1024) - boot_address) < bootMP_size)
+ boot_address -= PAGE_SIZE; /* not enough, lower by 4k */
+ /* 3 levels of page table pages */
+ mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
+
+ return mptramp_pagetables;
+}
+
+void
+cpu_add(u_int apic_id, char boot_cpu)
+{
+
+ if (apic_id > MAX_APIC_ID) {
+ panic("SMP: APIC ID %d too high", apic_id);
+ return;
+ }
+ KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
+ apic_id));
+ cpu_info[apic_id].cpu_present = 1;
+ if (boot_cpu) {
+ KASSERT(boot_cpu_id == -1,
+ ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
+ boot_cpu_id));
+ boot_cpu_id = apic_id;
+ cpu_info[apic_id].cpu_bsp = 1;
+ }
+ if (mp_ncpus < MAXCPU) {
+ mp_ncpus++;
+ mp_maxid = mp_ncpus - 1;
+ }
+ if (bootverbose)
+ printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
+ "AP");
+}
+
+void
+cpu_mp_setmaxid(void)
+{
+
+ /*
+ * mp_maxid should be already set by calls to cpu_add().
+ * Just sanity check its value here.
+ */
+ if (mp_ncpus == 0)
+ KASSERT(mp_maxid == 0,
+ ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
+ else if (mp_ncpus == 1)
+ mp_maxid = 0;
+ else
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__,
+ mp_maxid, mp_ncpus));
+}
+
+int
+cpu_mp_probe(void)
+{
+
+ /*
+ * Always record BSP in CPU map so that the mbuf init code works
+ * correctly.
+ */
+ CPU_SETOF(0, &all_cpus);
+ if (mp_ncpus == 0) {
+ /*
+ * No CPUs were found, so this must be a UP system. Setup
+ * the variables to represent a system with a single CPU
+ * with an id of 0.
+ */
+ mp_ncpus = 1;
+ return (0);
+ }
+
+ /* At least one CPU was found. */
+ if (mp_ncpus == 1) {
+ /*
+ * One CPU was found, so this must be a UP system with
+ * an I/O APIC.
+ */
+ mp_maxid = 0;
+ return (0);
+ }
+
+ /* At least two CPUs were found. */
+ return (1);
+}
+
+/*
+ * Initialize the IPI handlers and start up the APs.
+ */
+void
+cpu_mp_start(void)
+{
+ int i;
+
+ /* Initialize the logical ID to APIC ID table. */
+ for (i = 0; i < MAXCPU; i++) {
+ cpu_apic_ids[i] = -1;
+ cpu_ipi_pending[i] = 0;
+ }
+
+ /* Install an inter-CPU IPI for TLB invalidation */
+ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
+
+ /* Install an inter-CPU IPI for cache invalidation. */
+ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
+
+ /* Install an inter-CPU IPI for all-CPU rendezvous */
+ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
+
+ /* Install generic inter-CPU IPI handler */
+ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
+ SDT_SYSIGT, SEL_KPL, 0);
+
+ /* Install an inter-CPU IPI for CPU stop/restart */
+ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
+
+ /* Install an inter-CPU IPI for CPU suspend/resume */
+ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0);
+
+ /* Set boot_cpu_id if needed. */
+ if (boot_cpu_id == -1) {
+ boot_cpu_id = PCPU_GET(apic_id);
+ cpu_info[boot_cpu_id].cpu_bsp = 1;
+ } else
+ KASSERT(boot_cpu_id == PCPU_GET(apic_id),
+ ("BSP's APIC ID doesn't match boot_cpu_id"));
+
+ /* Probe logical/physical core configuration. */
+ topo_probe();
+
+ assign_cpu_ids();
+
+ /* Start each Application Processor */
+ start_all_aps();
+
+ set_interrupt_apic_ids();
+}
+
+
+/*
+ * Print various information about the SMP system hardware and setup.
+ */
+void
+cpu_mp_announce(void)
+{
+ const char *hyperthread;
+ int i;
+
+ printf("FreeBSD/SMP: %d package(s) x %d core(s)",
+ mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
+ if (hyperthreading_cpus > 1)
+ printf(" x %d HTT threads", cpu_logical);
+ else if (cpu_logical > 1)
+ printf(" x %d SMT threads", cpu_logical);
+ printf("\n");
+
+ /* List active CPUs first. */
+ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
+ for (i = 1; i < mp_ncpus; i++) {
+ if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
+ hyperthread = "/HT";
+ else
+ hyperthread = "";
+ printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
+ cpu_apic_ids[i]);
+ }
+
+ /* List disabled CPUs last. */
+ for (i = 0; i <= MAX_APIC_ID; i++) {
+ if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
+ continue;
+ if (cpu_info[i].cpu_hyperthread)
+ hyperthread = "/HT";
+ else
+ hyperthread = "";
+ printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
+ i);
+ }
+}
+
+/*
+ * AP CPUs call this to initialize themselves.
+ */
+void
+init_secondary(void)
+{
+ struct pcpu *pc;
+ struct nmi_pcpu *np;
+ u_int64_t msr, cr0;
+ u_int cpuid;
+ int cpu, gsel_tss, x;
+ struct region_descriptor ap_gdt;
+
+ /* Set by the startup code for us to use */
+ cpu = bootAP;
+
+ /* Init tss */
+ common_tss[cpu] = common_tss[0];
+ common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */
+ common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
+ IOPAGES * PAGE_SIZE;
+ common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
+
+ /* The NMI stack runs on IST2. */
+ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
+ common_tss[cpu].tss_ist2 = (long) np;
+
+ /* Prepare private GDT */
+ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
+ for (x = 0; x < NGDT; x++) {
+ if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
+ x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
+ ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
+ }
+ ssdtosyssd(&gdt_segs[GPROC0_SEL],
+ (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
+ ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
+ ap_gdt.rd_base = (long) &gdt[NGDT * cpu];
+ lgdt(&ap_gdt); /* does magic intra-segment return */
+
+ /* Get per-cpu data */
+ pc = &__pcpu[cpu];
+
+ /* prime data page for it to use */
+ pcpu_init(pc, cpu, sizeof(struct pcpu));
+ dpcpu_init(dpcpu, cpu);
+ pc->pc_apic_id = cpu_apic_ids[cpu];
+ pc->pc_prvspace = pc;
+ pc->pc_curthread = 0;
+ pc->pc_tssp = &common_tss[cpu];
+ pc->pc_commontssp = &common_tss[cpu];
+ pc->pc_rsp0 = 0;
+ pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
+ GPROC0_SEL];
+ pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
+ pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
+ pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
+ GUSERLDT_SEL];
+
+ /* Save the per-cpu pointer for use by the NMI handler. */
+ np->np_pcpu = (register_t) pc;
+
+ wrmsr(MSR_FSBASE, 0); /* User value */
+ wrmsr(MSR_GSBASE, (u_int64_t)pc);
+ wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
+
+ lidt(&r_idt);
+
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ ltr(gsel_tss);
+
+ /*
+ * Set to a known state:
+ * Set by mpboot.s: CR0_PG, CR0_PE
+ * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
+ */
+ cr0 = rcr0();
+ cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
+ load_cr0(cr0);
+
+ /* Set up the fast syscall stuff */
+ msr = rdmsr(MSR_EFER) | EFER_SCE;
+ wrmsr(MSR_EFER, msr);
+ wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
+ wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
+ msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
+ ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
+ wrmsr(MSR_STAR, msr);
+ wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
+
+ /* Disable local APIC just to be sure. */
+ lapic_disable();
+
+ /* signal our startup to the BSP. */
+ mp_naps++;
+
+	/* Spin until the BSP releases the APs. */
+ while (!aps_ready)
+ ia32_pause();
+
+ /* Initialize the PAT MSR. */
+ pmap_init_pat();
+
+ /* set up CPU registers and state */
+ cpu_setregs();
+
+ /* set up SSE/NX registers */
+ initializecpu();
+
+ /* set up FPU state on the AP */
+ fpuinit();
+
+ /* A quick check from sanity claus */
+ cpuid = PCPU_GET(cpuid);
+ if (PCPU_GET(apic_id) != lapic_id()) {
+ printf("SMP: cpuid = %d\n", cpuid);
+ printf("SMP: actual apic_id = %d\n", lapic_id());
+ printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
+ panic("cpuid mismatch! boom!!");
+ }
+
+ /* Initialize curthread. */
+ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
+ PCPU_SET(curthread, PCPU_GET(idlethread));
+
+ mca_init();
+
+ mtx_lock_spin(&ap_boot_mtx);
+
+ /* Init local apic for irq's */
+ lapic_setup(1);
+
+ /* Set memory range attributes for this CPU to match the BSP */
+ mem_range_AP_init();
+
+ smp_cpus++;
+
+ CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
+ printf("SMP: AP CPU #%d Launched!\n", cpuid);
+
+ /* Determine if we are a logical CPU. */
+ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
+ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
+ CPU_SET(cpuid, &logical_cpus_mask);
+
+ if (bootverbose)
+ lapic_dump("AP");
+
+ if (smp_cpus == mp_ncpus) {
+ /* enable IPI's, tlb shootdown, freezes etc */
+ atomic_store_rel_int(&smp_started, 1);
+ smp_active = 1; /* historic */
+ }
+
+ /*
+ * Enable global pages TLB extension
+ * This also implicitly flushes the TLB
+ */
+
+ load_cr4(rcr4() | CR4_PGE);
+ load_ds(_udatasel);
+ load_es(_udatasel);
+ load_fs(_ufssel);
+ mtx_unlock_spin(&ap_boot_mtx);
+
+	/* Wait until all the APs are up. */
+ while (smp_started == 0)
+ ia32_pause();
+
+ /* Start per-CPU event timers. */
+ cpu_initclocks_ap();
+
+ sched_throw(NULL);
+
+ panic("scheduler returned us to %s", __func__);
+ /* NOTREACHED */
+}
+
+/*******************************************************************
+ * local functions and data
+ */
+
+/*
+ * We tell the I/O APIC code about all the CPUs we want to receive
+ * interrupts. If we don't want certain CPUs to receive IRQs we
+ * can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
+ */
+static void
+set_interrupt_apic_ids(void)
+{
+ u_int i, apic_id;
+
+ for (i = 0; i < MAXCPU; i++) {
+ apic_id = cpu_apic_ids[i];
+ if (apic_id == -1)
+ continue;
+ if (cpu_info[apic_id].cpu_bsp)
+ continue;
+ if (cpu_info[apic_id].cpu_disabled)
+ continue;
+
+ /* Don't let hyperthreads service interrupts. */
+ if (hyperthreading_cpus > 1 &&
+ apic_id % hyperthreading_cpus != 0)
+ continue;
+
+ intr_add_cpu(i);
+ }
+}
+
+/*
+ * Assign logical CPU IDs to local APICs.
+ */
+static void
+assign_cpu_ids(void)
+{
+ u_int i;
+
+ TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
+ &hyperthreading_allowed);
+
+ /* Check for explicitly disabled CPUs. */
+ for (i = 0; i <= MAX_APIC_ID; i++) {
+ if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
+ continue;
+
+ if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
+ cpu_info[i].cpu_hyperthread = 1;
+
+ /*
+ * Don't use HT CPU if it has been disabled by a
+ * tunable.
+ */
+ if (hyperthreading_allowed == 0) {
+ cpu_info[i].cpu_disabled = 1;
+ continue;
+ }
+ }
+
+ /* Don't use this CPU if it has been disabled by a tunable. */
+ if (resource_disabled("lapic", i)) {
+ cpu_info[i].cpu_disabled = 1;
+ continue;
+ }
+ }
+
+ if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
+ hyperthreading_cpus = 0;
+ cpu_logical = 1;
+ }
+
+ /*
+ * Assign CPU IDs to local APIC IDs and disable any CPUs
+ * beyond MAXCPU. CPU 0 is always assigned to the BSP.
+ *
+ * To minimize confusion for userland, we attempt to number
+ * CPUs such that all threads and cores in a package are
+ * grouped together. For now we assume that the BSP is always
+ * the first thread in a package and just start adding APs
+ * starting with the BSP's APIC ID.
+ */
+ mp_ncpus = 1;
+ cpu_apic_ids[0] = boot_cpu_id;
+ apic_cpuids[boot_cpu_id] = 0;
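+	/* Walk the APIC ID space circularly, starting just past the BSP. */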
+ for (i = boot_cpu_id + 1; i != boot_cpu_id;
+ i == MAX_APIC_ID ? i = 0 : i++) {
+ if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
+ cpu_info[i].cpu_disabled)
+ continue;
+
+ if (mp_ncpus < MAXCPU) {
+ cpu_apic_ids[mp_ncpus] = i;
+ apic_cpuids[i] = mp_ncpus;
+ mp_ncpus++;
+ } else
+ cpu_info[i].cpu_disabled = 1;
+ }
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
+ mp_ncpus));
+}
+
+/*
+ * start each AP in our list
+ */
+static int
+start_all_aps(void)
+{
+ vm_offset_t va = boot_address + KERNBASE;
+ u_int64_t *pt4, *pt3, *pt2;
+ u_int32_t mpbioswarmvec;
+ int apic_id, cpu, i;
+ u_char mpbiosreason;
+
+ mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
+
+ /* install the AP 1st level boot code */
+ pmap_kenter(va, boot_address);
+ pmap_invalidate_page(kernel_pmap, va);
+ bcopy(mptramp_start, (void *)va, bootMP_size);
+
+ /* Locate the page tables, they'll be below the trampoline */
+ pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
+ pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
+ pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
+
+ /* Create the initial 1GB replicated page tables */
+ for (i = 0; i < 512; i++) {
+ /* Each slot of the level 4 pages points to the same level 3 page */
+ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
+ pt4[i] |= PG_V | PG_RW | PG_U;
+
+ /* Each slot of the level 3 pages points to the same level 2 page */
+ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
+ pt3[i] |= PG_V | PG_RW | PG_U;
+
+ /* The level 2 page slots are mapped with 2MB pages for 1GB. */
+ pt2[i] = i * (2 * 1024 * 1024);
+ pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+ }
+
+ /* save the current value of the warm-start vector */
+ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
+ outb(CMOS_REG, BIOS_RESET);
+ mpbiosreason = inb(CMOS_DATA);
+
+ /* setup a vector to our boot code */
+ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+ *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
+
+ /* start each AP */
+ for (cpu = 1; cpu < mp_ncpus; cpu++) {
+ apic_id = cpu_apic_ids[cpu];
+
+ /* allocate and set up an idle stack data page */
+ bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
+ doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
+ nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
+ dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
+
+ bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
+ bootAP = cpu;
+
+ /* attempt to start the Application Processor */
+ if (!start_ap(apic_id)) {
+ /* restore the warmstart vector */
+ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
+ panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
+ }
+
+ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
+ }
+
+ /* restore the warmstart vector */
+ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
+
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, mpbiosreason);
+
+ /* number of APs actually started */
+ return mp_naps;
+}
+
+
+/*
+ * This function starts the AP (application processor) identified
+ * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
+ * to accomplish this. This is necessary because of the nuances
+ * of the different hardware we might encounter. It isn't pretty,
+ * but it seems to work.
+ */
+static int
+start_ap(int apic_id)
+{
+ int vector, ms;
+ int cpus;
+
+ /* calculate the vector */
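+	/* The STARTUP IPI vector selects the 4KB page of the real-mode entry. */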
+ vector = (boot_address >> 12) & 0xff;
+
+ /* used as a watchpoint to signal AP startup */
+ cpus = mp_naps;
+
+ ipi_startup(apic_id, vector);
+
+ /* Wait up to 5 seconds for it to start. */
+ for (ms = 0; ms < 5000; ms++) {
+ if (mp_naps > cpus)
+ return 1; /* return SUCCESS */
+ DELAY(1000);
+ }
+ return 0; /* return FAILURE */
+}
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW,
+ &ipi_range_size, 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif /* COUNT_XINVLTLB_HITS */
+
+/*
+ * Init and startup IPI.
+ */
+void
+ipi_startup(int apic_id, int vector)
+{
+
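+	/* The classic INIT / STARTUP / STARTUP (INIT-SIPI-SIPI) sequence. */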
+ /*
+	 * First we do an INIT IPI: this INIT IPI might be run, resetting
+	 * and running the target CPU; OR it might be latched (P5 bug),
+	 * leaving the CPU waiting for a STARTUP IPI; OR it might simply
+	 * be ignored.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+ lapic_ipi_wait(-1);
+ DELAY(10000); /* wait ~10mS */
+
+ /*
+ * next we do a STARTUP IPI: the previous INIT IPI might still be
+ * latched, (P5 bug) this 1st STARTUP would then terminate
+ * immediately, and the previously started INIT IPI would continue. OR
+	 * the previous INIT IPI has already run, and this STARTUP IPI will
+	 * run. OR the previous INIT IPI was ignored, and this STARTUP IPI
+	 * will run.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+	DELAY(200);		/* wait ~200us */
+
+ /*
+ * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+ * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+ * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+ * recognized after hardware RESET or INIT IPI.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ lapic_ipi_wait(-1);
+	DELAY(200);		/* wait ~200us */
+}
+
+/*
+ * Send an IPI to specified CPU handling the bitmap logic.
+ */
+static void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+ u_int bitmap, old_pending, new_pending;
+
+ KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+ if (IPI_IS_BITMAPED(ipi)) {
+ bitmap = 1 << ipi;
+ ipi = IPI_BITMAP_VECTOR;
+ do {
+ old_pending = cpu_ipi_pending[cpu];
+ new_pending = old_pending | bitmap;
+ } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+ old_pending, new_pending));
+ if (old_pending)
+ return;
+ }
+ lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
+
+/*
+ * Flush the TLB on all other CPUs
+ */
+static void
+smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+ u_int ncpu;
+
+	ncpu = mp_ncpus - 1;	/* does not shoot down self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_ipi_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ atomic_store_rel_int(&smp_tlb_wait, 0);
+ ipi_all_but_self(vector);
+ while (smp_tlb_wait < ncpu)
+ ia32_pause();
+ mtx_unlock_spin(&smp_ipi_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+ int cpu, ncpu, othercpus;
+
+ othercpus = mp_ncpus - 1;
+ if (CPU_ISFULLSET(&mask)) {
+ if (othercpus < 1)
+ return;
+ } else {
+ CPU_CLR(PCPU_GET(cpuid), &mask);
+ if (CPU_EMPTY(&mask))
+ return;
+ }
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_ipi_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ atomic_store_rel_int(&smp_tlb_wait, 0);
+ if (CPU_ISFULLSET(&mask)) {
+ ncpu = othercpus;
+ ipi_all_but_self(vector);
+ } else {
+ ncpu = 0;
+ while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
+ cpu, vector);
+ ipi_send_cpu(cpu, vector);
+ ncpu++;
+ }
+ }
+ while (smp_tlb_wait < ncpu)
+ ia32_pause();
+ mtx_unlock_spin(&smp_ipi_mtx);
+}
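
Both shootdown functions rely on a completion counter: the initiator publishes the address range, zeroes smp_tlb_wait with release semantics, and spins until every targeted CPU has acknowledged. The acknowledging side lives in the IPI vector code rather than in this hunk; it is assumed to look roughly like the sketch below (the function name is hypothetical):

/*
 * Assumed shape of the receiving side (hypothetical name; the real
 * handler is reached via apic_vector.S): perform the invalidation the
 * vector asks for, then acknowledge so the initiator stops spinning.
 */
static void
invltlb_handler_sketch(void)
{
	invltlb();				/* flush this CPU's TLB */
	atomic_add_int(&smp_tlb_wait, 1);	/* acknowledge */
}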
+
+void
+smp_cache_flush(void)
+{
+
+ if (smp_started)
+ smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
+}
+
+void
+smp_invltlb(void)
+{
+
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
+}
+
+void
+smp_invlpg(vm_offset_t addr)
+{
+
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+}
+
+void
+smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+{
+
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+}
+
+void
+smp_masked_invltlb(cpuset_t mask)
+{
+
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+}
+
+void
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
+{
+
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+}
+
+void
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
+{
+
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+}
+
+void
+ipi_bitmap_handler(struct trapframe frame)
+{
+ struct trapframe *oldframe;
+ struct thread *td;
+ int cpu = PCPU_GET(cpuid);
+ u_int ipi_bitmap;
+
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = &frame;
+ ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
+ if (ipi_bitmap & (1 << IPI_PREEMPT)) {
+#ifdef COUNT_IPIS
+ (*ipi_preempt_counts[cpu])++;
+#endif
+ sched_preempt(td);
+ }
+ if (ipi_bitmap & (1 << IPI_AST)) {
+#ifdef COUNT_IPIS
+ (*ipi_ast_counts[cpu])++;
+#endif
+ /* Nothing to do for AST */
+ }
+ if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
+#ifdef COUNT_IPIS
+ (*ipi_hardclock_counts[cpu])++;
+#endif
+ hardclockintr();
+ }
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
+}
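
ipi_send_cpu() and this handler form a simple coalescing protocol: only the sender that transitions cpu_ipi_pending from zero posts the hardware vector, and the handler drains every accumulated bit in one atomic swap. A standalone sketch of that protocol in C11 atomics, with the compare-and-set loop collapsed into fetch_or for brevity:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint pending;	/* stand-in for cpu_ipi_pending[cpu] */

static int
send_bit(unsigned int bit)	/* returns 1 if a vector must be sent */
{

	return (atomic_fetch_or(&pending, 1u << bit) == 0);
}

int
main(void)
{

	printf("vector? %d\n", send_bit(1));	/* 1: first setter posts */
	printf("vector? %d\n", send_bit(2));	/* 0: coalesced */
	printf("drained %#x\n", atomic_exchange(&pending, 0)); /* 0x6 */
	return (0);
}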
+
+/*
+ * send an IPI to a set of cpus.
+ */
+void
+ipi_selected(cpuset_t cpus, u_int ipi)
+{
+ int cpu;
+
+ /*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
+	 * of help in order to understand what the source is.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
+
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &cpus);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
+ ipi_send_cpu(cpu, ipi);
+ }
+}
+
+/*
+ * send an IPI to a specific CPU.
+ */
+void
+ipi_cpu(int cpu, u_int ipi)
+{
+
+ /*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
+	 * of help in order to understand what the source is.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
+
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
+ ipi_send_cpu(cpu, ipi);
+}
+
+/*
+ * send an IPI to all CPUs EXCEPT myself
+ */
+void
+ipi_all_but_self(u_int ipi)
+{
+ cpuset_t other_cpus;
+
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+
+ if (IPI_IS_BITMAPED(ipi)) {
+ ipi_selected(other_cpus, ipi);
+ return;
+ }
+
+ /*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
+	 * of help in order to understand what the source is.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
+
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+}
+
+int
+ipi_nmi_handler(void)
+{
+ u_int cpuid;
+
+ /*
+	 * As long as there is not a simple way to know about an NMI's
+ * source, if the bitmask for the current CPU is present in
+ * the global pending bitword an IPI_STOP_HARD has been issued
+ * and should be handled.
+ */
+ cpuid = PCPU_GET(cpuid);
+ if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
+ return (1);
+
+ CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
+ cpustop_handler();
+ return (0);
+}
+
+/*
+ * Handle an IPI_STOP by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpustop_handler(void)
+{
+ u_int cpu;
+
+ cpu = PCPU_GET(cpuid);
+
+ savectx(&stoppcbs[cpu]);
+
+ /* Indicate that we are stopped */
+ CPU_SET_ATOMIC(cpu, &stopped_cpus);
+
+ /* Wait for restart */
+ while (!CPU_ISSET(cpu, &started_cpus))
+ ia32_pause();
+
+ CPU_CLR_ATOMIC(cpu, &started_cpus);
+ CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+
+ if (cpu == 0 && cpustop_restartfunc != NULL) {
+ cpustop_restartfunc();
+ cpustop_restartfunc = NULL;
+ }
+}
+
+/*
+ * Handle an IPI_SUSPEND by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpususpend_handler(void)
+{
+ u_int cpu;
+
+ cpu = PCPU_GET(cpuid);
+
+ if (savectx(susppcbs[cpu])) {
+ ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
+ wbinvd();
+ CPU_SET_ATOMIC(cpu, &suspended_cpus);
+ } else {
+ pmap_init_pat();
+ initializecpu();
+ PCPU_SET(switchtime, 0);
+ PCPU_SET(switchticks, ticks);
+
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
+ }
+
+ /* Wait for resume */
+ while (!CPU_ISSET(cpu, &started_cpus))
+ ia32_pause();
+
+ /* Resume MCA and local APIC */
+ mca_resume();
+ lapic_setup(0);
+
+ CPU_CLR_ATOMIC(cpu, &started_cpus);
+}
+
+/*
+ * This is called once the rest of the system is up and running and we're
+ * ready to let the APs out of the pen.
+ */
+static void
+release_aps(void *dummy __unused)
+{
+
+ if (mp_ncpus == 1)
+ return;
+ atomic_store_rel_int(&aps_ready, 1);
+ while (smp_started == 0)
+ ia32_pause();
+}
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
+
+#ifdef COUNT_IPIS
+/*
+ * Setup interrupt counters for IPI handlers.
+ */
+static void
+mp_ipi_intrcnt(void *dummy)
+{
+ char buf[64];
+ int i;
+
+ CPU_FOREACH(i) {
+ snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
+ intrcnt_add(buf, &ipi_invltlb_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
+ intrcnt_add(buf, &ipi_invlrng_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
+ intrcnt_add(buf, &ipi_invlpg_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
+ intrcnt_add(buf, &ipi_invlcache_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
+ intrcnt_add(buf, &ipi_preempt_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:ast", i);
+ intrcnt_add(buf, &ipi_ast_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
+ intrcnt_add(buf, &ipi_rendezvous_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
+ intrcnt_add(buf, &ipi_hardclock_counts[i]);
+ }
+}
+SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
+#endif
+
diff --git a/sys/amd64/amd64/mp_watchdog.c b/sys/amd64/amd64/mp_watchdog.c
new file mode 100644
index 0000000..5cbd649
--- /dev/null
+++ b/sys/amd64/amd64/mp_watchdog.c
@@ -0,0 +1,211 @@
+/*-
+ * Copyright (c) 2004 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_mp_watchdog.h"
+#include "opt_sched.h"
+
+#ifdef SCHED_ULE
+#error MP_WATCHDOG cannot currently be used with SCHED_ULE
+#endif
+
+#include <sys/param.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/smp.h>
+#include <x86/apicreg.h>
+#include <machine/apicvar.h>
+#include <machine/mp_watchdog.h>
+
+/*
+ * mp_watchdog hijacks the idle thread on a specified CPU, prevents new work
+ * from being scheduled there, and uses it as a "watchdog" to detect kernel
+ * failure on other CPUs. This is made reasonable by inclusion of logical
+ * processors in Xeon hardware. The watchdog is configured by setting the
+ * debug.watchdog sysctl/tunable to the CPU of interest. A callout will then
+ * begin executing, resetting a timer that is gradually lowered by the watching
+ * thread. If the timer reaches 0, the watchdog fires by either dropping
+ * directly to the debugger, or by sending an NMI IPI to the boot processor.
+ * This is a somewhat less efficient substitute for dedicated watchdog
+ * hardware, but can be quite an effective tool for debugging hangs.
+ *
+ * XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but
+ * doesn't yet.
+ */
+static int watchdog_cpu = -1;
+static int watchdog_dontfire = 1;
+static int watchdog_timer = -1;
+static int watchdog_nmi = 1;
+
+TUNABLE_INT("debug.watchdog", &watchdog_cpu);
+SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0,
+ "IPI the boot processor with an NMI to enter the debugger");
+
+static struct callout watchdog_callout;
+
+static void watchdog_change(int wdcpu);
+
+/*
+ * Number of seconds before the watchdog will fire if the callout fails to
+ * reset the timer.
+ */
+#define WATCHDOG_THRESHOLD 10
+
+static void
+watchdog_init(void *arg)
+{
+
+ callout_init(&watchdog_callout, CALLOUT_MPSAFE);
+ if (watchdog_cpu != -1)
+ watchdog_change(watchdog_cpu);
+}
+
+/*
+ * This callout resets a timer until the watchdog kicks in. It acquires some
+ * critical locks to make sure things haven't gotten wedged with those locks
+ * held.
+ */
+static void
+watchdog_function(void *arg)
+{
+
+ /*
+ * Since the timer ran, we must not be wedged. Acquire some critical
+ * locks to make sure. Then reset the timer.
+ */
+ mtx_lock(&Giant);
+ watchdog_timer = WATCHDOG_THRESHOLD;
+ mtx_unlock(&Giant);
+ callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
+}
+SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL);
+
+static void
+watchdog_change(int wdcpu)
+{
+
+ if (wdcpu == -1 || wdcpu == 0xffffffff) {
+ /*
+ * Disable the watchdog.
+ */
+ watchdog_cpu = -1;
+ watchdog_dontfire = 1;
+ callout_stop(&watchdog_callout);
+ printf("watchdog stopped\n");
+ } else {
+ watchdog_timer = WATCHDOG_THRESHOLD;
+ watchdog_dontfire = 0;
+ watchdog_cpu = wdcpu;
+ callout_reset(&watchdog_callout, 1 * hz, watchdog_function,
+ NULL);
+ }
+}
+
+/*
+ * This sysctl sets which CPU is the watchdog CPU. Set to -1 or 0xffffffff
+ * to disable the watchdog.
+ */
+static int
+sysctl_watchdog(SYSCTL_HANDLER_ARGS)
+{
+ int error, temp;
+
+ temp = watchdog_cpu;
+ error = sysctl_handle_int(oidp, &temp, 0, req);
+ if (error)
+ return (error);
+
+ if (req->newptr != NULL)
+ watchdog_change(temp);
+ return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
+ sysctl_watchdog, "I", "");
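
The handler above makes debug.watchdog writable at runtime, so the watchdog CPU can be chosen with "sysctl debug.watchdog=1" or programmatically; a minimal userland sketch using the standard sysctlbyname(3) interface:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>

int
main(void)
{
	int cpu = 1;		/* CPU to dedicate to the watchdog */

	if (sysctlbyname("debug.watchdog", NULL, NULL, &cpu,
	    sizeof(cpu)) != 0)
		err(1, "debug.watchdog");
	return (0);
}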
+
+/*
+ * Drop into the debugger by sending an IPI NMI to the boot processor.
+ */
+static void
+watchdog_ipi_nmi(void)
+{
+
+ /*
+ * Deliver NMI to the boot processor. Why not?
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI,
+ boot_cpu_id);
+ lapic_ipi_wait(-1);
+}
+
+/*
+ * ap_watchdog() is called by the SMP idle loop code. It works on the same
+ * premise that the disabling of logical processors does: that if the cpu is
+ * idle, then it can ignore the world from then on, as nothing will be
+ * scheduled on it. Leaving aside multi-runqueue schedulers (SCHED_ULE) and
+ * explicit process migration (sched_bind()), this is not an unreasonable
+ * assumption.
+ */
+void
+ap_watchdog(u_int cpuid)
+{
+ char old_pcomm[MAXCOMLEN + 1];
+ struct proc *p;
+
+ if (watchdog_cpu != cpuid)
+ return;
+
+ printf("watchdog started on cpu %d\n", cpuid);
+ p = curproc;
+ bcopy(p->p_comm, old_pcomm, MAXCOMLEN + 1);
+ snprintf(p->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid);
+ while (1) {
+ DELAY(1000000); /* One second. */
+ if (watchdog_cpu != cpuid)
+ break;
+ atomic_subtract_int(&watchdog_timer, 1);
+ if (watchdog_timer < 4)
+ printf("Watchdog timer: %d\n", watchdog_timer);
+ if (watchdog_timer == 0 && watchdog_dontfire == 0) {
+ printf("Watchdog firing!\n");
+ watchdog_dontfire = 1;
+ if (watchdog_nmi)
+ watchdog_ipi_nmi();
+ else
+ kdb_enter(KDB_WHY_WATCHDOG, "mp_watchdog");
+ }
+ }
+ bcopy(old_pcomm, p->p_comm, MAXCOMLEN + 1);
+ printf("watchdog stopped on cpu %d\n", cpuid);
+}
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
new file mode 100644
index 0000000..ec30c72
--- /dev/null
+++ b/sys/amd64/amd64/mpboot.S
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h> /* miscellaneous asm macros */
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+ .data /* So we can modify it */
+
+ .p2align 4,0
+ .globl mptramp_start
+mptramp_start:
+ .code16
+ /*
+ * The AP enters here in response to the startup IPI.
+ * We are in real mode. %cs is the only segment register set.
+ */
+ cli /* make sure no interrupts */
+ mov %cs, %ax /* copy %cs to %ds. Remember these */
+ mov %ax, %ds /* are offsets rather than selectors */
+ mov %ax, %ss
+
+ /*
+	 * Find relocation base and patch the gdt descriptor and ljmp targets
+ */
+ xorl %ebx,%ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
+ orl %ebx, lgdt_desc-mptramp_start+2
+ orl %ebx, jmp_32-mptramp_start+2
+ orl %ebx, jmp_64-mptramp_start+1
+
+ /*
+ * Load the descriptor table pointer. We'll need it when running
+ * in 16 bit protected mode.
+ */
+ lgdt lgdt_desc-mptramp_start
+
+ /* Enable protected mode */
+ movl $CR0_PE, %eax
+ mov %eax, %cr0
+
+ /*
+ * Now execute a far jump to turn on protected mode. This
+ * causes the segment registers to turn into selectors and causes
+ * %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode-gdt, $protmode-mptramp_start
+ * but gas cannot assemble that. And besides, we patch the targets
+	 * in early startup and it's a little clearer what we are patching.
+ */
+jmp_32:
+ .byte 0x66 /* size override to 32 bits */
+ .byte 0xea /* opcode for far jump */
+ .long protmode-mptramp_start /* offset in segment */
+ .word bootcode-gdt /* index in gdt for 32 bit code */
+
+ /*
+ * At this point, we are running in 32 bit legacy protected mode.
+ */
+ .code32
+protmode:
+ mov $bootdata-gdt, %eax
+ mov %ax, %ds
+
+	/* Turn on the PAE and PSE bits for when paging is enabled */
+ mov %cr4, %eax
+ orl $(CR4_PAE | CR4_PSE), %eax
+ mov %eax, %cr4
+
+ /*
+ * Enable EFER.LME so that we get long mode when all the prereqs are
+ * in place. In this case, it turns on when CR0_PG is finally enabled.
+	 * Pick up a few other EFER bits that we'll need while we're here.
+ */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_LME | EFER_SCE, %eax
+ wrmsr
+
+ /*
+ * Point to the embedded page tables for startup. Note that this
+ * only gets accessed after we're actually in 64 bit mode, however
+ * we can only set the bottom 32 bits of %cr3 in this state. This
+ * means we are required to use a temporary page table that is below
+ * the 4GB limit. %ebx is still our relocation base. We could just
+ * subtract 3 * PAGE_SIZE, but that would be too easy.
+ */
+ leal mptramp_pagetables-mptramp_start(%ebx),%eax
+ movl (%eax), %eax
+ mov %eax, %cr3
+
+ /*
+	 * Finally, switch to long mode by enabling paging. We have
+	 * to be very careful here because all the segmentation disappears
+	 * out from underneath us. The spec says we can depend on the
+	 * subsequent pipelined branch to execute, but *only if* everything
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
+ */
+ mov %cr0, %eax
+ orl $CR0_PG, %eax
+ mov %eax, %cr0
+
+ /*
+	 * At this point paging is enabled, and we are in "compatibility" mode.
+ * We do another far jump to reload %cs with the 64 bit selector.
+ * %cr3 points to a 4-level page table page.
+ * We cannot yet jump all the way to the kernel because we can only
+ * specify a 32 bit linear address. So, yet another trampoline.
+ *
+ * The following instruction is:
+ * ljmp $kernelcode-gdt, $tramp_64-mptramp_start
+ * but gas cannot assemble that. And besides, we patch the targets
+	 * in early startup and it's a little clearer what we are patching.
+ */
+jmp_64:
+ .byte 0xea /* opcode for far jump */
+ .long tramp_64-mptramp_start /* offset in segment */
+ .word kernelcode-gdt /* index in gdt for 64 bit code */
+
+ /*
+ * Yeehar! We're running in 64 bit mode! We can mostly ignore our
+ * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with
+	 * a 1:1 1GB mirrored mapping over the entire address space. We had better
+ * switch to a real %cr3 promptly so that we can get to the direct map
+ * space. Remember that jmp is relative and that we've been relocated,
+ * so use an indirect jump.
+ */
+ .code64
+tramp_64:
+ movabsq $entry_64,%rax /* 64 bit immediate load */
+ jmp *%rax
+
+ .p2align 4,0
+gdt:
+ /*
+ * All segment descriptor tables start with a null descriptor
+ */
+ .long 0x00000000
+ .long 0x00000000
+
+ /*
+ * This is the 64 bit long mode code descriptor. There is no
+ * 64 bit data descriptor.
+ */
+kernelcode:
+ .long 0x00000000
+ .long 0x00209800
+
+ /*
+ * This is the descriptor for the 32 bit boot code.
+ * %cs: +A, +R, -C, DPL=0, +P, +D, +G
+ * Accessed, Readable, Present, 32 bit, 4G granularity
+ */
+bootcode:
+ .long 0x0000ffff
+ .long 0x00cf9b00
+
+ /*
+ * This is the descriptor for the 32 bit boot data.
+ * We load it into %ds and %ss. The bits for each selector
+ * are interpreted slightly differently.
+ * %ds: +A, +W, -E, DPL=0, +P, +D, +G
+ * %ss: +A, +W, -E, DPL=0, +P, +B, +G
+ * Accessed, Writeable, Expand up, Present, 32 bit, 4GB
+ * For %ds, +D means 'default operand size is 32 bit'.
+ * For %ss, +B means the stack register is %esp rather than %sp.
+ */
+bootdata:
+ .long 0x0000ffff
+ .long 0x00cf9300
+
+gdtend:
+
+ /*
+ * The address of our page table pages that the boot code
+ * uses to trampoline up to kernel address space.
+ */
+ .globl mptramp_pagetables
+mptramp_pagetables:
+ .long 0
+
+ /*
+ * The pseudo descriptor for lgdt to use.
+ */
+lgdt_desc:
+ .word gdtend-gdt /* Length */
+ .long gdt-mptramp_start /* Offset plus %ds << 4 */
+
+ .globl mptramp_end
+mptramp_end:
+
+ /*
+ * From here on down is executed in the kernel .text section.
+ *
+ * Load a real %cr3 that has all the direct map stuff and switches
+ * off the 1GB replicated mirror. Load a stack pointer and jump
+ * into AP startup code in C.
+ */
+ .text
+ .code64
+ .p2align 4,0
+entry_64:
+ movq KPML4phys, %rax
+ movq %rax, %cr3
+ movq bootSTK, %rsp
+ jmp init_secondary
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
new file mode 100644
index 0000000..1b1c86c
--- /dev/null
+++ b/sys/amd64/amd64/pmap.c
@@ -0,0 +1,5538 @@
+/*-
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
+ */
+/*-
+ * Copyright (c) 2003 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jake Burkholder,
+ * Safeport Network Services, and Network Associates Laboratories, the
+ * Security Research Division of Network Associates, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
+ * CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Manages physical address maps.
+ *
+ * Since the information managed by this module is
+ * also stored by the logical address mapping module,
+ * this module may throw away valid virtual-to-physical
+ * mappings at almost any time. However, invalidations
+ * of virtual-to-physical mappings must be done as
+ * requested.
+ *
+ * In order to cope with hardware architectures which
+ * make virtual-to-physical map invalidates expensive,
+ * this module may delay invalidation or reduced-protection
+ * operations until such time as they are actually
+ * necessary. This module is given full information as
+ * to which processors are currently using which maps,
+ * and to when physical maps must be made correct.
+ */
+
+#include "opt_pmap.h"
+#include "opt_vm.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/sx.h>
+#include <sys/vmmeter.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#ifdef SMP
+#include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_radix.h>
+#include <vm/vm_reserv.h>
+#include <vm/uma.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+
+#if !defined(DIAGNOSTIC)
+#ifdef __GNUC_GNU_INLINE__
+#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
+#else
+#define PMAP_INLINE extern inline
+#endif
+#else
+#define PMAP_INLINE
+#endif
+
+#ifdef PV_STATS
+#define PV_STAT(x) do { x ; } while (0)
+#else
+#define PV_STAT(x) do { } while (0)
+#endif
+
+#define pa_index(pa) ((pa) >> PDRSHIFT)
+#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
+
+#define NPV_LIST_LOCKS MAXCPU
+
+#define PHYS_TO_PV_LIST_LOCK(pa) \
+ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+
+#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
+ struct rwlock **_lockp = (lockp); \
+ struct rwlock *_new_lock; \
+ \
+ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
+ if (_new_lock != *_lockp) { \
+ if (*_lockp != NULL) \
+ rw_wunlock(*_lockp); \
+ *_lockp = _new_lock; \
+ rw_wlock(*_lockp); \
+ } \
+} while (0)
+
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+
+#define RELEASE_PV_LIST_LOCK(lockp) do { \
+ struct rwlock **_lockp = (lockp); \
+ \
+ if (*_lockp != NULL) { \
+ rw_wunlock(*_lockp); \
+ *_lockp = NULL; \
+ } \
+} while (0)
+
+#define VM_PAGE_TO_PV_LIST_LOCK(m) \
+ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
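
The striping above simply hashes a physical address to one of NPV_LIST_LOCKS rwlocks by its 2MB frame number, so all 4KB pages within one superpage frame contend on the same lock. A standalone check of that property (PDRSHIFT is the 2MB shift; the lock count here stands in for MAXCPU):

#include <stdio.h>

#define PDRSHIFT	21		/* log2(2MB) */
#define NPV_LIST_LOCKS	64		/* stand-in for MAXCPU */

int
main(void)
{
	unsigned long pa1 = 0x12345678UL;	/* two addresses in the */
	unsigned long pa2 = 0x123f0000UL;	/* same 2MB frame */

	printf("lock(%#lx) = %lu\n", pa1, (pa1 >> PDRSHIFT) % NPV_LIST_LOCKS);
	printf("lock(%#lx) = %lu\n", pa2, (pa2 >> PDRSHIFT) % NPV_LIST_LOCKS);
	/* both print index 17 */
	return (0);
}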
+
+struct pmap kernel_pmap_store;
+
+vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
+vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
+
+int nkpt;
+SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0,
+ "Number of kernel page table pages allocated on bootup");
+
+static int ndmpdp;
+static vm_paddr_t dmaplimit;
+vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
+pt_entry_t pg_nx;
+
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
+static int pat_works = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
+ "Is page attribute table fully functional?");
+
+static int pg_ps_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
+ "Are large page mappings enabled?");
+
+#define PAT_INDEX_SIZE 8
+static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */
+
+static u_int64_t KPTphys; /* phys addr of kernel level 1 */
+static u_int64_t KPDphys; /* phys addr of kernel level 2 */
+u_int64_t KPDPphys; /* phys addr of kernel level 3 */
+u_int64_t KPML4phys; /* phys addr of kernel level 4 */
+
+static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
+static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
+
+static struct rwlock_padalign pvh_global_lock;
+
+/*
+ * Data for the pv entry allocation mechanism
+ */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static struct mtx pv_chunks_mutex;
+static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+static struct md_page *pv_table;
+
+/*
+ * All those kernel PT submaps that BSD is so fond of
+ */
+pt_entry_t *CMAP1 = 0;
+caddr_t CADDR1 = 0;
+
+/*
+ * Crashdump maps.
+ */
+static caddr_t crashdumpmap;
+
+static void free_pv_chunk(struct pv_chunk *pc);
+static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static int popcnt_pc_map_elem(uint64_t elem);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
+static void reserve_pv_entries(pmap_t pmap, int needed,
+ struct rwlock **lockp);
+static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
+static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
+ vm_offset_t va);
+static int pmap_pvh_wired_mappings(struct md_page *pvh, int count);
+
+static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
+static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
+ vm_offset_t va, struct rwlock **lockp);
+static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
+ vm_offset_t va);
+static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot, struct rwlock **lockp);
+static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
+static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
+static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
+static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp);
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
+ vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
+static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
+ vm_page_t *free, struct rwlock **lockp);
+static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
+ vm_offset_t sva, pd_entry_t ptepde, vm_page_t *free,
+ struct rwlock **lockp);
+static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ vm_page_t *free);
+static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, struct rwlock **lockp);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
+
+static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
+ struct rwlock **lockp);
+static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va,
+ struct rwlock **lockp);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
+ struct rwlock **lockp);
+
+static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_page_t *free);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
+static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
+
+CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
+CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
+
+/*
+ * Move the kernel virtual free pointer to the next
+ * 2MB boundary. This is used to help improve performance
+ * by using a large (2MB) page for much of the kernel
+ * (.text, .data, .bss).
+ */
+static vm_offset_t
+pmap_kmem_choose(vm_offset_t addr)
+{
+ vm_offset_t newaddr = addr;
+
+ newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
+ return (newaddr);
+}
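
With NBPDR = 2MB, the expression above rounds any address up to the next superpage boundary; a quick standalone check:

#include <stdio.h>

#define NBPDR	(1UL << 21)		/* 2MB */

int
main(void)
{
	unsigned long addr = 0xffffffff8062a000UL;

	printf("%#lx -> %#lx\n", addr, (addr + NBPDR - 1) & ~(NBPDR - 1));
	/* prints: 0xffffffff8062a000 -> 0xffffffff80800000 */
	return (0);
}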
+
+/********************/
+/* Inline functions */
+/********************/
+
+/* Return a non-clipped PD index for a given VA */
+static __inline vm_pindex_t
+pmap_pde_pindex(vm_offset_t va)
+{
+ return (va >> PDRSHIFT);
+}
+
+
+/* Return various clipped indexes for a given VA */
+static __inline vm_pindex_t
+pmap_pte_index(vm_offset_t va)
+{
+
+ return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pde_index(vm_offset_t va)
+{
+
+ return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pdpe_index(vm_offset_t va)
+{
+
+ return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pml4e_index(vm_offset_t va)
+{
+
+ return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
+}
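
Each helper above peels nine bits off the virtual address, one level per 512-entry table. A standalone check for a sample kernel VA (KERNBASE + 2MB + 4KB), using the amd64 shift values:

#include <stdio.h>

int
main(void)
{
	unsigned long va = 0xffffffff80201000UL;

	printf("pml4 %lu pdp %lu pd %lu pt %lu\n",
	    (va >> 39) & 0x1ff,		/* PML4SHIFT */
	    (va >> 30) & 0x1ff,		/* PDPSHIFT */
	    (va >> 21) & 0x1ff,		/* PDRSHIFT */
	    (va >> 12) & 0x1ff);	/* PAGE_SHIFT */
	/* prints: pml4 511 pdp 510 pd 1 pt 1 */
	return (0);
}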
+
+/* Return a pointer to the PML4 slot that corresponds to a VA */
+static __inline pml4_entry_t *
+pmap_pml4e(pmap_t pmap, vm_offset_t va)
+{
+
+ return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
+}
+
+/* Return a pointer to the PDP slot that corresponds to a VA */
+static __inline pdp_entry_t *
+pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+
+ pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
+ return (&pdpe[pmap_pdpe_index(va)]);
+}
+
+/* Return a pointer to the PDP slot that corresponds to a VA */
+static __inline pdp_entry_t *
+pmap_pdpe(pmap_t pmap, vm_offset_t va)
+{
+ pml4_entry_t *pml4e;
+
+ pml4e = pmap_pml4e(pmap, va);
+ if ((*pml4e & PG_V) == 0)
+ return (NULL);
+ return (pmap_pml4e_to_pdpe(pml4e, va));
+}
+
+/* Return a pointer to the PD slot that corresponds to a VA */
+static __inline pd_entry_t *
+pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va)
+{
+ pd_entry_t *pde;
+
+ pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
+ return (&pde[pmap_pde_index(va)]);
+}
+
+/* Return a pointer to the PD slot that corresponds to a VA */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+
+ pdpe = pmap_pdpe(pmap, va);
+ if (pdpe == NULL || (*pdpe & PG_V) == 0)
+ return (NULL);
+ return (pmap_pdpe_to_pde(pdpe, va));
+}
+
+/* Return a pointer to the PT slot that corresponds to a VA */
+static __inline pt_entry_t *
+pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
+{
+ pt_entry_t *pte;
+
+ pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
+ return (&pte[pmap_pte_index(va)]);
+}
+
+/* Return a pointer to the PT slot that corresponds to a VA */
+static __inline pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t *pde;
+
+ pde = pmap_pde(pmap, va);
+ if (pde == NULL || (*pde & PG_V) == 0)
+ return (NULL);
+ if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */
+ return ((pt_entry_t *)pde);
+ return (pmap_pde_to_pte(pde, va));
+}
+
+static __inline void
+pmap_resident_count_inc(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pmap->pm_stats.resident_count += count;
+}
+
+static __inline void
+pmap_resident_count_dec(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pmap->pm_stats.resident_count -= count;
+}
+
+PMAP_INLINE pt_entry_t *
+vtopte(vm_offset_t va)
+{
+ u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+ return (PTmap + ((va >> PAGE_SHIFT) & mask));
+}
+
+static __inline pd_entry_t *
+vtopde(vm_offset_t va)
+{
+ u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+ return (PDmap + ((va >> PDRSHIFT) & mask));
+}
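
vtopte() and vtopde() work because of the recursive PML4 slot installed later in create_pagetables(): with the level-4 table mapped into itself, every PTE in the system appears at a fixed linear window (PTmap) indexed by the page number of the VA. A standalone sketch of where such a window lands, assuming the self-map sits in PML4 slot 256 (the slot number is an assumption for illustration):

#include <stdio.h>

int
main(void)
{
	unsigned long slot = 256;	/* assumed self-map PML4 index */
	unsigned long raw = slot << 39;	/* slot selects bits 47..39 */
	long va = (long)(raw << 16) >> 16;	/* sign-extend bit 47 */

	printf("PTmap base = %#lx\n", (unsigned long)va);
	/* prints: PTmap base = 0xffff800000000000 */
	return (0);
}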
+
+static u_int64_t
+allocpages(vm_paddr_t *firstaddr, int n)
+{
+ u_int64_t ret;
+
+ ret = *firstaddr;
+ bzero((void *)ret, n * PAGE_SIZE);
+ *firstaddr += n * PAGE_SIZE;
+ return (ret);
+}
+
+CTASSERT(powerof2(NDMPML4E));
+
+/* number of kernel PDP slots */
+#define NKPDPE(ptpgs) howmany((ptpgs), NPDEPG)
+
+static void
+nkpt_init(vm_paddr_t addr)
+{
+ int pt_pages;
+
+#ifdef NKPT
+ pt_pages = NKPT;
+#else
+ pt_pages = howmany(addr, 1 << PDRSHIFT);
+ pt_pages += NKPDPE(pt_pages);
+
+ /*
+ * Add some slop beyond the bare minimum required for bootstrapping
+ * the kernel.
+ *
+ * This is quite important when allocating KVA for kernel modules.
+ * The modules are required to be linked in the negative 2GB of
+ * the address space. If we run out of KVA in this region then
+ * pmap_growkernel() will need to allocate page table pages to map
+ * the entire 512GB of KVA space which is an unnecessary tax on
+ * physical memory.
+ */
+ pt_pages += 8; /* 16MB additional slop for kernel modules */
+#endif
+ nkpt = pt_pages;
+}
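
A worked example of that sizing, for a bootstrap ceiling of 4GB of physical address space (addr is in bytes here for simplicity; the kernel computes from page counts):

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))
#define NPDEPG		512
#define NKPDPE(ptpgs)	howmany((ptpgs), NPDEPG)

int
main(void)
{
	unsigned long addr = 4UL << 30;		/* 4GB */
	unsigned long pt_pages;

	pt_pages = howmany(addr, 1UL << 21);	/* 2048 PT pages */
	pt_pages += NKPDPE(pt_pages);		/* + 4 PD pages */
	pt_pages += 8;				/* + module slop */
	printf("nkpt = %lu\n", pt_pages);	/* prints: nkpt = 2060 */
	return (0);
}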
+
+static void
+create_pagetables(vm_paddr_t *firstaddr)
+{
+ int i, j, ndm1g, nkpdpe;
+
+ /* Allocate page table pages for the direct map */
+ ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
+ if (ndmpdp < 4) /* Minimum 4GB of dirmap */
+ ndmpdp = 4;
+ DMPDPphys = allocpages(firstaddr, NDMPML4E);
+ ndm1g = 0;
+ if ((amd_feature & AMDID_PAGE1GB) != 0)
+ ndm1g = ptoa(Maxmem) >> PDPSHIFT;
+ if (ndm1g < ndmpdp)
+ DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
+ dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
+
+ /* Allocate pages */
+ KPML4phys = allocpages(firstaddr, 1);
+ KPDPphys = allocpages(firstaddr, NKPML4E);
+
+ /*
+ * Allocate the initial number of kernel page table pages required to
+ * bootstrap. We defer this until after all memory-size dependent
+ * allocations are done (e.g. direct map), so that we don't have to
+ * build in too much slop in our estimate.
+ */
+ nkpt_init(*firstaddr);
+ nkpdpe = NKPDPE(nkpt);
+
+ KPTphys = allocpages(firstaddr, nkpt);
+ KPDphys = allocpages(firstaddr, nkpdpe);
+
+ /* Fill in the underlying page table pages */
+ /* Read-only from zero to physfree */
+ /* XXX not fully used, underneath 2M pages */
+ for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) {
+ ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
+ ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G;
+ }
+
+ /* Now map the page tables at their location within PTmap */
+ for (i = 0; i < nkpt; i++) {
+ ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
+ ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
+ }
+
+ /* Map from zero to end of allocations under 2M pages */
+ /* This replaces some of the KPTphys entries above */
+ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) {
+ ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
+ ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
+ }
+
+ /* And connect up the PD to the PDP */
+ for (i = 0; i < nkpdpe; i++) {
+ ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys +
+ (i << PAGE_SHIFT);
+ ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
+ }
+
+ /*
+ * Now, set up the direct map region using 2MB and/or 1GB pages. If
+ * the end of physical memory is not aligned to a 1GB page boundary,
+ * then the residual physical memory is mapped with 2MB pages. Later,
+ * if pmap_mapdev{_attr}() uses the direct map for non-write-back
+ * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings
+ * that are partially used.
+ */
+ for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
+ ((pd_entry_t *)DMPDphys)[j] = (vm_paddr_t)i << PDRSHIFT;
+ /* Preset PG_M and PG_A because demotion expects it. */
+ ((pd_entry_t *)DMPDphys)[j] |= PG_RW | PG_V | PG_PS | PG_G |
+ PG_M | PG_A;
+ }
+ for (i = 0; i < ndm1g; i++) {
+ ((pdp_entry_t *)DMPDPphys)[i] = (vm_paddr_t)i << PDPSHIFT;
+ /* Preset PG_M and PG_A because demotion expects it. */
+ ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_PS | PG_G |
+ PG_M | PG_A;
+ }
+ for (j = 0; i < ndmpdp; i++, j++) {
+ ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (j << PAGE_SHIFT);
+ ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
+ }
+
+ /* And recursively map PML4 to itself in order to get PTmap */
+ ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
+ ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
+
+ /* Connect the Direct Map slot(s) up to the PML4. */
+ for (i = 0; i < NDMPML4E; i++) {
+ ((pdp_entry_t *)KPML4phys)[DMPML4I + i] = DMPDPphys +
+ (i << PAGE_SHIFT);
+ ((pdp_entry_t *)KPML4phys)[DMPML4I + i] |= PG_RW | PG_V | PG_U;
+ }
+
+ /* Connect the KVA slot up to the PML4 */
+ ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
+ ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
+}
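
A worked example of the direct-map sizing at the top of create_pagetables(), for a machine with 16GB of RAM and 1GB-page support (sizes in bytes here; the kernel computes from Maxmem in pages): every PDP slot is backed by a 1GB mapping, no 2MB-level pages are needed, and dmaplimit lands at 16GB.

#include <stdio.h>

#define PDPSHIFT	30		/* 1GB per PDP entry */
#define NBPDP		(1UL << PDPSHIFT)

int
main(void)
{
	unsigned long maxmem = 16UL << 30;	/* 16GB of RAM */
	unsigned long ndmpdp = (maxmem + NBPDP - 1) >> PDPSHIFT;

	if (ndmpdp < 4)				/* same 4GB floor */
		ndmpdp = 4;
	printf("ndmpdp = %lu dmaplimit = %#lx\n", ndmpdp,
	    ndmpdp << PDPSHIFT);
	/* prints: ndmpdp = 16 dmaplimit = 0x400000000 */
	return (0);
}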
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ *
+ * On amd64 this is called after mapping has already been enabled
+ * and just syncs the pmap module with what has already been done.
+ * [We can't call it easily with mapping off since the kernel is not
+ * mapped with PA == VA, hence we would have to relocate every address
+ * from the linked base (virtual) address "KERNBASE" to the actual
+ * (physical) address starting relative to 0]
+ */
+void
+pmap_bootstrap(vm_paddr_t *firstaddr)
+{
+ vm_offset_t va;
+ pt_entry_t *pte, *unused;
+
+ /*
+ * Create an initial set of page tables to run the kernel in.
+ */
+ create_pagetables(firstaddr);
+
+ virtual_avail = (vm_offset_t) KERNBASE + *firstaddr;
+ virtual_avail = pmap_kmem_choose(virtual_avail);
+
+ virtual_end = VM_MAX_KERNEL_ADDRESS;
+
+
+ /* XXX do %cr0 as well */
+ load_cr4(rcr4() | CR4_PGE | CR4_PSE);
+ load_cr3(KPML4phys);
+ if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
+ load_cr4(rcr4() | CR4_SMEP);
+
+ /*
+ * Initialize the kernel pmap (which is statically allocated).
+ */
+ PMAP_LOCK_INIT(kernel_pmap);
+ kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
+ CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
+ TAILQ_INIT(&kernel_pmap->pm_pvchunk);
+
+ /*
+ * Initialize the global pv list lock.
+ */
+ rw_init(&pvh_global_lock, "pmap pv global");
+
+ /*
+ * Reserve some special page table entries/VA space for temporary
+ * mapping of pages.
+ */
+#define SYSMAP(c, p, v, n) \
+ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
+
+ va = virtual_avail;
+ pte = vtopte(va);
+
+ /*
+ * CMAP1 is only used for the memory test.
+ */
+ SYSMAP(caddr_t, CMAP1, CADDR1, 1)
+
+ /*
+ * Crashdump maps.
+ */
+ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
+
+ virtual_avail = va;
+
+ /* Initialize the PAT MSR. */
+ pmap_init_pat();
+}
+
+/*
+ * Setup the PAT MSR.
+ */
+void
+pmap_init_pat(void)
+{
+ int pat_table[PAT_INDEX_SIZE];
+ uint64_t pat_msr;
+ u_long cr0, cr4;
+ int i;
+
+ /* Bail if this CPU doesn't implement PAT. */
+ if ((cpu_feature & CPUID_PAT) == 0)
+ panic("no PAT??");
+
+ /* Set default PAT index table. */
+ for (i = 0; i < PAT_INDEX_SIZE; i++)
+ pat_table[i] = -1;
+ pat_table[PAT_WRITE_BACK] = 0;
+ pat_table[PAT_WRITE_THROUGH] = 1;
+ pat_table[PAT_UNCACHEABLE] = 3;
+ pat_table[PAT_WRITE_COMBINING] = 3;
+ pat_table[PAT_WRITE_PROTECTED] = 3;
+ pat_table[PAT_UNCACHED] = 3;
+
+ /* Initialize default PAT entries. */
+ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+
+ if (pat_works) {
+ /*
+ * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
+ * Program 5 and 6 as WP and WC.
+ * Leave 4 and 7 as WB and UC.
+ */
+ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
+ pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
+ PAT_VALUE(6, PAT_WRITE_COMBINING);
+ pat_table[PAT_UNCACHED] = 2;
+ pat_table[PAT_WRITE_PROTECTED] = 5;
+ pat_table[PAT_WRITE_COMBINING] = 6;
+ } else {
+ /*
+ * Just replace PAT Index 2 with WC instead of UC-.
+ */
+ pat_msr &= ~PAT_MASK(2);
+ pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
+ pat_table[PAT_WRITE_COMBINING] = 2;
+ }
+
+ /* Disable PGE. */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+
+ /* Disable caches (CD = 1, NW = 0). */
+ cr0 = rcr0();
+ load_cr0((cr0 & ~CR0_NW) | CR0_CD);
+
+ /* Flushes caches and TLBs. */
+ wbinvd();
+ invltlb();
+
+ /* Update PAT and index table. */
+ wrmsr(MSR_PAT, pat_msr);
+ for (i = 0; i < PAT_INDEX_SIZE; i++)
+ pat_index[i] = pat_table[i];
+
+ /* Flush caches and TLBs again. */
+ wbinvd();
+ invltlb();
+
+ /* Restore caches and PGE. */
+ load_cr0(cr0);
+ load_cr4(cr4);
+}
+
+/*
+ * Initialize a vm_page's machine-dependent fields.
+ */
+void
+pmap_page_init(vm_page_t m)
+{
+
+ TAILQ_INIT(&m->md.pv_list);
+ m->md.pat_mode = PAT_WRITE_BACK;
+}
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init, to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ */
+void
+pmap_init(void)
+{
+ vm_page_t mpte;
+ vm_size_t s;
+ int i, pv_npg;
+
+ /*
+ * Initialize the vm page array entries for the kernel pmap's
+ * page table pages.
+ */
+ for (i = 0; i < nkpt; i++) {
+ mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
+ KASSERT(mpte >= vm_page_array &&
+ mpte < &vm_page_array[vm_page_array_size],
+ ("pmap_init: page table page is out of range"));
+ mpte->pindex = pmap_pde_pindex(KERNBASE) + i;
+ mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
+ }
+
+ /*
+ * If the kernel is running in a virtual machine on an AMD Family 10h
+ * processor, then it must assume that MCA is enabled by the virtual
+ * machine monitor.
+ */
+ if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10)
+ workaround_erratum383 = 1;
+
+ /*
+ * Are large page mappings enabled?
+ */
+ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
+ if (pg_ps_enabled) {
+ KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
+ ("pmap_init: can't assign to pagesizes[1]"));
+ pagesizes[1] = NBPDR;
+ }
+
+ /*
+ * Initialize the pv chunk list mutex.
+ */
+ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
+
+ /*
+ * Initialize the pool of pv list locks.
+ */
+ for (i = 0; i < NPV_LIST_LOCKS; i++)
+ rw_init(&pv_list_locks[i], "pmap pv list");
+
+ /*
+ * Calculate the size of the pv head table for superpages.
+ */
+ for (i = 0; phys_avail[i + 1]; i += 2);
+ pv_npg = round_2mpage(phys_avail[(i - 2) + 1]) / NBPDR;
+
+ /*
+ * Allocate memory for the pv head table for superpages.
+ */
+ s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+ s = round_page(s);
+ pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
+ for (i = 0; i < pv_npg; i++)
+ TAILQ_INIT(&pv_table[i].pv_list);
+}
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
+ "2MB page mapping counters");
+
+static u_long pmap_pde_demotions;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
+ &pmap_pde_demotions, 0, "2MB page demotions");
+
+static u_long pmap_pde_mappings;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
+ &pmap_pde_mappings, 0, "2MB page mappings");
+
+static u_long pmap_pde_p_failures;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
+ &pmap_pde_p_failures, 0, "2MB page promotion failures");
+
+static u_long pmap_pde_promotions;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
+ &pmap_pde_promotions, 0, "2MB page promotions");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD, 0,
+ "1GB page mapping counters");
+
+static u_long pmap_pdpe_demotions;
+SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD,
+ &pmap_pdpe_demotions, 0, "1GB page demotions");
+
+/***************************************************
+ * Low level helper routines.....
+ ***************************************************/
+
+/*
+ * Determine the appropriate bits to set in a PTE or PDE for a specified
+ * caching mode.
+ */
+static int
+pmap_cache_bits(int mode, boolean_t is_pde)
+{
+ int cache_bits, pat_flag, pat_idx;
+
+ if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
+ panic("Unknown caching mode %d\n", mode);
+
+	/* The PAT bit is different for PTEs and PDEs. */
+ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
+
+ /* Map the caching mode to a PAT index. */
+ pat_idx = pat_index[mode];
+
+ /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
+ cache_bits = 0;
+ if (pat_idx & 0x4)
+ cache_bits |= pat_flag;
+ if (pat_idx & 0x2)
+ cache_bits |= PG_NC_PCD;
+ if (pat_idx & 0x1)
+ cache_bits |= PG_NC_PWT;
+ return (cache_bits);
+}
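
A standalone check of that decoding for write-combining under the "pat_works" layout above: WC maps to PAT index 6 (binary 110), which in a PTE sets the PAT and PCD bits but not PWT. The bit values mirror the hardware PTE flags.

#include <stdio.h>

#define PG_NC_PWT	0x008		/* page write through */
#define PG_NC_PCD	0x010		/* page cache disable */
#define PG_PTE_PAT	0x080		/* PAT bit in a 4KB PTE */

int
main(void)
{
	int pat_idx = 6, bits = 0;	/* WC with pat_works */

	if (pat_idx & 0x4)
		bits |= PG_PTE_PAT;
	if (pat_idx & 0x2)
		bits |= PG_NC_PCD;
	if (pat_idx & 0x1)
		bits |= PG_NC_PWT;
	printf("cache bits = %#x\n", bits);	/* prints: 0x90 */
	return (0);
}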
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB. Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+ u_long cr4;
+
+ if ((newpde & PG_PS) == 0)
+ /* Demotion: flush a specific 2MB page mapping. */
+ invlpg(va);
+ else if ((newpde & PG_G) == 0)
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB
+ * because there are too many to flush individually.
+ */
+ invltlb();
+ else {
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB,
+ * including any global (PG_G) mappings.
+ */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ /*
+ * Although preemption at this point could be detrimental to
+ * performance, it would not lead to an error. PG_G is simply
+ * ignored if CR4.PGE is clear. Moreover, in case this block
+ * is re-entered, the load_cr4() either above or below will
+		 * modify CR4.PGE, flushing the TLB.
+ */
+ load_cr4(cr4 | CR4_PGE);
+ }
+}
+#ifdef SMP
+/*
+ * For SMP, these functions have to use the IPI mechanism for coherence.
+ *
+ * N.B.: Before calling any of the following TLB invalidation functions,
+ * the calling processor must ensure that all stores updating a non-
+ * kernel page table are globally performed. Otherwise, another
+ * processor could cache an old, pre-update entry without being
+ * invalidated. This can happen one of two ways: (1) The pmap becomes
+ * active on another processor after its pm_active field is checked by
+ * one of the following functions but before a store updating the page
+ * table is globally performed. (2) The pmap becomes active on another
+ * processor before its pm_active field is checked but due to
+ * speculative loads one of the following functions still reads the
+ * pmap as inactive on the other processor.
+ *
+ * The kernel page table is exempt because its pm_active field is
+ * immutable. The kernel page table is always active on every
+ * processor.
+ */
+void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+ cpuset_t other_cpus;
+ u_int cpuid;
+
+ sched_pin();
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ invlpg(va);
+ smp_invlpg(va);
+ } else {
+ cpuid = PCPU_GET(cpuid);
+ other_cpus = all_cpus;
+ CPU_CLR(cpuid, &other_cpus);
+ if (CPU_ISSET(cpuid, &pmap->pm_active))
+ invlpg(va);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg(other_cpus, va);
+ }
+ sched_unpin();
+}
+
+void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ cpuset_t other_cpus;
+ vm_offset_t addr;
+ u_int cpuid;
+
+ sched_pin();
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ smp_invlpg_range(sva, eva);
+ } else {
+ cpuid = PCPU_GET(cpuid);
+ other_cpus = all_cpus;
+ CPU_CLR(cpuid, &other_cpus);
+ if (CPU_ISSET(cpuid, &pmap->pm_active))
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg_range(other_cpus, sva, eva);
+ }
+ sched_unpin();
+}
+
+void
+pmap_invalidate_all(pmap_t pmap)
+{
+ cpuset_t other_cpus;
+ u_int cpuid;
+
+ sched_pin();
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ invltlb();
+ smp_invltlb();
+ } else {
+ cpuid = PCPU_GET(cpuid);
+ other_cpus = all_cpus;
+ CPU_CLR(cpuid, &other_cpus);
+ if (CPU_ISSET(cpuid, &pmap->pm_active))
+ invltlb();
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invltlb(other_cpus);
+ }
+ sched_unpin();
+}
+
+void
+pmap_invalidate_cache(void)
+{
+
+ sched_pin();
+ wbinvd();
+ smp_cache_flush();
+ sched_unpin();
+}
+
+struct pde_action {
+ cpuset_t invalidate; /* processors that invalidate their TLB */
+ vm_offset_t va;
+ pd_entry_t *pde;
+ pd_entry_t newpde;
+ u_int store; /* processor that updates the PDE */
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if (act->store == PCPU_GET(cpuid))
+ pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate))
+ pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes. This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors. It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+ struct pde_action act;
+ cpuset_t active, other_cpus;
+ u_int cpuid;
+
+ sched_pin();
+ cpuid = PCPU_GET(cpuid);
+ other_cpus = all_cpus;
+ CPU_CLR(cpuid, &other_cpus);
+ if (pmap == kernel_pmap)
+ active = all_cpus;
+ else
+ active = pmap->pm_active;
+ if (CPU_OVERLAP(&active, &other_cpus)) {
+ act.store = cpuid;
+ act.invalidate = active;
+ act.va = va;
+ act.pde = pde;
+ act.newpde = newpde;
+ CPU_SET(cpuid, &active);
+ smp_rendezvous_cpus(active,
+ smp_no_rendevous_barrier, pmap_update_pde_action,
+ pmap_update_pde_teardown, &act);
+ } else {
+ pde_store(pde, newpde);
+ if (CPU_ISSET(cpuid, &active))
+ pmap_update_pde_invalidate(va, newpde);
+ }
+ sched_unpin();
+}
+#else /* !SMP */
+/*
+ * Normal, non-SMP, invalidation functions.
+ * We inline these within pmap.c for speed.
+ */
+PMAP_INLINE void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ invlpg(va);
+}
+
+PMAP_INLINE void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t addr;
+
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+}
+
+PMAP_INLINE void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ invltlb();
+}
+
+PMAP_INLINE void
+pmap_invalidate_cache(void)
+{
+
+ wbinvd();
+}
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+ pde_store(pde, newpde);
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ pmap_update_pde_invalidate(va, newpde);
+}
+#endif /* !SMP */
+
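+/*
+ * Ranges at least this large are presumed cheaper to handle with a full
+ * cache invalidation than with one CLFLUSH per cache line.
+ */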
+#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
+
+void
+pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+
+ if (cpu_feature & CPUID_SS)
+ ; /* If "Self Snoop" is supported, do nothing. */
+ else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+ eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+
+ /*
+ * XXX: Some CPUs fault, hang, or trash the local APIC
+ * registers if we use CLFLUSH on the local APIC
+ * range. The local APIC is always uncached, so we
+ * don't need to flush for that range anyway.
+ */
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+
+ /*
+ * Otherwise, do per-cache line flush. Use the mfence
+ * instruction to ensure that previous stores are
+ * included in the write-back. The processor
+ * propagates flush to other processors in the cache
+ * coherence domain.
+ */
+ mfence();
+ for (; sva < eva; sva += cpu_clflush_line_size)
+ clflush(sva);
+ mfence();
+ } else {
+
+ /*
+		 * No targeted cache flush methods are supported by the CPU,
+		 * or the supplied range is bigger than 2MB.
+		 * Globally invalidate the cache.
+ */
+ pmap_invalidate_cache();
+ }
+}
+
+/*
+ * Remove the specified set of pages from the data and instruction caches.
+ *
+ * In contrast to pmap_invalidate_cache_range(), this function does not
+ * rely on the CPU's self-snoop feature, because it is intended for use
+ * when moving pages into a different cache domain.
+ */
+void
+pmap_invalidate_cache_pages(vm_page_t *pages, int count)
+{
+ vm_offset_t daddr, eva;
+ int i;
+
+ if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
+ (cpu_feature & CPUID_CLFSH) == 0)
+ pmap_invalidate_cache();
+ else {
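+		/*
+		 * Flush each page through its permanent direct map (DMAP)
+		 * address; this works even for pages that are not currently
+		 * mapped at any other virtual address.
+		 */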
+ mfence();
+ for (i = 0; i < count; i++) {
+ daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
+ eva = daddr + PAGE_SIZE;
+ for (; daddr < eva; daddr += cpu_clflush_line_size)
+ clflush(daddr);
+ }
+ mfence();
+ }
+}
+
+/*
+ * Are we current address space or kernel?
+ */
+static __inline int
+pmap_is_current(pmap_t pmap)
+{
+ return (pmap == kernel_pmap ||
+ (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME));
+}
+
+/*
+ * Routine: pmap_extract
+ * Function:
+ * Extract the physical page address associated
+ * with the given map/virtual_address pair.
+ */
+vm_paddr_t
+pmap_extract(pmap_t pmap, vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_paddr_t pa;
+
+ pa = 0;
+ PMAP_LOCK(pmap);
+ pdpe = pmap_pdpe(pmap, va);
+ if (pdpe != NULL && (*pdpe & PG_V) != 0) {
+ if ((*pdpe & PG_PS) != 0)
+ pa = (*pdpe & PG_PS_FRAME) | (va & PDPMASK);
+ else {
+ pde = pmap_pdpe_to_pde(pdpe, va);
+ if ((*pde & PG_V) != 0) {
+ if ((*pde & PG_PS) != 0) {
+ pa = (*pde & PG_PS_FRAME) |
+ (va & PDRMASK);
+ } else {
+ pte = pmap_pde_to_pte(pde, va);
+ pa = (*pte & PG_FRAME) |
+ (va & PAGE_MASK);
+ }
+ }
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ return (pa);
+}
+
+/*
+ * Routine: pmap_extract_and_hold
+ * Function:
+ * Atomically extract and hold the physical page
+ * with the given pmap and virtual address pair
+ * if that mapping permits the given protection.
+ */
+vm_page_t
+pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
+{
+ pd_entry_t pde, *pdep;
+ pt_entry_t pte;
+ vm_paddr_t pa;
+ vm_page_t m;
+
+ pa = 0;
+ m = NULL;
+ PMAP_LOCK(pmap);
+retry:
+ pdep = pmap_pde(pmap, va);
+ if (pdep != NULL && (pde = *pdep)) {
+ if (pde & PG_PS) {
+ if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
+ if (vm_page_pa_tryrelock(pmap, (pde &
+ PG_PS_FRAME) | (va & PDRMASK), &pa))
+ goto retry;
+ m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
+ (va & PDRMASK));
+ vm_page_hold(m);
+ }
+ } else {
+ pte = *pmap_pde_to_pte(pdep, va);
+ if ((pte & PG_V) &&
+ ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
+ if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+ &pa))
+ goto retry;
+ m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
+ vm_page_hold(m);
+ }
+ }
+ }
+ PA_UNLOCK_COND(pa);
+ PMAP_UNLOCK(pmap);
+ return (m);
+}
+
+vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+ pd_entry_t pde;
+ vm_paddr_t pa;
+
+ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
+ pa = DMAP_TO_PHYS(va);
+ } else {
+ pde = *vtopde(va);
+ if (pde & PG_PS) {
+ pa = (pde & PG_PS_FRAME) | (va & PDRMASK);
+ } else {
+ /*
+ * Beware of a concurrent promotion that changes the
+ * PDE at this point! For example, vtopte() must not
+ * be used to access the PTE because it would use the
+ * new PDE. It is, however, safe to use the old PDE
+ * because the page table page is preserved by the
+ * promotion.
+ */
+ pa = *pmap_pde_to_pte(&pde, va);
+ pa = (pa & PG_FRAME) | (va & PAGE_MASK);
+ }
+ }
+ return (pa);
+}
+
+/***************************************************
+ * Low level mapping routines.....
+ ***************************************************/
+
+/*
+ * Add a wired page to the kva.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void
+pmap_kenter(vm_offset_t va, vm_paddr_t pa)
+{
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ pte_store(pte, pa | PG_RW | PG_V | PG_G);
+}
+
+static __inline void
+pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
+{
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ pte_store(pte, pa | PG_RW | PG_V | PG_G | pmap_cache_bits(mode, 0));
+}
+
+/*
+ * Remove a page from the kernel pagetables.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void
+pmap_kremove(vm_offset_t va)
+{
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ pte_clear(pte);
+}
+
+/*
+ * Used to map a range of physical addresses into kernel
+ * virtual address space.
+ *
+ * The value passed in '*virt' is a suggested virtual address for
+ * the mapping. Architectures which can support a direct-mapped
+ * physical to virtual region can return the appropriate address
+ * within that region, leaving '*virt' unchanged. Other
+ * architectures should map the pages starting at '*virt' and
+ * update '*virt' with the first usable address after the mapped
+ * region.
+ */
+vm_offset_t
+pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
+{
+	return (PHYS_TO_DMAP(start));
+}
+
+/*
+ * Add a list of wired pages to the kva.  This routine is only used for
+ * temporary kernel mappings that do not need to have page modification
+ * or references recorded.  Note that old mappings are simply written
+ * over.  The page *must* be wired.
+ * Note: SMP coherent. Uses a ranged shootdown IPI.
+ */
+void
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+{
+ pt_entry_t *endpte, oldpte, pa, *pte;
+ vm_page_t m;
+
+ oldpte = 0;
+ pte = vtopte(sva);
+ endpte = pte + count;
+ while (pte < endpte) {
+ m = *ma++;
+ pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
+ if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
+ oldpte |= *pte;
+ pte_store(pte, pa | PG_G | PG_RW | PG_V);
+ }
+ pte++;
+ }
+ if (__predict_false((oldpte & PG_V) != 0))
+ pmap_invalidate_range(kernel_pmap, sva, sva + count *
+ PAGE_SIZE);
+}
+
+/*
+ * This routine tears out page mappings from the
+ * kernel -- it is meant only for temporary mappings.
+ * Note: SMP coherent. Uses a ranged shootdown IPI.
+ */
+void
+pmap_qremove(vm_offset_t sva, int count)
+{
+ vm_offset_t va;
+
+ va = sva;
+ while (count-- > 0) {
+ KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va));
+ pmap_kremove(va);
+ va += PAGE_SIZE;
+ }
+ pmap_invalidate_range(kernel_pmap, sva, va);
+}
+
+/***************************************************
+ * Page table page management routines.....
+ ***************************************************/
+static __inline void
+pmap_free_zero_pages(vm_page_t free)
+{
+ vm_page_t m;
+
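+	/*
+	 * The free list is threaded through the pages' otherwise unused
+	 * "object" fields, which pmap_add_delayed_free_list() repurposes
+	 * as next pointers.
+	 */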
+ while (free != NULL) {
+ m = free;
+ free = (void *)m->object;
+ m->object = NULL;
+ /* Preserve the page's PG_ZERO setting. */
+ vm_page_free_toq(m);
+ }
+}
+
+/*
+ * Schedule the specified unused page table page to be freed. Specifically,
+ * add the page to the specified list of pages that will be released to the
+ * physical memory manager after the TLB has been updated.
+ */
+static __inline void
+pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
+{
+
+ if (set_PG_ZERO)
+ m->flags |= PG_ZERO;
+ else
+ m->flags &= ~PG_ZERO;
+ m->object = (void *)*free;
+ *free = m;
+}
+
+/*
+ * Inserts the specified page table page into the specified pmap's collection
+ * of idle page table pages. Each of a pmap's page table pages is responsible
+ * for mapping a distinct range of virtual addresses. The pmap's collection is
+ * ordered by this virtual address range.
+ */
+static __inline void
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ vm_radix_insert(&pmap->pm_root, mpte);
+}
+
+/*
+ * Looks for a page table page mapping the specified virtual address in the
+ * specified pmap's collection of idle page table pages. Returns NULL if there
+ * is no page table page corresponding to the specified virtual address.
+ */
+static __inline vm_page_t
+pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ return (vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)));
+}
+
+/*
+ * Removes the specified page table page from the specified pmap's collection
+ * of idle page table pages. The specified page table page must be a member of
+ * the pmap's collection.
+ */
+static __inline void
+pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ vm_radix_remove(&pmap->pm_root, mpte->pindex);
+}
+
+/*
+ * Decrements a page table page's wire count, which is used to record the
+ * number of valid page table entries within the page. If the wire count
+ * drops to zero, then the page table page is unmapped. Returns TRUE if the
+ * page table page was unmapped and FALSE otherwise.
+ */
+static inline boolean_t
+pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free)
+{
+
+ --m->wire_count;
+ if (m->wire_count == 0) {
+ _pmap_unwire_ptp(pmap, va, m, free);
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+static void
+_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /*
+ * unmap the page table page
+ */
+ if (m->pindex >= (NUPDE + NUPDPE)) {
+ /* PDP page */
+ pml4_entry_t *pml4;
+ pml4 = pmap_pml4e(pmap, va);
+ *pml4 = 0;
+ } else if (m->pindex >= NUPDE) {
+ /* PD page */
+ pdp_entry_t *pdp;
+ pdp = pmap_pdpe(pmap, va);
+ *pdp = 0;
+ } else {
+ /* PTE page */
+ pd_entry_t *pd;
+ pd = pmap_pde(pmap, va);
+ *pd = 0;
+ }
+ pmap_resident_count_dec(pmap, 1);
+ if (m->pindex < NUPDE) {
+ /* We just released a PT, unhold the matching PD */
+ vm_page_t pdpg;
+
+ pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
+ pmap_unwire_ptp(pmap, va, pdpg, free);
+ }
+ if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
+ /* We just released a PD, unhold the matching PDP */
+ vm_page_t pdppg;
+
+ pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
+ pmap_unwire_ptp(pmap, va, pdppg, free);
+ }
+
+ /*
+ * This is a release store so that the ordinary store unmapping
+ * the page table page is globally performed before TLB shoot-
+ * down is begun.
+ */
+ atomic_subtract_rel_int(&cnt.v_wire_count, 1);
+
+ /*
+ * Put page on a list so that it is released after
+ * *ALL* TLB shootdown is done
+ */
+ pmap_add_delayed_free_list(m, free, TRUE);
+}
+
+/*
+ * After removing a page table entry, this routine is used to
+ * conditionally free the page, and manage the hold/wire counts.
+ */
+static int
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, vm_page_t *free)
+{
+ vm_page_t mpte;
+
+ if (va >= VM_MAXUSER_ADDRESS)
+ return (0);
+	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde == 0"));
+ mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
+ return (pmap_unwire_ptp(pmap, va, mpte, free));
+}
+
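+/*
+ * Initialize a pmap that reuses the kernel's preconstructed page table
+ * (KPML4phys) rather than allocating a fresh PML4 page; this is used for
+ * the initial process's vmspace.
+ */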
+void
+pmap_pinit0(pmap_t pmap)
+{
+
+ PMAP_LOCK_INIT(pmap);
+ pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
+ pmap->pm_root.rt_root = 0;
+ CPU_ZERO(&pmap->pm_active);
+ PCPU_SET(curpmap, pmap);
+ TAILQ_INIT(&pmap->pm_pvchunk);
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+}
+
+/*
+ * Initialize a preallocated and zeroed pmap structure,
+ * such as one in a vmspace structure.
+ */
+int
+pmap_pinit(pmap_t pmap)
+{
+ vm_page_t pml4pg;
+ int i;
+
+ PMAP_LOCK_INIT(pmap);
+
+ /*
+ * allocate the page directory page
+ */
+ while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
+ VM_WAIT;
+
+ pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));
+
+ if ((pml4pg->flags & PG_ZERO) == 0)
+ pagezero(pmap->pm_pml4);
+
+ /* Wire in kernel global address entries. */
+ pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
+ for (i = 0; i < NDMPML4E; i++) {
+ pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) |
+ PG_RW | PG_V | PG_U;
+ }
+
+ /* install self-referential address mapping entry(s) */
+ pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
+
+ pmap->pm_root.rt_root = 0;
+ CPU_ZERO(&pmap->pm_active);
+ TAILQ_INIT(&pmap->pm_pvchunk);
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+
+ return (1);
+}
+
+/*
+ * This routine is called if the desired page table page does not exist.
+ *
+ * If page table page allocation fails, this routine may sleep before
+ * returning NULL. It sleeps only if a lock pointer was given.
+ *
+ * Note: If a page allocation fails at page table level two or three,
+ * one or two pages may be held during the wait, only to be released
+ * afterwards.  This conservative approach makes it easy to argue that
+ * no race conditions can occur.
+ */
+static vm_page_t
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
+{
+ vm_page_t m, pdppg, pdpg;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /*
+ * Allocate a page table page.
+ */
+ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+ if (lockp != NULL) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_UNLOCK(pmap);
+ rw_runlock(&pvh_global_lock);
+ VM_WAIT;
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+ }
+
+ /*
+ * Indicate the need to retry. While waiting, the page table
+ * page may have been allocated.
+ */
+ return (NULL);
+ }
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+
+ /*
+ * Map the pagetable page into the process address space, if
+ * it isn't already there.
+ */
+
+ if (ptepindex >= (NUPDE + NUPDPE)) {
+ pml4_entry_t *pml4;
+ vm_pindex_t pml4index;
+
+ /* Wire up a new PDPE page */
+ pml4index = ptepindex - (NUPDE + NUPDPE);
+ pml4 = &pmap->pm_pml4[pml4index];
+ *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+
+ } else if (ptepindex >= NUPDE) {
+ vm_pindex_t pml4index;
+ vm_pindex_t pdpindex;
+ pml4_entry_t *pml4;
+ pdp_entry_t *pdp;
+
+ /* Wire up a new PDE page */
+ pdpindex = ptepindex - NUPDE;
+ pml4index = pdpindex >> NPML4EPGSHIFT;
+
+ pml4 = &pmap->pm_pml4[pml4index];
+ if ((*pml4 & PG_V) == 0) {
+ /* Have to allocate a new pdp, recurse */
+ if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
+ lockp) == NULL) {
+ --m->wire_count;
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ /* Add reference to pdp page */
+ pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME);
+ pdppg->wire_count++;
+ }
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
+
+ /* Now find the pdp page */
+ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
+ *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+
+ } else {
+ vm_pindex_t pml4index;
+ vm_pindex_t pdpindex;
+ pml4_entry_t *pml4;
+ pdp_entry_t *pdp;
+ pd_entry_t *pd;
+
+ /* Wire up a new PTE page */
+ pdpindex = ptepindex >> NPDPEPGSHIFT;
+ pml4index = pdpindex >> NPML4EPGSHIFT;
+
+		/* First, find the pdp and check that it's valid. */
+ pml4 = &pmap->pm_pml4[pml4index];
+ if ((*pml4 & PG_V) == 0) {
+ /* Have to allocate a new pd, recurse */
+ if (_pmap_allocpte(pmap, NUPDE + pdpindex,
+ lockp) == NULL) {
+ --m->wire_count;
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
+ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
+ } else {
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
+ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
+ if ((*pdp & PG_V) == 0) {
+ /* Have to allocate a new pd, recurse */
+ if (_pmap_allocpte(pmap, NUPDE + pdpindex,
+ lockp) == NULL) {
+ --m->wire_count;
+ atomic_subtract_int(&cnt.v_wire_count,
+ 1);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ /* Add reference to the pd page */
+ pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME);
+ pdpg->wire_count++;
+ }
+ }
+ pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME);
+
+ /* Now we know where the page directory page is */
+ pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
+ *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+ }
+
+ pmap_resident_count_inc(pmap, 1);
+
+ return (m);
+}
+
+static vm_page_t
+pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+{
+ vm_pindex_t pdpindex, ptepindex;
+ pdp_entry_t *pdpe;
+ vm_page_t pdpg;
+
+retry:
+ pdpe = pmap_pdpe(pmap, va);
+ if (pdpe != NULL && (*pdpe & PG_V) != 0) {
+ /* Add a reference to the pd page. */
+ pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME);
+ pdpg->wire_count++;
+ } else {
+ /* Allocate a pd page. */
+ ptepindex = pmap_pde_pindex(va);
+ pdpindex = ptepindex >> NPDPEPGSHIFT;
+ pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp);
+ if (pdpg == NULL && lockp != NULL)
+ goto retry;
+ }
+ return (pdpg);
+}
+
+static vm_page_t
+pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+{
+ vm_pindex_t ptepindex;
+ pd_entry_t *pd;
+ vm_page_t m;
+
+ /*
+ * Calculate pagetable page index
+ */
+ ptepindex = pmap_pde_pindex(va);
+retry:
+ /*
+ * Get the page directory entry
+ */
+ pd = pmap_pde(pmap, va);
+
+ /*
+ * This supports switching from a 2MB page to a
+ * normal 4K page.
+ */
+ if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+ if (!pmap_demote_pde_locked(pmap, pd, va, lockp)) {
+ /*
+ * Invalidation of the 2MB page mapping may have caused
+ * the deallocation of the underlying PD page.
+ */
+ pd = NULL;
+ }
+ }
+
+ /*
+ * If the page table page is mapped, we just increment the
+ * hold count, and activate it.
+ */
+ if (pd != NULL && (*pd & PG_V) != 0) {
+ m = PHYS_TO_VM_PAGE(*pd & PG_FRAME);
+ m->wire_count++;
+ } else {
+ /*
+ * Here if the pte page isn't mapped, or if it has been
+ * deallocated.
+ */
+ m = _pmap_allocpte(pmap, ptepindex, lockp);
+ if (m == NULL && lockp != NULL)
+ goto retry;
+ }
+ return (m);
+}
+
+/***************************************************
+ * Pmap allocation/deallocation routines.
+ ***************************************************/
+
+/*
+ * Release any resources held by the given physical map.
+ * Called when a pmap initialized by pmap_pinit is being released.
+ * Should only be called if the map contains no valid mappings.
+ */
+void
+pmap_release(pmap_t pmap)
+{
+ vm_page_t m;
+ int i;
+
+ KASSERT(pmap->pm_stats.resident_count == 0,
+ ("pmap_release: pmap resident count %ld != 0",
+ pmap->pm_stats.resident_count));
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
+ ("pmap_release: pmap has reserved page table page(s)"));
+
+ m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
+
+ pmap->pm_pml4[KPML4I] = 0; /* KVA */
+ for (i = 0; i < NDMPML4E; i++) /* Direct Map */
+ pmap->pm_pml4[DMPML4I + i] = 0;
+ pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */
+
+ m->wire_count--;
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ vm_page_free_zero(m);
+ PMAP_LOCK_DESTROY(pmap);
+}
+
+static int
+kvm_size(SYSCTL_HANDLER_ARGS)
+{
+ unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
+
+	return (sysctl_handle_long(oidp, &ksize, 0, req));
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
+ 0, 0, kvm_size, "LU", "Size of KVM");
+
+static int
+kvm_free(SYSCTL_HANDLER_ARGS)
+{
+ unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
+
+	return (sysctl_handle_long(oidp, &kfree, 0, req));
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
+ 0, 0, kvm_free, "LU", "Amount of KVM free");
+
+/*
+ * grow the number of kernel page table entries, if needed
+ */
+void
+pmap_growkernel(vm_offset_t addr)
+{
+ vm_paddr_t paddr;
+ vm_page_t nkpg;
+ pd_entry_t *pde, newpdir;
+ pdp_entry_t *pdpe;
+
+ mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+
+ /*
+ * Return if "addr" is within the range of kernel page table pages
+ * that were preallocated during pmap bootstrap. Moreover, leave
+ * "kernel_vm_end" and the kernel page table as they were.
+ *
+ * The correctness of this action is based on the following
+ * argument: vm_map_findspace() allocates contiguous ranges of the
+ * kernel virtual address space. It calls this function if a range
+ * ends after "kernel_vm_end". If the kernel is mapped between
+ * "kernel_vm_end" and "addr", then the range cannot begin at
+ * "kernel_vm_end". In fact, its beginning address cannot be less
+ * than the kernel. Thus, there is no immediate need to allocate
+ * any new kernel page table pages between "kernel_vm_end" and
+ * "KERNBASE".
+ */
+ if (KERNBASE < addr && addr <= KERNBASE + nkpt * NBPDR)
+ return;
+
+ addr = roundup2(addr, NBPDR);
+ if (addr - 1 >= kernel_map->max_offset)
+ addr = kernel_map->max_offset;
+ while (kernel_vm_end < addr) {
+ pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
+ if ((*pdpe & PG_V) == 0) {
+ /* We need a new PDP entry */
+ nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT,
+ VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (nkpg == NULL)
+ panic("pmap_growkernel: no memory to grow kernel");
+ if ((nkpg->flags & PG_ZERO) == 0)
+ pmap_zero_page(nkpg);
+ paddr = VM_PAGE_TO_PHYS(nkpg);
+ *pdpe = (pdp_entry_t)
+ (paddr | PG_V | PG_RW | PG_A | PG_M);
+ continue; /* try again */
+ }
+ pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
+ if ((*pde & PG_V) != 0) {
+ kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
+ if (kernel_vm_end - 1 >= kernel_map->max_offset) {
+ kernel_vm_end = kernel_map->max_offset;
+ break;
+ }
+ continue;
+ }
+
+ nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end),
+ VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO);
+ if (nkpg == NULL)
+ panic("pmap_growkernel: no memory to grow kernel");
+ if ((nkpg->flags & PG_ZERO) == 0)
+ pmap_zero_page(nkpg);
+ paddr = VM_PAGE_TO_PHYS(nkpg);
+ newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M);
+ pde_store(pde, newpdir);
+
+ kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
+ if (kernel_vm_end - 1 >= kernel_map->max_offset) {
+ kernel_vm_end = kernel_map->max_offset;
+ break;
+ }
+ }
+}
+
+/***************************************************
+ * page management routines.
+ ***************************************************/
+
+CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
+CTASSERT(_NPCM == 3);
+CTASSERT(_NPCPV == 168);
+
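+/*
+ * A PV chunk is exactly one page in size (see the CTASSERT above) and page
+ * aligned, so the chunk header for any PV entry can be recovered by masking
+ * off the low PAGE_MASK bits of the entry's address.
+ */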
+static __inline struct pv_chunk *
+pv_to_chunk(pv_entry_t pv)
+{
+
+ return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
+}
+
+#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
+
+#define PC_FREE0 0xfffffffffffffffful
+#define PC_FREE1 0xfffffffffffffffful
+#define PC_FREE2 0x000000fffffffffful
+
+static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
+
+#ifdef PV_STATS
+static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
+ "Current number of pv entry chunks");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
+ "Current number of pv entry chunks allocated");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
+ "Current number of pv entry chunks frees");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
+ "Number of times tried to get a chunk page but failed.");
+
+static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
+static int pv_entry_spare;
+
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
+ "Current number of pv entry frees");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
+ "Current number of pv entry allocs");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+ "Current number of pv entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
+ "Current number of spare pv entries");
+#endif
+
+/*
+ * We are in a serious low memory condition. Resort to
+ * drastic measures to free some pages so we can allocate
+ * another pv entry chunk.
+ *
+ * Returns NULL if PV entries were reclaimed from the specified pmap.
+ *
+ * We do not, however, unmap 2mpages because subsequent accesses will
+ * allocate per-page pv entries until repromotion occurs, thereby
+ * exacerbating the shortage of free pv entries.
+ */
+static vm_page_t
+reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+{
+ struct pch new_tail;
+ struct pv_chunk *pc;
+ struct md_page *pvh;
+ pd_entry_t *pde;
+ pmap_t pmap;
+ pt_entry_t *pte, tpte;
+ pv_entry_t pv;
+ vm_offset_t va;
+ vm_page_t free, m, m_pc;
+ uint64_t inuse;
+ int bit, field, freed;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
+ pmap = NULL;
+ free = m_pc = NULL;
+ TAILQ_INIT(&new_tail);
+ mtx_lock(&pv_chunks_mutex);
+ while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && free == NULL) {
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ if (pmap != pc->pc_pmap) {
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ pmap = pc->pc_pmap;
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_LOCK(pmap);
+ } else if (pmap != locked_pmap &&
+ !PMAP_TRYLOCK(pmap)) {
+ pmap = NULL;
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ mtx_lock(&pv_chunks_mutex);
+ continue;
+ }
+ }
+
+ /*
+ * Destroy every non-wired, 4 KB page mapping in the chunk.
+ */
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+ inuse != 0; inuse &= ~(1UL << bit)) {
+ bit = bsfq(inuse);
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ if ((*pde & PG_PS) != 0)
+ continue;
+ pte = pmap_pde_to_pte(pde, va);
+ if ((*pte & PG_W) != 0)
+ continue;
+ tpte = pte_load_clear(pte);
+ if ((tpte & PG_G) != 0)
+ pmap_invalidate_page(pmap, va);
+ m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if ((tpte & PG_A) != 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list)) {
+ vm_page_aflag_clear(m,
+ PGA_WRITEABLE);
+ }
+ }
+ pc->pc_map[field] |= 1UL << bit;
+ pmap_unuse_pt(pmap, va, *pde, &free);
+ freed++;
+ }
+ }
+ if (freed == 0) {
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ mtx_lock(&pv_chunks_mutex);
+ continue;
+ }
+ /* Every freed mapping is for a 4 KB page. */
+ pmap_resident_count_dec(pmap, freed);
+ PV_STAT(atomic_add_long(&pv_entry_frees, freed));
+ PV_STAT(atomic_add_int(&pv_entry_spare, freed));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+ pc->pc_map[2] == PC_FREE2) {
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+ dump_drop_page(m_pc->phys_addr);
+ mtx_lock(&pv_chunks_mutex);
+ break;
+ }
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ mtx_lock(&pv_chunks_mutex);
+ /* One freed pv entry in locked_pmap is sufficient. */
+ if (pmap == locked_pmap)
+ break;
+ }
+ TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ if (pmap != NULL) {
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ }
+ if (m_pc == NULL && free != NULL) {
+ m_pc = free;
+ free = (void *)m_pc->object;
+ /* Recycle a freed page table page. */
+ m_pc->wire_count = 1;
+ atomic_add_int(&cnt.v_wire_count, 1);
+ }
+ pmap_free_zero_pages(free);
+ return (m_pc);
+}
+
+/*
+ * free the pv_entry back to the free list
+ */
+static void
+free_pv_entry(pmap_t pmap, pv_entry_t pv)
+{
+ struct pv_chunk *pc;
+ int idx, field, bit;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(atomic_add_long(&pv_entry_frees, 1));
+ PV_STAT(atomic_add_int(&pv_entry_spare, 1));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
+ pc = pv_to_chunk(pv);
+ idx = pv - &pc->pc_pventry[0];
+ field = idx / 64;
+ bit = idx % 64;
+ pc->pc_map[field] |= 1ul << bit;
+ if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
+ pc->pc_map[2] != PC_FREE2) {
+ /* 98% of the time, pc is already at the head of the list. */
+ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ }
+ return;
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* entire chunk is free, return it */
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+ dump_drop_page(m->phys_addr);
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+}
+
+/*
+ * Returns a new PV entry, allocating a new PV chunk from the system when
+ * needed. If this PV chunk allocation fails and a PV list lock pointer was
+ * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is
+ * returned.
+ *
+ * The given PV list lock may be released.
+ */
+static pv_entry_t
+get_pv_entry(pmap_t pmap, struct rwlock **lockp)
+{
+ int bit, field;
+ pv_entry_t pv;
+ struct pv_chunk *pc;
+ vm_page_t m;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
+retry:
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ if (pc != NULL) {
+ for (field = 0; field < _NPCM; field++) {
+ if (pc->pc_map[field]) {
+ bit = bsfq(pc->pc_map[field]);
+ break;
+ }
+ }
+ if (field < _NPCM) {
+ pv = &pc->pc_pventry[field * 64 + bit];
+ pc->pc_map[field] &= ~(1ul << bit);
+ /* If this was the last item, move it to tail */
+ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
+ pc->pc_map[2] == 0) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
+ pc_list);
+ }
+ PV_STAT(atomic_add_long(&pv_entry_count, 1));
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
+ return (pv);
+ }
+ }
+ /* No free items, allocate another chunk */
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ if (lockp == NULL) {
+ PV_STAT(pc_chunk_tryfail++);
+ return (NULL);
+ }
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
+ }
+ PV_STAT(atomic_add_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
+ dump_add_page(m->phys_addr);
+ pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
+ pc->pc_map[1] = PC_FREE1;
+ pc->pc_map[2] = PC_FREE2;
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ pv = &pc->pc_pventry[0];
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(atomic_add_long(&pv_entry_count, 1));
+ PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
+ return (pv);
+}
+
+/*
+ * Returns the number of one bits within the given PV chunk map element.
+ */
+static int
+popcnt_pc_map_elem(uint64_t elem)
+{
+ int count;
+
+ /*
+ * This simple method of counting the one bits performs well because
+ * the given element typically contains more zero bits than one bits.
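+	 * Each iteration of "elem &= elem - 1" clears exactly the lowest
+	 * set bit, so the loop runs once per one bit; e.g., 0x90 -> 0x80
+	 * -> 0 takes two iterations.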
+ */
+ count = 0;
+ for (; elem != 0; elem &= elem - 1)
+ count++;
+ return (count);
+}
+
+/*
+ * Ensure that the number of spare PV entries in the specified pmap meets or
+ * exceeds the given count, "needed".
+ *
+ * The given PV list lock may be released.
+ */
+static void
+reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+{
+ struct pch new_tail;
+ struct pv_chunk *pc;
+ int avail, free;
+ vm_page_t m;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
+
+ /*
+ * Newly allocated PV chunks must be stored in a private list until
+ * the required number of PV chunks have been allocated. Otherwise,
+ * reclaim_pv_chunk() could recycle one of these chunks.  In contrast,
+ * these chunks must be added to the pmap's own chunk list immediately
+ * upon allocation, so that their free PV entries can be found.
+ */
+ TAILQ_INIT(&new_tail);
+retry:
+ avail = 0;
+ TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
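+		/*
+		 * Count this chunk's free entries, using the hardware POPCNT
+		 * instruction when the CPU provides it and the software
+		 * fallback otherwise.
+		 */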
+ if ((cpu_feature2 & CPUID2_POPCNT) == 0) {
+ free = popcnt_pc_map_elem(pc->pc_map[0]);
+ free += popcnt_pc_map_elem(pc->pc_map[1]);
+ free += popcnt_pc_map_elem(pc->pc_map[2]);
+ } else {
+ free = popcntq(pc->pc_map[0]);
+ free += popcntq(pc->pc_map[1]);
+ free += popcntq(pc->pc_map[2]);
+ }
+ if (free == 0)
+ break;
+ avail += free;
+ if (avail >= needed)
+ break;
+ }
+ for (; avail < needed; avail += _NPCPV) {
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
+ }
+ PV_STAT(atomic_add_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
+ dump_add_page(m->phys_addr);
+ pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = PC_FREE0;
+ pc->pc_map[1] = PC_FREE1;
+ pc->pc_map[2] = PC_FREE2;
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
+ }
+ if (!TAILQ_EMPTY(&new_tail)) {
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ }
+}
+
+/*
+ * First find and then remove the pv entry for the specified pmap and virtual
+ * address from the specified pv list. Returns the pv entry if found and NULL
+ * otherwise. This operation can be performed on pv lists for either 4KB or
+ * 2MB page mappings.
+ */
+static __inline pv_entry_t
+pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
+{
+ pv_entry_t pv;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
+ break;
+ }
+ }
+ return (pv);
+}
+
+/*
+ * After demotion from a 2MB page mapping to 512 4KB page mappings,
+ * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
+ * entries for each of the 4KB page mappings.
+ */
+static void
+pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ struct pv_chunk *pc;
+ pv_entry_t pv;
+ vm_offset_t va_last;
+ vm_page_t m;
+ int bit, field;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((pa & PDRMASK) == 0,
+ ("pmap_pv_demote_pde: pa is not 2mpage aligned"));
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+
+ /*
+ * Transfer the 2mpage's pv entry for this mapping to the first
+ * page's pv list. Once this transfer begins, the pv list lock
+ * must not be released until the last pv entry is reinstantiated.
+ */
+ pvh = pa_to_pvh(pa);
+ va = trunc_2mpage(va);
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
+ m = PHYS_TO_VM_PAGE(pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ /* Instantiate the remaining NPTEPG - 1 pv entries. */
+ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1));
+ va_last = va + NBPDR - PAGE_SIZE;
+ for (;;) {
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
+ pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare"));
+ for (field = 0; field < _NPCM; field++) {
+ while (pc->pc_map[field]) {
+ bit = bsfq(pc->pc_map[field]);
+ pc->pc_map[field] &= ~(1ul << bit);
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va += PAGE_SIZE;
+ pv->pv_va = va;
+ m++;
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_pv_demote_pde: page %p is not managed", m));
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ if (va == va_last)
+ goto out;
+ }
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+out:
+ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+ PV_STAT(atomic_add_long(&pv_entry_count, NPTEPG - 1));
+ PV_STAT(atomic_subtract_int(&pv_entry_spare, NPTEPG - 1));
+}
+
+/*
+ * After promotion from 512 4KB page mappings to a single 2MB page mapping,
+ * replace the many pv entries for the 4KB page mappings by a single pv entry
+ * for the 2MB page mapping.
+ */
+static void
+pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_offset_t va_last;
+ vm_page_t m;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ KASSERT((pa & PDRMASK) == 0,
+ ("pmap_pv_promote_pde: pa is not 2mpage aligned"));
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+
+ /*
+ * Transfer the first page's pv entry for this mapping to the 2mpage's
+ * pv list. Aside from avoiding the cost of a call to get_pv_entry(),
+ * a transfer avoids the possibility that get_pv_entry() calls
+ * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
+ * mappings that is being promoted.
+ */
+ m = PHYS_TO_VM_PAGE(pa);
+ va = trunc_2mpage(va);
+ pv = pmap_pvh_remove(&m->md, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ /* Free the remaining NPTEPG - 1 pv entries. */
+ va_last = va + NBPDR - PAGE_SIZE;
+ do {
+ m++;
+ va += PAGE_SIZE;
+ pmap_pvh_free(&m->md, pmap, va);
+ } while (va < va_last);
+}
+
+/*
+ * First find and then destroy the pv entry for the specified pmap and virtual
+ * address. This operation can be performed on pv lists for either 4KB or 2MB
+ * page mappings.
+ */
+static void
+pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
+{
+ pv_entry_t pv;
+
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
+ free_pv_entry(pmap, pv);
+}
+
+/*
+ * Conditionally create the PV entry for a 4KB page mapping if the required
+ * memory can be allocated without resorting to reclamation.
+ */
+static boolean_t
+pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ struct rwlock **lockp)
+{
+ pv_entry_t pv;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
+ pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+/*
+ * Conditionally create the PV entry for a 2MB page mapping if the required
+ * memory can be allocated without resorting to reclamation.
+ */
+static boolean_t
+pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
+ pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+/*
+ * Fills a page table page with mappings to consecutive physical pages.
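+ * The caller supplies the PTE for the first 4KB page; each successive PTE
+ * maps a physical address PAGE_SIZE higher than its predecessor.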
+ */
+static void
+pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
+{
+ pt_entry_t *pte;
+
+ for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+ *pte = newpte;
+ newpte += PAGE_SIZE;
+ }
+}
+
+/*
+ * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page
+ * mapping is invalidated.
+ */
+static boolean_t
+pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+{
+ struct rwlock *lock;
+ boolean_t rv;
+
+ lock = NULL;
+ rv = pmap_demote_pde_locked(pmap, pde, va, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ return (rv);
+}
+
+static boolean_t
+pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp)
+{
+ pd_entry_t newpde, oldpde;
+ pt_entry_t *firstpte, newpte;
+ vm_paddr_t mptepa;
+ vm_page_t free, mpte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ oldpde = *pde;
+ KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
+ ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
+ mpte = pmap_lookup_pt_page(pmap, va);
+ if (mpte != NULL)
+ pmap_remove_pt_page(pmap, mpte);
+ else {
+ KASSERT((oldpde & PG_W) == 0,
+ ("pmap_demote_pde: page table page for a wired mapping"
+ " is missing"));
+
+ /*
+ * Invalidate the 2MB page mapping and return "failure" if the
+ * mapping was never accessed or the allocation of the new
+ * page table page fails. If the 2MB page mapping belongs to
+ * the direct map region of the kernel's address space, then
+ * the page allocation request specifies the highest possible
+ * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is
+ * normal. Page table pages are preallocated for every other
+ * part of the kernel address space, so the direct map region
+ * is the only part of the kernel address space that must be
+ * handled here.
+ */
+ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
+ pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va <
+ DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
+ free = NULL;
+ pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free,
+ lockp);
+ pmap_invalidate_page(pmap, trunc_2mpage(va));
+ pmap_free_zero_pages(free);
+ CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ if (va < VM_MAXUSER_ADDRESS)
+ pmap_resident_count_inc(pmap, 1);
+ }
+ mptepa = VM_PAGE_TO_PHYS(mpte);
+ firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
+ newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
+ KASSERT((oldpde & PG_A) != 0,
+ ("pmap_demote_pde: oldpde is missing PG_A"));
+ KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
+ ("pmap_demote_pde: oldpde is missing PG_M"));
+ newpte = oldpde & ~PG_PS;
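+	/*
+	 * The PAT bit occupies a different position in a PDE (bit 12,
+	 * PG_PDE_PAT) than in a PTE (bit 7, PG_PTE_PAT), so relocate it
+	 * when converting the 2MB entry into a 4KB entry template.
+	 */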
+ if ((newpte & PG_PDE_PAT) != 0)
+ newpte ^= PG_PDE_PAT | PG_PTE_PAT;
+
+ /*
+ * If the page table page is new, initialize it.
+ */
+ if (mpte->wire_count == 1) {
+ mpte->wire_count = NPTEPG;
+ pmap_fill_ptp(firstpte, newpte);
+ }
+ KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
+ ("pmap_demote_pde: firstpte and newpte map different physical"
+ " addresses"));
+
+ /*
+ * If the mapping has changed attributes, update the page table
+ * entries.
+ */
+ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
+ pmap_fill_ptp(firstpte, newpte);
+
+ /*
+ * The spare PV entries must be reserved prior to demoting the
+ * mapping, that is, prior to changing the PDE. Otherwise, the state
+ * of the PDE and the PV lists will be inconsistent, which can result
+ * in reclaim_pv_chunk() attempting to remove a PV entry from the
+ * wrong PV list and pmap_pv_demote_pde() failing to find the expected
+ * PV entry for the 2MB page mapping that is being demoted.
+ */
+ if ((oldpde & PG_MANAGED) != 0)
+ reserve_pv_entries(pmap, NPTEPG - 1, lockp);
+
+ /*
+ * Demote the mapping. This pmap is locked. The old PDE has
+ * PG_A set. If the old PDE has PG_RW set, it also has PG_M
+ * set. Thus, there is no danger of a race with another
+ * processor changing the setting of PG_A and/or PG_M between
+ * the read above and the store below.
+ */
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, newpde);
+ else
+ pde_store(pde, newpde);
+
+ /*
+ * Invalidate a stale recursive mapping of the page table page.
+ */
+ if (va >= VM_MAXUSER_ADDRESS)
+ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+
+ /*
+ * Demote the PV entry.
+ */
+ if ((oldpde & PG_MANAGED) != 0)
+ pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp);
+
+ atomic_add_long(&pmap_pde_demotions, 1);
+ CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx"
+ " in pmap %p", va, pmap);
+ return (TRUE);
+}
+
+/*
+ * pmap_remove_pde: unmap a 2MB superpage from a process's address space
+ */
+static int
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
+ vm_page_t *free, struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pd_entry_t oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m, mpte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_remove_pde: sva is not 2mpage aligned"));
+ oldpde = pte_load_clear(pdq);
+ if (oldpde & PG_W)
+ pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+
+ /*
+	 * Machines that don't support invlpg also don't support PG_G.
+ */
+ if (oldpde & PG_G)
+ pmap_invalidate_page(kernel_pmap, sva);
+ pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
+ if (oldpde & PG_MANAGED) {
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME);
+ pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
+ pmap_pvh_free(pvh, pmap, sva);
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
+ va < eva; va += PAGE_SIZE, m++) {
+ if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if (oldpde & PG_A)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ if (pmap == kernel_pmap) {
+ if (!pmap_demote_pde_locked(pmap, pdq, sva, lockp))
+ panic("pmap_remove_pde: failed demotion");
+ } else {
+ mpte = pmap_lookup_pt_page(pmap, sva);
+ if (mpte != NULL) {
+ pmap_remove_pt_page(pmap, mpte);
+ pmap_resident_count_dec(pmap, 1);
+ KASSERT(mpte->wire_count == NPTEPG,
+ ("pmap_remove_pde: pte page wire count error"));
+ mpte->wire_count = 0;
+ pmap_add_delayed_free_list(mpte, free, FALSE);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ }
+ }
+ return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free));
+}
+
+/*
+ * pmap_remove_pte: unmap a single 4KB page from a process's address space
+ */
+static int
+pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
+ pd_entry_t ptepde, vm_page_t *free, struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pt_entry_t oldpte;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ oldpte = pte_load_clear(ptq);
+ if (oldpte & PG_W)
+ pmap->pm_stats.wired_count -= 1;
+ pmap_resident_count_dec(pmap, 1);
+ if (oldpte & PG_MANAGED) {
+ m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
+ if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if (oldpte & PG_A)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
+ pmap_pvh_free(&m->md, pmap, va);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ return (pmap_unuse_pt(pmap, va, ptepde, free));
+}
+
+/*
+ * Remove a single page from a process address space
+ */
+static void
+pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free)
+{
+ struct rwlock *lock;
+ pt_entry_t *pte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if ((*pde & PG_V) == 0)
+ return;
+ pte = pmap_pde_to_pte(pde, va);
+ if ((*pte & PG_V) == 0)
+ return;
+ lock = NULL;
+ pmap_remove_pte(pmap, pte, va, *pde, free, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ pmap_invalidate_page(pmap, va);
+}
+
+/*
+ * Remove the given range of addresses from the specified map.
+ *
+ * It is assumed that the start and end are properly
+ * rounded to the page size.
+ */
+void
+pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ struct rwlock *lock;
+ vm_offset_t va, va_next;
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t ptpaddr, *pde;
+ pt_entry_t *pte;
+ vm_page_t free = NULL;
+ int anyvalid;
+
+ /*
+ * Perform an unsynchronized read. This is, however, safe.
+ */
+ if (pmap->pm_stats.resident_count == 0)
+ return;
+
+ anyvalid = 0;
+
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+
+ /*
+	 * Special-case the removal of a single page, a very common
+	 * operation for which the general code below can be
+	 * short-circuited.
+ */
+ if (sva + PAGE_SIZE == eva) {
+ pde = pmap_pde(pmap, sva);
+ if (pde && (*pde & PG_PS) == 0) {
+ pmap_remove_page(pmap, sva, pde, &free);
+ goto out;
+ }
+ }
+
+ lock = NULL;
+ for (; sva < eva; sva = va_next) {
+
+ if (pmap->pm_stats.resident_count == 0)
+ break;
+
+ pml4e = pmap_pml4e(pmap, sva);
+ if ((*pml4e & PG_V) == 0) {
+ va_next = (sva + NBPML4) & ~PML4MASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+ if ((*pdpe & PG_V) == 0) {
+ va_next = (sva + NBPDP) & ~PDPMASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ /*
+ * Calculate index for next page table.
+ */
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+
+ pde = pmap_pdpe_to_pde(pdpe, sva);
+ ptpaddr = *pde;
+
+ /*
+ * Weed out invalid mappings.
+ */
+ if (ptpaddr == 0)
+ continue;
+
+ /*
+ * Check for large page.
+ */
+ if ((ptpaddr & PG_PS) != 0) {
+ /*
+ * Are we removing the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == va_next && eva >= va_next) {
+ /*
+ * The TLB entry for a PG_G mapping is
+ * invalidated by pmap_remove_pde().
+ */
+ if ((ptpaddr & PG_G) == 0)
+ anyvalid = 1;
+ pmap_remove_pde(pmap, pde, sva, &free, &lock);
+ continue;
+ } else if (!pmap_demote_pde_locked(pmap, pde, sva,
+ &lock)) {
+ /* The large page mapping was destroyed. */
+ continue;
+ } else
+ ptpaddr = *pde;
+ }
+
+ /*
+ * Limit our scan to either the end of the va represented
+ * by the current page table page, or to the end of the
+ * range being removed.
+ */
+ if (va_next > eva)
+ va_next = eva;
+
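+		/*
+		 * "va" marks the start of a pending run of global (PG_G)
+		 * mappings that still needs a targeted TLB invalidation;
+		 * va == va_next means that no run is pending.
+		 */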
+ va = va_next;
+ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+ sva += PAGE_SIZE) {
+ if (*pte == 0) {
+ if (va != va_next) {
+ pmap_invalidate_range(pmap, va, sva);
+ va = va_next;
+ }
+ continue;
+ }
+ if ((*pte & PG_G) == 0)
+ anyvalid = 1;
+ else if (va == va_next)
+ va = sva;
+ if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free,
+ &lock)) {
+ sva += PAGE_SIZE;
+ break;
+ }
+ }
+ if (va != va_next)
+ pmap_invalidate_range(pmap, va, sva);
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+out:
+ if (anyvalid)
+ pmap_invalidate_all(pmap);
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+ pmap_free_zero_pages(free);
+}
+
+/*
+ * Routine: pmap_remove_all
+ * Function:
+ * Removes this physical page from
+ * all physical maps in which it resides.
+ * Reflects back modify bits to the pager.
+ *
+ * Notes:
+ * Original versions of this routine were very
+ * inefficient because they iteratively called
+ * pmap_remove (slow...)
+ */
+
+void
+pmap_remove_all(vm_page_t m)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ pmap_t pmap;
+ pt_entry_t *pte, tpte;
+ pd_entry_t *pde;
+ vm_offset_t va;
+ vm_page_t free;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_remove_all: page %p is not managed", m));
+ free = NULL;
+ rw_wlock(&pvh_global_lock);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ goto small_mappings;
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ (void)pmap_demote_pde(pmap, pde, va);
+ PMAP_UNLOCK(pmap);
+ }
+small_mappings:
+ while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pmap_resident_count_dec(pmap, 1);
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
+ " a 2mpage in page %p's pv list", m));
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
+ tpte = pte_load_clear(pte);
+ if (tpte & PG_W)
+ pmap->pm_stats.wired_count--;
+ if (tpte & PG_A)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+
+ /*
+ * Update the vm_page_t clean and reference bits.
+ */
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ pmap_unuse_pt(pmap, pv->pv_va, *pde, &free);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ free_pv_entry(pmap, pv);
+ PMAP_UNLOCK(pmap);
+ }
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ rw_wunlock(&pvh_global_lock);
+ pmap_free_zero_pages(free);
+}
+
+/*
+ * pmap_protect_pde: do the things to protect a 2mpage in a process
+ */
+static boolean_t
+pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
+{
+ pd_entry_t newpde, oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m;
+ boolean_t anychanged;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_protect_pde: sva is not 2mpage aligned"));
+ anychanged = FALSE;
+retry:
+ oldpde = newpde = *pde;
+ if (oldpde & PG_MANAGED) {
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
+ va < eva; va += PAGE_SIZE, m++)
+ if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ }
+ if ((prot & VM_PROT_WRITE) == 0)
+ newpde &= ~(PG_RW | PG_M);
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ newpde |= pg_nx;
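+	/*
+	 * Install the new PDE with a compare-and-swap so that concurrent
+	 * hardware updates of PG_A or PG_M are not lost; retry from the
+	 * reload above if the swap fails.
+	 */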
+ if (newpde != oldpde) {
+ if (!atomic_cmpset_long(pde, oldpde, newpde))
+ goto retry;
+ if (oldpde & PG_G)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ return (anychanged);
+}
+
+/*
+ * Set the physical protection on the
+ * specified range of this map as requested.
+ */
+void
+pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
+{
+ vm_offset_t va_next;
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t ptpaddr, *pde;
+ pt_entry_t *pte;
+ boolean_t anychanged, pv_lists_locked;
+
+ if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
+ pmap_remove(pmap, sva, eva);
+ return;
+ }
+
+ if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
+ (VM_PROT_WRITE|VM_PROT_EXECUTE))
+ return;
+
+ pv_lists_locked = FALSE;
+resume:
+ anychanged = FALSE;
+
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+
+ pml4e = pmap_pml4e(pmap, sva);
+ if ((*pml4e & PG_V) == 0) {
+ va_next = (sva + NBPML4) & ~PML4MASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+ if ((*pdpe & PG_V) == 0) {
+ va_next = (sva + NBPDP) & ~PDPMASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+
+ pde = pmap_pdpe_to_pde(pdpe, sva);
+ ptpaddr = *pde;
+
+ /*
+ * Weed out invalid mappings.
+ */
+ if (ptpaddr == 0)
+ continue;
+
+ /*
+ * Check for large page.
+ */
+ if ((ptpaddr & PG_PS) != 0) {
+ /*
+ * Are we protecting the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == va_next && eva >= va_next) {
+ /*
+ * The TLB entry for a PG_G mapping is
+ * invalidated by pmap_protect_pde().
+ */
+ if (pmap_protect_pde(pmap, pde, sva, prot))
+ anychanged = TRUE;
+ continue;
+ } else {
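+				/*
+				 * Demoting the 2MB mapping requires the pv
+				 * lists.  Try to read-lock pvh_global_lock
+				 * without blocking; if that fails, flush any
+				 * pending invalidations, drop the pmap lock,
+				 * block on the lock, and restart the scan at
+				 * "resume".
+				 */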
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_rlock(&pvh_global_lock)) {
+ if (anychanged)
+ pmap_invalidate_all(
+ pmap);
+ PMAP_UNLOCK(pmap);
+ rw_rlock(&pvh_global_lock);
+ goto resume;
+ }
+ }
+ if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was
+ * destroyed.
+ */
+ continue;
+ }
+ }
+ }
+
+ if (va_next > eva)
+ va_next = eva;
+
+ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+ sva += PAGE_SIZE) {
+ pt_entry_t obits, pbits;
+ vm_page_t m;
+
+retry:
+ obits = pbits = *pte;
+ if ((pbits & PG_V) == 0)
+ continue;
+
+ if ((prot & VM_PROT_WRITE) == 0) {
+ if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
+ (PG_MANAGED | PG_M | PG_RW)) {
+ m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
+ vm_page_dirty(m);
+ }
+ pbits &= ~(PG_RW | PG_M);
+ }
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ pbits |= pg_nx;
+
+ if (pbits != obits) {
+ if (!atomic_cmpset_long(pte, obits, pbits))
+ goto retry;
+ if (obits & PG_G)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ }
+ }
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ if (pv_lists_locked)
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Tries to promote the 512 contiguous 4KB page mappings that are within a
+ * single page table page (PTP) to a single 2MB page mapping. For promotion
+ * to occur, two conditions must be met: (1) the 4KB page mappings must map
+ * aligned, contiguous physical memory and (2) the 4KB page mappings must have
+ * identical characteristics.
+ */
+static void
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp)
+{
+ pd_entry_t newpde;
+ pt_entry_t *firstpte, oldpte, pa, *pte;
+ vm_offset_t oldpteva;
+ vm_page_t mpte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /*
+ * Examine the first PTE in the specified PTP. Abort if this PTE is
+ * either invalid, unused, or does not map the first 4KB physical page
+ * within a 2MB page.
+ */
+ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
+setpde:
+ newpde = *firstpte;
+ if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+ atomic_add_long(&pmap_pde_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
+ if ((newpde & (PG_M | PG_RW)) == PG_RW) {
+ /*
+ * When PG_M is already clear, PG_RW can be cleared without
+ * a TLB invalidation.
+ */
+ if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW))
+ goto setpde;
+ newpde &= ~PG_RW;
+ }
+
+ /*
+ * Examine each of the other PTEs in the specified PTP. Abort if this
+ * PTE maps an unexpected 4KB physical page or does not have identical
+ * characteristics to the first PTE.
+ */
+ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+ for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
+setpte:
+ oldpte = *pte;
+ if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+ atomic_add_long(&pmap_pde_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
+ if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
+ /*
+ * When PG_M is already clear, PG_RW can be cleared
+ * without a TLB invalidation.
+ */
+ if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW))
+ goto setpte;
+ oldpte &= ~PG_RW;
+ oldpteva = (oldpte & PG_FRAME & PDRMASK) |
+ (va & ~PDRMASK);
+ CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
+ " in pmap %p", oldpteva, pmap);
+ }
+ if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
+ atomic_add_long(&pmap_pde_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
+ pa -= PAGE_SIZE;
+ }
+
+ /*
+ * Save the page table page in its current state until the PDE
+ * mapping the superpage is demoted by pmap_demote_pde() or
+ * destroyed by pmap_remove_pde().
+ */
+ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+ KASSERT(mpte >= vm_page_array &&
+ mpte < &vm_page_array[vm_page_array_size],
+ ("pmap_promote_pde: page table page is out of range"));
+ KASSERT(mpte->pindex == pmap_pde_pindex(va),
+ ("pmap_promote_pde: page table page's pindex is wrong"));
+ pmap_insert_pt_page(pmap, mpte);
+
+ /*
+ * Promote the pv entries.
+ */
+ if ((newpde & PG_MANAGED) != 0)
+ pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp);
+
+ /*
+ * Propagate the PAT index to its proper position.
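+	 * In a 4KB PTE the PAT bit is bit 7 (PG_PTE_PAT), but in a 2MB
+	 * PDE bit 7 is PG_PS, so the PAT bit moves to bit 12
+	 * (PG_PDE_PAT); when the bit is set, the XOR below clears the
+	 * PTE position and sets the PDE position.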
+ */
+ if ((newpde & PG_PTE_PAT) != 0)
+ newpde ^= PG_PDE_PAT | PG_PTE_PAT;
+
+ /*
+ * Map the superpage.
+ */
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+ else
+ pde_store(pde, PG_PS | newpde);
+
+ atomic_add_long(&pmap_pde_promotions, 1);
+ CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
+ " in pmap %p", va, pmap);
+}
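+
+/*
+ * A sketch of the promotion test above, using hypothetical PTE values:
+ * the first PTE of a promotable run must map a 2MB-aligned frame and
+ * have PG_A and PG_V set, so
+ *
+ *	0x200000 | PG_A | PG_V		passes (2MB-aligned frame)
+ *	0x201000 | PG_A | PG_V		fails: (PG_FRAME & PDRMASK) != 0
+ *
+ * and every later PTE must match the first in all PG_PTE_PROMOTE bits.
+ */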
+
+/*
+ * Insert the given physical page (m) at
+ * the specified virtual address (va) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte cannot be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+void
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
+ vm_prot_t prot, boolean_t wired)
+{
+ struct rwlock *lock;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ pt_entry_t newpte, origpte;
+ pv_entry_t pv;
+ vm_paddr_t opa, pa;
+ vm_page_t mpte, om;
+
+ va = trunc_page(va);
+ KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
+ KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
+ ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
+ va));
+ KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
+ va >= kmi.clean_eva,
+ ("pmap_enter: managed mapping within the clean submap"));
+ if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ pa = VM_PAGE_TO_PHYS(m);
+ newpte = (pt_entry_t)(pa | PG_A | PG_V);
+ if ((access & VM_PROT_WRITE) != 0)
+ newpte |= PG_M;
+ if ((prot & VM_PROT_WRITE) != 0)
+ newpte |= PG_RW;
+ KASSERT((newpte & (PG_M | PG_RW)) != PG_M,
+ ("pmap_enter: access includes VM_PROT_WRITE but prot doesn't"));
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ newpte |= pg_nx;
+ if (wired)
+ newpte |= PG_W;
+ if (va < VM_MAXUSER_ADDRESS)
+ newpte |= PG_U;
+ if (pmap == kernel_pmap)
+ newpte |= PG_G;
+ newpte |= pmap_cache_bits(m->md.pat_mode, 0);
+
+ mpte = NULL;
+
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+retry:
+ pde = pmap_pde(pmap, va);
+ if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 ||
+ pmap_demote_pde_locked(pmap, pde, va, &lock))) {
+ pte = pmap_pde_to_pte(pde, va);
+ if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
+ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+ mpte->wire_count++;
+ }
+ } else if (va < VM_MAXUSER_ADDRESS) {
+ /*
+ * We get here if the pte page isn't mapped, or if it has been
+ * deallocated.
+ */
+ mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), &lock);
+ goto retry;
+ } else
+ panic("pmap_enter: invalid page directory va=%#lx", va);
+
+ origpte = *pte;
+
+ /*
+ * Is the specified virtual address already mapped?
+ */
+ if ((origpte & PG_V) != 0) {
+ /*
+ * Wiring change, just update stats. We don't worry about
+ * wiring PT pages as they remain resident as long as there
+ * are valid mappings in them. Hence, if a user page is wired,
+ * the PT page will be also.
+ */
+ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0)
+ pmap->pm_stats.wired_count++;
+ else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0)
+ pmap->pm_stats.wired_count--;
+
+ /*
+ * Remove the extra PT page reference.
+ */
+ if (mpte != NULL) {
+ mpte->wire_count--;
+ KASSERT(mpte->wire_count > 0,
+ ("pmap_enter: missing reference to page table page,"
+ " va: 0x%lx", va));
+ }
+
+ /*
+ * Has the physical page changed?
+ */
+ opa = origpte & PG_FRAME;
+ if (opa == pa) {
+ /*
+ * No, might be a protection or wiring change.
+ */
+ if ((origpte & PG_MANAGED) != 0) {
+ newpte |= PG_MANAGED;
+ if ((newpte & PG_RW) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ }
+ if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0)
+ goto unchanged;
+ goto validate;
+ }
+ } else {
+ /*
+ * Increment the counters.
+ */
+ if ((newpte & PG_W) != 0)
+ pmap->pm_stats.wired_count++;
+ pmap_resident_count_inc(pmap, 1);
+ }
+
+ /*
+ * Enter on the PV list if part of our managed memory.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ newpte |= PG_MANAGED;
+ pv = get_pv_entry(pmap, &lock);
+ pv->pv_va = va;
+ CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ if ((newpte & PG_RW) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ }
+
+ /*
+ * Update the PTE.
+ */
+ if ((origpte & PG_V) != 0) {
+validate:
+ origpte = pte_load_store(pte, newpte);
+ opa = origpte & PG_FRAME;
+ if (opa != pa) {
+ if ((origpte & PG_MANAGED) != 0) {
+ om = PHYS_TO_VM_PAGE(opa);
+ if ((origpte & (PG_M | PG_RW)) == (PG_M |
+ PG_RW))
+ vm_page_dirty(om);
+ if ((origpte & PG_A) != 0)
+ vm_page_aflag_set(om, PGA_REFERENCED);
+ CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
+ pmap_pvh_free(&om->md, pmap, va);
+ if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&om->md.pv_list) &&
+ ((om->flags & PG_FICTITIOUS) != 0 ||
+ TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
+ vm_page_aflag_clear(om, PGA_WRITEABLE);
+ }
+ } else if ((newpte & PG_M) == 0 && (origpte & (PG_M |
+ PG_RW)) == (PG_M | PG_RW)) {
+ if ((origpte & PG_MANAGED) != 0)
+ vm_page_dirty(m);
+
+ /*
+ * Although the PTE may still have PG_RW set, TLB
+ * invalidation may nonetheless be required because
+ * the PTE no longer has PG_M set.
+ */
+ } else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) {
+ /*
+ * This PTE change does not require TLB invalidation.
+ */
+ goto unchanged;
+ }
+ if ((origpte & PG_A) != 0)
+ pmap_invalidate_page(pmap, va);
+ } else
+ pte_store(pte, newpte);
+
+unchanged:
+
+ /*
+ * If both the page table page and the reservation are fully
+ * populated, then attempt promotion.
+ */
+ if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
+ pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
+ vm_reserv_level_iffullpop(m) == 0)
+ pmap_promote_pde(pmap, pde, va, &lock);
+
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Tries to create a 2MB page mapping. Returns TRUE if successful and FALSE
+ * otherwise. Fails if (1) a page table page cannot be allocated without
+ * blocking, (2) a mapping already exists at the specified virtual address, or
+ * (3) a pv entry cannot be allocated without reclaiming another pv entry.
+ */
+static boolean_t
+pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+ struct rwlock **lockp)
+{
+ pd_entry_t *pde, newpde;
+ vm_page_t free, mpde;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
+ CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpde));
+ pde = &pde[pmap_pde_index(va)];
+ if ((*pde & PG_V) != 0) {
+ KASSERT(mpde->wire_count > 1,
+ ("pmap_enter_pde: mpde's wire count is too low"));
+ mpde->wire_count--;
+ CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
+ PG_PS | PG_V;
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ newpde |= PG_MANAGED;
+
+ /*
+ * Abort this mapping if its PV entry could not be created.
+ */
+ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m),
+ lockp)) {
+ free = NULL;
+ if (pmap_unwire_ptp(pmap, va, mpde, &free)) {
+ pmap_invalidate_page(pmap, va);
+ pmap_free_zero_pages(free);
+ }
+ CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ }
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ newpde |= pg_nx;
+ if (va < VM_MAXUSER_ADDRESS)
+ newpde |= PG_U;
+
+ /*
+ * Increment counters.
+ */
+ pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
+
+ /*
+ * Map the superpage.
+ */
+ pde_store(pde, newpde);
+
+ atomic_add_long(&pmap_pde_mappings, 1);
+ CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
+ " in pmap %p", va, pmap);
+ return (TRUE);
+}
+
+/*
+ * Maps a sequence of resident pages belonging to the same object.
+ * The sequence begins with the given page m_start. This page is
+ * mapped at the given virtual address start. Each subsequent page is
+ * mapped at a virtual address that is offset from start by the same
+ * amount as the page is offset from m_start within the object. The
+ * last page in the sequence is the page with the largest offset from
+ * m_start that can be mapped at a virtual address less than the given
+ * virtual address end. Not every virtual page between start and end
+ * is mapped; only those for which a resident page exists with the
+ * corresponding offset from m_start are mapped.
+ */
+void
+pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
+ vm_page_t m_start, vm_prot_t prot)
+{
+ struct rwlock *lock;
+ vm_offset_t va;
+ vm_page_t m, mpte;
+ vm_pindex_t diff, psize;
+
+ VM_OBJECT_ASSERT_WLOCKED(m_start->object);
+ psize = atop(end - start);
+ mpte = NULL;
+ m = m_start;
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
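+	/*
+	 * Try a 2MB page mapping whenever the virtual and physical
+	 * addresses are both 2MB aligned, at least 2MB remains before
+	 * "end", superpages are enabled, and the page's reservation is
+	 * fully populated; on success, skip ahead to the last 4KB page
+	 * of the superpage.
+	 */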
+ while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
+ va = start + ptoa(diff);
+ if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
+ (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
+ pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
+ pmap_enter_pde(pmap, va, m, prot, &lock))
+ m = &m[NBPDR / PAGE_SIZE - 1];
+ else
+ mpte = pmap_enter_quick_locked(pmap, va, m, prot,
+ mpte, &lock);
+ m = TAILQ_NEXT(m, listq);
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * This code makes some *MAJOR* assumptions:
+ * 1. The current pmap and the target pmap exist.
+ * 2. The mapping is not wired.
+ * 3. Only read access is needed.
+ * 4. No page table pages.
+ * but it is *MUCH* faster than pmap_enter...
+ */
+
+void
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+{
+ struct rwlock *lock;
+
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+ (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+static vm_page_t
+pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
+{
+ vm_page_t free;
+ pt_entry_t *pte;
+ vm_paddr_t pa;
+
+ KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
+ (m->oflags & VPO_UNMANAGED) != 0,
+ ("pmap_enter_quick_locked: managed mapping within the clean submap"));
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+ if (va < VM_MAXUSER_ADDRESS) {
+ vm_pindex_t ptepindex;
+ pd_entry_t *ptepa;
+
+ /*
+ * Calculate pagetable page index
+ */
+ ptepindex = pmap_pde_pindex(va);
+ if (mpte && (mpte->pindex == ptepindex)) {
+ mpte->wire_count++;
+ } else {
+ /*
+ * Get the page directory entry
+ */
+ ptepa = pmap_pde(pmap, va);
+
+ /*
+ * If the page table page is mapped, we just increment
+ * the hold count, and activate it. Otherwise, we
+ * attempt to allocate a page table page. If this
+ * attempt fails, we don't retry. Instead, we give up.
+ */
+ if (ptepa && (*ptepa & PG_V) != 0) {
+ if (*ptepa & PG_PS)
+ return (NULL);
+ mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME);
+ mpte->wire_count++;
+ } else {
+ /*
+ * Pass NULL instead of the PV list lock
+ * pointer, because we don't intend to sleep.
+ */
+ mpte = _pmap_allocpte(pmap, ptepindex, NULL);
+ if (mpte == NULL)
+ return (mpte);
+ }
+ }
+ pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
+ pte = &pte[pmap_pte_index(va)];
+ } else {
+ mpte = NULL;
+ pte = vtopte(va);
+ }
+ if (*pte) {
+ if (mpte != NULL) {
+ mpte->wire_count--;
+ mpte = NULL;
+ }
+ return (mpte);
+ }
+
+ /*
+ * Enter on the PV list if part of our managed memory.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0 &&
+ !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
+ if (mpte != NULL) {
+ free = NULL;
+ if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
+ pmap_invalidate_page(pmap, va);
+ pmap_free_zero_pages(free);
+ }
+ mpte = NULL;
+ }
+ return (mpte);
+ }
+
+ /*
+ * Increment counters
+ */
+ pmap_resident_count_inc(pmap, 1);
+
+ pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ pa |= pg_nx;
+
+ /*
+ * Now validate mapping with RO protection
+ */
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ pte_store(pte, pa | PG_V | PG_U);
+ else
+ pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+ return (mpte);
+}
+
+/*
+ * Make a temporary mapping for a physical address. This is only intended
+ * to be used for panic dumps.
+ */
+void *
+pmap_kenter_temporary(vm_paddr_t pa, int i)
+{
+ vm_offset_t va;
+
+ va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
+ pmap_kenter(va, pa);
+ invlpg(va);
+ return ((void *)crashdumpmap);
+}
+
+/*
+ * This code maps large physical mmap regions into the
+ * processor address space using 2MB page mappings when
+ * possible.  Note that some shortcuts are taken, but the
+ * code works.
+ */
+void
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
+ vm_pindex_t pindex, vm_size_t size)
+{
+ pd_entry_t *pde;
+ vm_paddr_t pa, ptepa;
+ vm_page_t p, pdpg;
+ int pat_mode;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
+ ("pmap_object_init_pt: non-device object"));
+ if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
+ if (!vm_object_populate(object, pindex, pindex + atop(size)))
+ return;
+ p = vm_page_lookup(object, pindex);
+ KASSERT(p->valid == VM_PAGE_BITS_ALL,
+ ("pmap_object_init_pt: invalid page %p", p));
+ pat_mode = p->md.pat_mode;
+
+ /*
+ * Abort the mapping if the first page is not physically
+ * aligned to a 2MB page boundary.
+ */
+ ptepa = VM_PAGE_TO_PHYS(p);
+ if (ptepa & (NBPDR - 1))
+ return;
+
+ /*
+ * Skip the first page. Abort the mapping if the rest of
+ * the pages are not physically contiguous or have differing
+ * memory attributes.
+ */
+ p = TAILQ_NEXT(p, listq);
+ for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
+ pa += PAGE_SIZE) {
+ KASSERT(p->valid == VM_PAGE_BITS_ALL,
+ ("pmap_object_init_pt: invalid page %p", p));
+ if (pa != VM_PAGE_TO_PHYS(p) ||
+ pat_mode != p->md.pat_mode)
+ return;
+ p = TAILQ_NEXT(p, listq);
+ }
+
+ /*
+ * Map using 2MB pages. Since "ptepa" is 2M aligned and
+ * "size" is a multiple of 2M, adding the PAT setting to "pa"
+ * will not affect the termination of this loop.
+ */
+ PMAP_LOCK(pmap);
+ for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
+ size; pa += NBPDR) {
+ pdpg = pmap_allocpde(pmap, addr, NULL);
+ if (pdpg == NULL) {
+ /*
+ * The creation of mappings below is only an
+ * optimization. If a page directory page
+ * cannot be allocated without blocking,
+ * continue on to the next mapping rather than
+ * blocking.
+ */
+ addr += NBPDR;
+ continue;
+ }
+ pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
+ pde = &pde[pmap_pde_index(addr)];
+ if ((*pde & PG_V) == 0) {
+ pde_store(pde, pa | PG_PS | PG_M | PG_A |
+ PG_U | PG_RW | PG_V);
+ pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
+ atomic_add_long(&pmap_pde_mappings, 1);
+ } else {
+ /* Continue on if the PDE is already valid. */
+ pdpg->wire_count--;
+ KASSERT(pdpg->wire_count > 0,
+ ("pmap_object_init_pt: missing reference "
+ "to page directory page, va: 0x%lx", addr));
+ }
+ addr += NBPDR;
+ }
+ PMAP_UNLOCK(pmap);
+ }
+}
+
+/*
+ * Routine: pmap_change_wiring
+ * Function: Change the wiring attribute for a map/virtual-address
+ * pair.
+ * In/out conditions:
+ * The mapping must already exist in the pmap.
+ */
+void
+pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ boolean_t pv_lists_locked;
+
+ pv_lists_locked = FALSE;
+
+ /*
+ * Wiring is not a hardware characteristic so there is no need to
+ * invalidate TLB.
+ */
+retry:
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, va);
+ if ((*pde & PG_PS) != 0) {
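+		/*
+		 * Demote the 2MB page only when the requested wiring
+		 * differs from the current one, i.e., when "!wired" and
+		 * "PG_W is clear" disagree.
+		 */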
+ if (!wired != ((*pde & PG_W) == 0)) {
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_rlock(&pvh_global_lock)) {
+ PMAP_UNLOCK(pmap);
+ rw_rlock(&pvh_global_lock);
+ goto retry;
+ }
+ }
+ if (!pmap_demote_pde(pmap, pde, va))
+ panic("pmap_change_wiring: demotion failed");
+ } else
+ goto out;
+ }
+ pte = pmap_pde_to_pte(pde, va);
+ if (wired && (*pte & PG_W) == 0) {
+ pmap->pm_stats.wired_count++;
+ atomic_set_long(pte, PG_W);
+ } else if (!wired && (*pte & PG_W) != 0) {
+ pmap->pm_stats.wired_count--;
+ atomic_clear_long(pte, PG_W);
+ }
+out:
+ if (pv_lists_locked)
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Copy the range specified by src_addr/len
+ * from the source map to the range dst_addr/len
+ * in the destination map.
+ *
+ * This routine is only advisory and need not do anything.
+ */
+
+void
+pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
+ vm_offset_t src_addr)
+{
+ struct rwlock *lock;
+ vm_page_t free;
+ vm_offset_t addr;
+ vm_offset_t end_addr = src_addr + len;
+ vm_offset_t va_next;
+
+ if (dst_addr != src_addr)
+ return;
+
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
+ if (dst_pmap < src_pmap) {
+ PMAP_LOCK(dst_pmap);
+ PMAP_LOCK(src_pmap);
+ } else {
+ PMAP_LOCK(src_pmap);
+ PMAP_LOCK(dst_pmap);
+ }
+ for (addr = src_addr; addr < end_addr; addr = va_next) {
+ pt_entry_t *src_pte, *dst_pte;
+ vm_page_t dstmpde, dstmpte, srcmpte;
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t srcptepaddr, *pde;
+
+ KASSERT(addr < UPT_MIN_ADDRESS,
+ ("pmap_copy: invalid to pmap_copy page tables"));
+
+ pml4e = pmap_pml4e(src_pmap, addr);
+ if ((*pml4e & PG_V) == 0) {
+ va_next = (addr + NBPML4) & ~PML4MASK;
+ if (va_next < addr)
+ va_next = end_addr;
+ continue;
+ }
+
+ pdpe = pmap_pml4e_to_pdpe(pml4e, addr);
+ if ((*pdpe & PG_V) == 0) {
+ va_next = (addr + NBPDP) & ~PDPMASK;
+ if (va_next < addr)
+ va_next = end_addr;
+ continue;
+ }
+
+ va_next = (addr + NBPDR) & ~PDRMASK;
+ if (va_next < addr)
+ va_next = end_addr;
+
+ pde = pmap_pdpe_to_pde(pdpe, addr);
+ srcptepaddr = *pde;
+ if (srcptepaddr == 0)
+ continue;
+
+ if (srcptepaddr & PG_PS) {
+ dstmpde = pmap_allocpde(dst_pmap, addr, NULL);
+ if (dstmpde == NULL)
+ break;
+ pde = (pd_entry_t *)
+ PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde));
+ pde = &pde[pmap_pde_index(addr)];
+ if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
+ pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
+ PG_PS_FRAME, &lock))) {
+ *pde = srcptepaddr & ~PG_W;
+ pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE);
+ } else
+ dstmpde->wire_count--;
+ continue;
+ }
+
+ srcptepaddr &= PG_FRAME;
+ srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
+ KASSERT(srcmpte->wire_count > 0,
+ ("pmap_copy: source page table page is unused"));
+
+ if (va_next > end_addr)
+ va_next = end_addr;
+
+ src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr);
+ src_pte = &src_pte[pmap_pte_index(addr)];
+ dstmpte = NULL;
+ while (addr < va_next) {
+ pt_entry_t ptetemp;
+ ptetemp = *src_pte;
+ /*
+ * Only mappings of managed pages are copied.
+ */
+ if ((ptetemp & PG_MANAGED) != 0) {
+ if (dstmpte != NULL &&
+ dstmpte->pindex == pmap_pde_pindex(addr))
+ dstmpte->wire_count++;
+ else if ((dstmpte = pmap_allocpte(dst_pmap,
+ addr, NULL)) == NULL)
+ goto out;
+ dst_pte = (pt_entry_t *)
+ PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
+ dst_pte = &dst_pte[pmap_pte_index(addr)];
+ if (*dst_pte == 0 &&
+ pmap_try_insert_pv_entry(dst_pmap, addr,
+ PHYS_TO_VM_PAGE(ptetemp & PG_FRAME),
+ &lock)) {
+ /*
+ * Clear the wired, modified, and
+ * accessed (referenced) bits
+ * during the copy.
+ */
+ *dst_pte = ptetemp & ~(PG_W | PG_M |
+ PG_A);
+ pmap_resident_count_inc(dst_pmap, 1);
+ } else {
+ free = NULL;
+ if (pmap_unwire_ptp(dst_pmap, addr,
+ dstmpte, &free)) {
+ pmap_invalidate_page(dst_pmap,
+ addr);
+ pmap_free_zero_pages(free);
+ }
+ goto out;
+ }
+ if (dstmpte->wire_count >= srcmpte->wire_count)
+ break;
+ }
+ addr += PAGE_SIZE;
+ src_pte++;
+ }
+ }
+out:
+ if (lock != NULL)
+ rw_wunlock(lock);
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(src_pmap);
+ PMAP_UNLOCK(dst_pmap);
+}
+
+/*
+ * pmap_zero_page zeros the specified hardware page through its
+ * permanent direct map address.
+ */
+void
+pmap_zero_page(vm_page_t m)
+{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+
+ pagezero((void *)va);
+}
+
+/*
+ * pmap_zero_page_area zeros the requested portion of the specified
+ * hardware page through its direct map address.
+ *
+ * off and size may not cover an area beyond a single hardware page.
+ */
+void
+pmap_zero_page_area(vm_page_t m, int off, int size)
+{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+
+ if (off == 0 && size == PAGE_SIZE)
+ pagezero((void *)va);
+ else
+ bzero((char *)va + off, size);
+}
+
+/*
+ * pmap_zero_page_idle zeros the specified hardware page through
+ * its direct map address.  This
+ * is intended to be called from the vm_pagezero process only and
+ * outside of Giant.
+ */
+void
+pmap_zero_page_idle(vm_page_t m)
+{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+
+ pagezero((void *)va);
+}
+
+/*
+ * pmap_copy_page copies the specified (machine independent)
+ * page through the direct map, one machine dependent page at
+ * a time.
+ */
+void
+pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
+{
+ vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
+ vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
+
+ pagecopy((void *)src, (void *)dst);
+}
+
+int unmapped_buf_allowed = 1;
+
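+/*
+ * Copy "xfersize" bytes from the pages ma[], starting at byte offset
+ * "a_offset", to the pages mb[], starting at "b_offset", going through
+ * the direct map and splitting each copy at page boundaries.
+ */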
+void
+pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
+ vm_offset_t b_offset, int xfersize)
+{
+ void *a_cp, *b_cp;
+ vm_offset_t a_pg_offset, b_pg_offset;
+ int cnt;
+
+ while (xfersize > 0) {
+ a_pg_offset = a_offset & PAGE_MASK;
+ cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
+ a_cp = (char *)PHYS_TO_DMAP(ma[a_offset >> PAGE_SHIFT]->
+ phys_addr) + a_pg_offset;
+ b_pg_offset = b_offset & PAGE_MASK;
+ cnt = min(cnt, PAGE_SIZE - b_pg_offset);
+ b_cp = (char *)PHYS_TO_DMAP(mb[b_offset >> PAGE_SHIFT]->
+ phys_addr) + b_pg_offset;
+ bcopy(a_cp, b_cp, cnt);
+ a_offset += cnt;
+ b_offset += cnt;
+ xfersize -= cnt;
+ }
+}
+
+/*
+ * Returns true if the pmap's pv is one of the first
+ * 16 pvs linked to from this page. This count may
+ * be changed upwards or downwards in the future; it
+ * is only necessary that true be returned for a small
+ * subset of pmaps for proper page aging.
+ */
+boolean_t
+pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
+{
+ struct md_page *pvh;
+ struct rwlock *lock;
+ pv_entry_t pv;
+ int loops = 0;
+ boolean_t rv;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_page_exists_quick: page %p is not managed", m));
+ rv = FALSE;
+ rw_rlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ if (PV_PMAP(pv) == pmap) {
+ rv = TRUE;
+ break;
+ }
+ loops++;
+ if (loops >= 16)
+ break;
+ }
+ if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ if (PV_PMAP(pv) == pmap) {
+ rv = TRUE;
+ break;
+ }
+ loops++;
+ if (loops >= 16)
+ break;
+ }
+ }
+ rw_runlock(lock);
+ rw_runlock(&pvh_global_lock);
+ return (rv);
+}
+
+/*
+ * pmap_page_wired_mappings:
+ *
+ * Return the number of managed mappings to the given physical page
+ * that are wired.
+ */
+int
+pmap_page_wired_mappings(vm_page_t m)
+{
+ int count;
+
+ count = 0;
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ return (count);
+ rw_wlock(&pvh_global_lock);
+ count = pmap_pvh_wired_mappings(&m->md, count);
+ if ((m->flags & PG_FICTITIOUS) == 0) {
+ count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
+ count);
+ }
+ rw_wunlock(&pvh_global_lock);
+ return (count);
+}
+
+/*
+ * pmap_pvh_wired_mappings:
+ *
+ * Return the updated number "count" of managed mappings that are wired.
+ */
+static int
+pmap_pvh_wired_mappings(struct md_page *pvh, int count)
+{
+ pmap_t pmap;
+ pt_entry_t *pte;
+ pv_entry_t pv;
+
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, pv->pv_va);
+ if ((*pte & PG_W) != 0)
+ count++;
+ PMAP_UNLOCK(pmap);
+ }
+ return (count);
+}
+
+/*
+ * Returns TRUE if the given page is mapped individually or as part of
+ * a 2mpage. Otherwise, returns FALSE.
+ */
+boolean_t
+pmap_page_is_mapped(vm_page_t m)
+{
+ struct rwlock *lock;
+ boolean_t rv;
+
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ return (FALSE);
+ rw_rlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_rlock(lock);
+ rv = !TAILQ_EMPTY(&m->md.pv_list) ||
+ ((m->flags & PG_FICTITIOUS) == 0 &&
+ !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
+ rw_runlock(lock);
+ rw_runlock(&pvh_global_lock);
+ return (rv);
+}
+
+/*
+ * Remove all pages from specified address space
+ * this aids process exit speeds. Also, this code
+ * is special cased for current process only, but
+ * can have the more generic (and slightly slower)
+ * mode enabled. This is much faster than pmap_remove
+ * in the case of running down an entire address space.
+ */
+void
+pmap_remove_pages(pmap_t pmap)
+{
+ pd_entry_t ptepde;
+ pt_entry_t *pte, tpte;
+ vm_page_t free = NULL;
+ vm_page_t m, mpte, mt;
+ pv_entry_t pv;
+ struct md_page *pvh;
+ struct pv_chunk *pc, *npc;
+ struct rwlock *lock;
+ int64_t bit;
+ uint64_t inuse, bitmask;
+ int allfree, field, freed, idx;
+
+ if (pmap != PCPU_GET(curpmap)) {
+ printf("warning: pmap_remove_pages called with non-current pmap\n");
+ return;
+ }
+ lock = NULL;
+ rw_rlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+ TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
+ allfree = 1;
+ freed = 0;
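+		/*
+		 * Scan each word of the chunk's allocation bitmap: the
+		 * complement of pc_map[] against pc_freemask[] yields the
+		 * in-use pv entries, which bsfq() then visits one bit at
+		 * a time.
+		 */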
+ for (field = 0; field < _NPCM; field++) {
+ inuse = ~pc->pc_map[field] & pc_freemask[field];
+ while (inuse != 0) {
+ bit = bsfq(inuse);
+ bitmask = 1UL << bit;
+ idx = field * 64 + bit;
+ pv = &pc->pc_pventry[idx];
+ inuse &= ~bitmask;
+
+ pte = pmap_pdpe(pmap, pv->pv_va);
+ ptepde = *pte;
+ pte = pmap_pdpe_to_pde(pte, pv->pv_va);
+ tpte = *pte;
+ if ((tpte & (PG_PS | PG_V)) == PG_V) {
+ ptepde = tpte;
+ pte = (pt_entry_t *)PHYS_TO_DMAP(tpte &
+ PG_FRAME);
+ pte = &pte[pmap_pte_index(pv->pv_va)];
+ tpte = *pte & ~PG_PTE_PAT;
+ }
+ if ((tpte & PG_V) == 0) {
+ panic("bad pte va %lx pte %lx",
+ pv->pv_va, tpte);
+ }
+
+				/*
+				 * We cannot remove wired pages from a
+				 * process's mapping at this time.
+				 */
+ if (tpte & PG_W) {
+ allfree = 0;
+ continue;
+ }
+
+ m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+ KASSERT(m->phys_addr == (tpte & PG_FRAME),
+ ("vm_page_t %p phys_addr mismatch %016jx %016jx",
+ m, (uintmax_t)m->phys_addr,
+ (uintmax_t)tpte));
+
+ KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
+ m < &vm_page_array[vm_page_array_size],
+ ("pmap_remove_pages: bad tpte %#jx",
+ (uintmax_t)tpte));
+
+ pte_clear(pte);
+
+ /*
+ * Update the vm_page_t clean/reference bits.
+ */
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ if ((tpte & PG_PS) != 0) {
+ for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
+ vm_page_dirty(mt);
+ } else
+ vm_page_dirty(m);
+ }
+
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+
+ /* Mark free */
+ pc->pc_map[field] |= bitmask;
+ if ((tpte & PG_PS) != 0) {
+ pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
+ pvh = pa_to_pvh(tpte & PG_PS_FRAME);
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
+ if (TAILQ_EMPTY(&pvh->pv_list)) {
+ for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
+ if ((mt->aflags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&mt->md.pv_list))
+ vm_page_aflag_clear(mt, PGA_WRITEABLE);
+ }
+ mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
+ if (mpte != NULL) {
+ pmap_remove_pt_page(pmap, mpte);
+ pmap_resident_count_dec(pmap, 1);
+ KASSERT(mpte->wire_count == NPTEPG,
+ ("pmap_remove_pages: pte page wire count error"));
+ mpte->wire_count = 0;
+ pmap_add_delayed_free_list(mpte, &free, FALSE);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ }
+ } else {
+ pmap_resident_count_dec(pmap, 1);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ if ((m->aflags & PGA_WRITEABLE) != 0 &&
+ TAILQ_EMPTY(&m->md.pv_list) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free);
+ freed++;
+ }
+ }
+ PV_STAT(atomic_add_long(&pv_entry_frees, freed));
+ PV_STAT(atomic_add_int(&pv_entry_spare, freed));
+ PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
+ if (allfree) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+ }
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ pmap_invalidate_all(pmap);
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+ pmap_free_zero_pages(free);
+}
+
+/*
+ * pmap_is_modified:
+ *
+ * Return whether or not the specified physical page was modified
+ * in any physical maps.
+ */
+boolean_t
+pmap_is_modified(vm_page_t m)
+{
+ boolean_t rv;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_is_modified: page %p is not managed", m));
+
+ /*
+ * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
+ * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
+ * is clear, no PTEs can have PG_M set.
+ */
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ if ((m->oflags & VPO_BUSY) == 0 &&
+ (m->aflags & PGA_WRITEABLE) == 0)
+ return (FALSE);
+ rw_wlock(&pvh_global_lock);
+ rv = pmap_is_modified_pvh(&m->md) ||
+ ((m->flags & PG_FICTITIOUS) == 0 &&
+ pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
+ rw_wunlock(&pvh_global_lock);
+ return (rv);
+}
+
+/*
+ * Returns TRUE if any of the given mappings were used to modify
+ * physical memory. Otherwise, returns FALSE. Both page and 2mpage
+ * mappings are supported.
+ */
+static boolean_t
+pmap_is_modified_pvh(struct md_page *pvh)
+{
+ pv_entry_t pv;
+ pt_entry_t *pte;
+ pmap_t pmap;
+ boolean_t rv;
+
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ rv = FALSE;
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, pv->pv_va);
+ rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW);
+ PMAP_UNLOCK(pmap);
+ if (rv)
+ break;
+ }
+ return (rv);
+}
+
+/*
+ * pmap_is_prefaultable:
+ *
+ * Return whether or not the specified virtual address is eligible
+ * for prefault.
+ */
+boolean_t
+pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ boolean_t rv;
+
+ rv = FALSE;
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, addr);
+ if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) {
+ pte = pmap_pde_to_pte(pde, addr);
+ rv = (*pte & PG_V) == 0;
+ }
+ PMAP_UNLOCK(pmap);
+ return (rv);
+}
+
+/*
+ * pmap_is_referenced:
+ *
+ * Return whether or not the specified physical page was referenced
+ * in any physical maps.
+ */
+boolean_t
+pmap_is_referenced(vm_page_t m)
+{
+ boolean_t rv;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_is_referenced: page %p is not managed", m));
+ rw_wlock(&pvh_global_lock);
+ rv = pmap_is_referenced_pvh(&m->md) ||
+ ((m->flags & PG_FICTITIOUS) == 0 &&
+ pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
+ rw_wunlock(&pvh_global_lock);
+ return (rv);
+}
+
+/*
+ * Returns TRUE if any of the given mappings were referenced and FALSE
+ * otherwise. Both page and 2mpage mappings are supported.
+ */
+static boolean_t
+pmap_is_referenced_pvh(struct md_page *pvh)
+{
+ pv_entry_t pv;
+ pt_entry_t *pte;
+ pmap_t pmap;
+ boolean_t rv;
+
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ rv = FALSE;
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, pv->pv_va);
+ rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
+ PMAP_UNLOCK(pmap);
+ if (rv)
+ break;
+ }
+ return (rv);
+}
+
+/*
+ * Clear the write and modified bits in each of the given page's mappings.
+ */
+void
+pmap_remove_write(vm_page_t m)
+{
+ struct md_page *pvh;
+ pmap_t pmap;
+ pv_entry_t next_pv, pv;
+ pd_entry_t *pde;
+ pt_entry_t oldpte, *pte;
+ vm_offset_t va;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_remove_write: page %p is not managed", m));
+
+ /*
+ * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+ * another thread while the object is locked. Thus, if PGA_WRITEABLE
+ * is clear, no page table entries need updating.
+ */
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ if ((m->oflags & VPO_BUSY) == 0 &&
+ (m->aflags & PGA_WRITEABLE) == 0)
+ return;
+ rw_wlock(&pvh_global_lock);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ goto small_mappings;
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ if ((*pde & PG_RW) != 0)
+ (void)pmap_demote_pde(pmap, pde, va);
+ PMAP_UNLOCK(pmap);
+ }
+small_mappings:
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0,
+ ("pmap_remove_write: found a 2mpage in page %p's pv list",
+ m));
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
+retry:
+ oldpte = *pte;
+ if (oldpte & PG_RW) {
+ if (!atomic_cmpset_long(pte, oldpte, oldpte &
+ ~(PG_RW | PG_M)))
+ goto retry;
+ if ((oldpte & PG_M) != 0)
+ vm_page_dirty(m);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ rw_wunlock(&pvh_global_lock);
+}
+
+/*
+ * pmap_ts_referenced:
+ *
+ * Return a count of reference bits for a page, clearing those bits.
+ * It is not necessary for every reference bit to be cleared, but it
+ * is necessary that 0 only be returned when there are truly no
+ * reference bits set.
+ *
+ * XXX: The exact number of bits to check and clear is a matter that
+ * should be tested and standardized at some point in the future for
+ * optimal aging of shared pages.
+ */
+int
+pmap_ts_referenced(vm_page_t m)
+{
+ struct md_page *pvh;
+ pv_entry_t pv, pvf, pvn;
+ pmap_t pmap;
+ pd_entry_t oldpde, *pde;
+ pt_entry_t *pte;
+ vm_offset_t va;
+ int rtval = 0;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_ts_referenced: page %p is not managed", m));
+ rw_wlock(&pvh_global_lock);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ goto small_mappings;
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ oldpde = *pde;
+ if ((oldpde & PG_A) != 0) {
+ if (pmap_demote_pde(pmap, pde, va)) {
+ if ((oldpde & PG_W) == 0) {
+ /*
+ * Remove the mapping to a single page
+ * so that a subsequent access may
+ * repromote. Since the underlying
+ * page table page is fully populated,
+ * this removal never frees a page
+ * table page.
+ */
+ va += VM_PAGE_TO_PHYS(m) - (oldpde &
+ PG_PS_FRAME);
+ pmap_remove_page(pmap, va, pde, NULL);
+ rtval++;
+ if (rtval > 4) {
+ PMAP_UNLOCK(pmap);
+ goto out;
+ }
+ }
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ }
+small_mappings:
+ if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+ pvf = pv;
+ do {
+ pvn = TAILQ_NEXT(pv, pv_next);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
+ " found a 2mpage in page %p's pv list", m));
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
+ if ((*pte & PG_A) != 0) {
+ atomic_clear_long(pte, PG_A);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ rtval++;
+ if (rtval > 4)
+ pvn = NULL;
+ }
+ PMAP_UNLOCK(pmap);
+ } while ((pv = pvn) != NULL && pv != pvf);
+ }
+out:
+ rw_wunlock(&pvh_global_lock);
+ return (rtval);
+}
+
+/*
+ * Clear the modify bits on the specified physical page.
+ */
+void
+pmap_clear_modify(vm_page_t m)
+{
+ struct md_page *pvh;
+ pmap_t pmap;
+ pv_entry_t next_pv, pv;
+ pd_entry_t oldpde, *pde;
+ pt_entry_t oldpte, *pte;
+ vm_offset_t va;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_clear_modify: page %p is not managed", m));
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+ KASSERT((m->oflags & VPO_BUSY) == 0,
+ ("pmap_clear_modify: page %p is busy", m));
+
+ /*
+ * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
+ * If the object containing the page is locked and the page is not
+ * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
+ */
+ if ((m->aflags & PGA_WRITEABLE) == 0)
+ return;
+ rw_wlock(&pvh_global_lock);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ goto small_mappings;
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ oldpde = *pde;
+ if ((oldpde & PG_RW) != 0) {
+ if (pmap_demote_pde(pmap, pde, va)) {
+ if ((oldpde & PG_W) == 0) {
+ /*
+ * Write protect the mapping to a
+ * single page so that a subsequent
+ * write access may repromote.
+ */
+ va += VM_PAGE_TO_PHYS(m) - (oldpde &
+ PG_PS_FRAME);
+ pte = pmap_pde_to_pte(pde, va);
+ oldpte = *pte;
+ if ((oldpte & PG_V) != 0) {
+ while (!atomic_cmpset_long(pte,
+ oldpte,
+ oldpte & ~(PG_M | PG_RW)))
+ oldpte = *pte;
+ vm_page_dirty(m);
+ pmap_invalidate_page(pmap, va);
+ }
+ }
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ }
+small_mappings:
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
+ " a 2mpage in page %p's pv list", m));
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
+ if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ atomic_clear_long(pte, PG_M);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ rw_wunlock(&pvh_global_lock);
+}
+
+/*
+ * pmap_clear_reference:
+ *
+ * Clear the reference bit on the specified physical page.
+ */
+void
+pmap_clear_reference(vm_page_t m)
+{
+ struct md_page *pvh;
+ pmap_t pmap;
+ pv_entry_t next_pv, pv;
+ pd_entry_t oldpde, *pde;
+ pt_entry_t *pte;
+ vm_offset_t va;
+
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_clear_reference: page %p is not managed", m));
+ rw_wlock(&pvh_global_lock);
+ if ((m->flags & PG_FICTITIOUS) != 0)
+ goto small_mappings;
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ va = pv->pv_va;
+ pde = pmap_pde(pmap, va);
+ oldpde = *pde;
+ if ((oldpde & PG_A) != 0) {
+ if (pmap_demote_pde(pmap, pde, va)) {
+ /*
+ * Remove the mapping to a single page so
+ * that a subsequent access may repromote.
+ * Since the underlying page table page is
+ * fully populated, this removal never frees
+ * a page table page.
+ */
+ va += VM_PAGE_TO_PHYS(m) - (oldpde &
+ PG_PS_FRAME);
+ pmap_remove_page(pmap, va, pde, NULL);
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ }
+small_mappings:
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
+ " a 2mpage in page %p's pv list", m));
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
+ if (*pte & PG_A) {
+ atomic_clear_long(pte, PG_A);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ rw_wunlock(&pvh_global_lock);
+}
+
+/*
+ * Miscellaneous support routines follow
+ */
+
+/* Adjust the cache mode for a 4KB page mapped via a PTE. */
+static __inline void
+pmap_pte_attr(pt_entry_t *pte, int cache_bits)
+{
+ u_int opte, npte;
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PTE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opte = *(u_int *)pte;
+ npte = opte & ~PG_PTE_CACHE;
+ npte |= cache_bits;
+ } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
+}
+
+/* Adjust the cache mode for a 2MB page mapped via a PDE. */
+static __inline void
+pmap_pde_attr(pd_entry_t *pde, int cache_bits)
+{
+ u_int opde, npde;
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PDE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opde = *(u_int *)pde;
+ npde = opde & ~PG_PDE_CACHE;
+ npde |= cache_bits;
+ } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
+}
+
+/*
+ * Map a set of physical memory pages into the kernel virtual
+ * address space. Return a pointer to where it is mapped. This
+ * routine is intended to be used for mapping device memory,
+ * NOT real memory.
+ */
+void *
+pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
+{
+ vm_offset_t va, offset;
+ vm_size_t tmpsize;
+
+ /*
+ * If the specified range of physical addresses fits within the direct
+ * map window, use the direct map.
+ */
+ if (pa < dmaplimit && pa + size < dmaplimit) {
+ va = PHYS_TO_DMAP(pa);
+ if (!pmap_change_attr(va, size, mode))
+ return ((void *)va);
+ }
+ offset = pa & PAGE_MASK;
+ size = round_page(offset + size);
+ va = kmem_alloc_nofault(kernel_map, size);
+ if (!va)
+ panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
+ pa = trunc_page(pa);
+ for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
+ pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
+ pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
+ pmap_invalidate_cache_range(va, va + tmpsize);
+ return ((void *)(va + offset));
+}
+
+void *
+pmap_mapdev(vm_paddr_t pa, vm_size_t size)
+{
+
+ return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
+}
+
+void *
+pmap_mapbios(vm_paddr_t pa, vm_size_t size)
+{
+
+ return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
+}
+
+void
+pmap_unmapdev(vm_offset_t va, vm_size_t size)
+{
+ vm_offset_t base, offset;
+
+	/* If pmap_mapdev() returned a direct map address, do nothing. */
+ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
+ return;
+ base = trunc_page(va);
+ offset = va & PAGE_MASK;
+ size = round_page(offset + size);
+ kmem_free(kernel_map, base, size);
+}
+
+/*
+ * Tries to demote a 1GB page mapping.
+ */
+static boolean_t
+pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
+{
+ pdp_entry_t newpdpe, oldpdpe;
+ pd_entry_t *firstpde, newpde, *pde;
+ vm_paddr_t mpdepa;
+ vm_page_t mpde;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ oldpdpe = *pdpe;
+ KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V),
+ ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V"));
+ if ((mpde = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
+ CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ mpdepa = VM_PAGE_TO_PHYS(mpde);
+ firstpde = (pd_entry_t *)PHYS_TO_DMAP(mpdepa);
+ newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V;
+ KASSERT((oldpdpe & PG_A) != 0,
+ ("pmap_demote_pdpe: oldpdpe is missing PG_A"));
+ KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW,
+ ("pmap_demote_pdpe: oldpdpe is missing PG_M"));
+ newpde = oldpdpe;
+
+ /*
+ * Initialize the page directory page.
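+	 * Each of the NPDEPG (512) entries inherits the old 1GB
+	 * mapping's attributes and maps a successive 2MB slice
+	 * ("newpde" advances by NBPDR per entry).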
+ */
+ for (pde = firstpde; pde < firstpde + NPDEPG; pde++) {
+ *pde = newpde;
+ newpde += NBPDR;
+ }
+
+ /*
+ * Demote the mapping.
+ */
+ *pdpe = newpdpe;
+
+ /*
+ * Invalidate a stale recursive mapping of the page directory page.
+ */
+ pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va));
+
+ pmap_pdpe_demotions++;
+ CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx"
+ " in pmap %p", va, pmap);
+ return (TRUE);
+}
+
+/*
+ * Sets the memory attribute for the specified page.
+ */
+void
+pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
+{
+
+ m->md.pat_mode = ma;
+
+ /*
+ * If "m" is a normal page, update its direct mapping. This update
+ * can be relied upon to perform any cache operations that are
+ * required for data coherence.
+ */
+ if ((m->flags & PG_FICTITIOUS) == 0 &&
+ pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
+ m->md.pat_mode))
+ panic("memory attribute change on the direct map failed");
+}
+
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode". The specified virtual address range must be
+ * completely contained within either the direct map or the kernel map. If
+ * the virtual address range is contained within the kernel map, then the
+ * memory type for each of the corresponding ranges of the direct map is also
+ * changed. (The corresponding ranges of the direct map are those ranges that
+ * map the same physical pages as the specified virtual address range.) These
+ * changes to the direct map are necessary because Intel describes the
+ * behavior of their processors as "undefined" if two or more mappings to the
+ * same physical page have different memory types.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change. In the
+ * latter case, the memory type may have been changed on some part of the
+ * virtual address range or the direct map.
+ */
+int
+pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
+{
+ int error;
+
+ PMAP_LOCK(kernel_pmap);
+ error = pmap_change_attr_locked(va, size, mode);
+ PMAP_UNLOCK(kernel_pmap);
+ return (error);
+}
+
+static int
+pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
+{
+ vm_offset_t base, offset, tmpva;
+ vm_paddr_t pa_start, pa_end;
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ int cache_bits_pte, cache_bits_pde, error;
+ boolean_t changed;
+
+ PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
+ base = trunc_page(va);
+ offset = va & PAGE_MASK;
+ size = round_page(offset + size);
+
+ /*
+ * Only supported on kernel virtual addresses, including the direct
+ * map but excluding the recursive map.
+ */
+ if (base < DMAP_MIN_ADDRESS)
+ return (EINVAL);
+
+ cache_bits_pde = pmap_cache_bits(mode, 1);
+ cache_bits_pte = pmap_cache_bits(mode, 0);
+ changed = FALSE;
+
+ /*
+ * Pages that aren't mapped aren't supported. Also break down 2MB pages
+ * into 4KB pages if required.
+ */
+ for (tmpva = base; tmpva < base + size; ) {
+ pdpe = pmap_pdpe(kernel_pmap, tmpva);
+ if (*pdpe == 0)
+ return (EINVAL);
+ if (*pdpe & PG_PS) {
+ /*
+ * If the current 1GB page already has the required
+ * memory type, then we need not demote this page. Just
+ * increment tmpva to the next 1GB page frame.
+ */
+ if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) {
+ tmpva = trunc_1gpage(tmpva) + NBPDP;
+ continue;
+ }
+
+ /*
+ * If the current offset aligns with a 1GB page frame
+ * and there is at least 1GB left within the range, then
+ * we need not break down this page into 2MB pages.
+ */
+ if ((tmpva & PDPMASK) == 0 &&
+ tmpva + PDPMASK < base + size) {
+ tmpva += NBPDP;
+ continue;
+ }
+ if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva))
+ return (ENOMEM);
+ }
+ pde = pmap_pdpe_to_pde(pdpe, tmpva);
+ if (*pde == 0)
+ return (EINVAL);
+ if (*pde & PG_PS) {
+ /*
+ * If the current 2MB page already has the required
+ * memory type, then we need not demote this page. Just
+ * increment tmpva to the next 2MB page frame.
+ */
+ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
+ tmpva = trunc_2mpage(tmpva) + NBPDR;
+ continue;
+ }
+
+ /*
+ * If the current offset aligns with a 2MB page frame
+ * and there is at least 2MB left within the range, then
+ * we need not break down this page into 4KB pages.
+ */
+ if ((tmpva & PDRMASK) == 0 &&
+ tmpva + PDRMASK < base + size) {
+ tmpva += NBPDR;
+ continue;
+ }
+ if (!pmap_demote_pde(kernel_pmap, pde, tmpva))
+ return (ENOMEM);
+ }
+ pte = pmap_pde_to_pte(pde, tmpva);
+ if (*pte == 0)
+ return (EINVAL);
+ tmpva += PAGE_SIZE;
+ }
+ error = 0;
+
+ /*
+ * Ok, all the pages exist, so run through them updating their
+ * cache mode if required.
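+	 * Contiguous ranges of physical addresses are accumulated in
+	 * [pa_start, pa_end) so that the matching direct map region is
+	 * updated with one recursive call per run rather than one call
+	 * per page.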
+ */
+ pa_start = pa_end = 0;
+ for (tmpva = base; tmpva < base + size; ) {
+ pdpe = pmap_pdpe(kernel_pmap, tmpva);
+ if (*pdpe & PG_PS) {
+ if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) {
+ pmap_pde_attr(pdpe, cache_bits_pde);
+ changed = TRUE;
+ }
+ if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
+ if (pa_start == pa_end) {
+ /* Start physical address run. */
+ pa_start = *pdpe & PG_PS_FRAME;
+ pa_end = pa_start + NBPDP;
+ } else if (pa_end == (*pdpe & PG_PS_FRAME))
+ pa_end += NBPDP;
+ else {
+ /* Run ended, update direct map. */
+ error = pmap_change_attr_locked(
+ PHYS_TO_DMAP(pa_start),
+ pa_end - pa_start, mode);
+ if (error != 0)
+ break;
+ /* Start physical address run. */
+ pa_start = *pdpe & PG_PS_FRAME;
+ pa_end = pa_start + NBPDP;
+ }
+ }
+ tmpva = trunc_1gpage(tmpva) + NBPDP;
+ continue;
+ }
+ pde = pmap_pdpe_to_pde(pdpe, tmpva);
+ if (*pde & PG_PS) {
+ if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
+ pmap_pde_attr(pde, cache_bits_pde);
+ changed = TRUE;
+ }
+ if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
+ if (pa_start == pa_end) {
+ /* Start physical address run. */
+ pa_start = *pde & PG_PS_FRAME;
+ pa_end = pa_start + NBPDR;
+ } else if (pa_end == (*pde & PG_PS_FRAME))
+ pa_end += NBPDR;
+ else {
+ /* Run ended, update direct map. */
+ error = pmap_change_attr_locked(
+ PHYS_TO_DMAP(pa_start),
+ pa_end - pa_start, mode);
+ if (error != 0)
+ break;
+ /* Start physical address run. */
+ pa_start = *pde & PG_PS_FRAME;
+ pa_end = pa_start + NBPDR;
+ }
+ }
+ tmpva = trunc_2mpage(tmpva) + NBPDR;
+ } else {
+ pte = pmap_pde_to_pte(pde, tmpva);
+ if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
+ pmap_pte_attr(pte, cache_bits_pte);
+ changed = TRUE;
+ }
+ if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
+ if (pa_start == pa_end) {
+ /* Start physical address run. */
+ pa_start = *pte & PG_FRAME;
+ pa_end = pa_start + PAGE_SIZE;
+ } else if (pa_end == (*pte & PG_FRAME))
+ pa_end += PAGE_SIZE;
+ else {
+ /* Run ended, update direct map. */
+ error = pmap_change_attr_locked(
+ PHYS_TO_DMAP(pa_start),
+ pa_end - pa_start, mode);
+ if (error != 0)
+ break;
+ /* Start physical address run. */
+ pa_start = *pte & PG_FRAME;
+ pa_end = pa_start + PAGE_SIZE;
+ }
+ }
+ tmpva += PAGE_SIZE;
+ }
+ }
+ if (error == 0 && pa_start != pa_end)
+ error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start),
+ pa_end - pa_start, mode);
+
+ /*
+ * Flush the TLB and, if required, the CPU caches, so that no stale
+ * translations or data with the old memory type remain.
+ */
+ if (changed) {
+ pmap_invalidate_range(kernel_pmap, base, tmpva);
+ pmap_invalidate_cache_range(base, tmpva);
+ }
+ return (error);
+}
+
+/*
+ * Demotes any mapping within the direct map region that covers more than the
+ * specified range of physical addresses. This range's size must be a power
+ * of two and its starting address must be a multiple of its size. Since the
+ * demotion does not change any attributes of the mapping, a TLB invalidation
+ * is not mandatory. The caller may, however, request a TLB invalidation.
+ */
+void
+pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ vm_offset_t va;
+ boolean_t changed;
+
+ if (len == 0)
+ return;
+ KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2"));
+ KASSERT((base & (len - 1)) == 0,
+ ("pmap_demote_DMAP: base is not a multiple of len"));
+ if (len < NBPDP && base < dmaplimit) {
+ va = PHYS_TO_DMAP(base);
+ changed = FALSE;
+ PMAP_LOCK(kernel_pmap);
+ pdpe = pmap_pdpe(kernel_pmap, va);
+ if ((*pdpe & PG_V) == 0)
+ panic("pmap_demote_DMAP: invalid PDPE");
+ if ((*pdpe & PG_PS) != 0) {
+ if (!pmap_demote_pdpe(kernel_pmap, pdpe, va))
+ panic("pmap_demote_DMAP: PDPE failed");
+ changed = TRUE;
+ }
+ if (len < NBPDR) {
+ pde = pmap_pdpe_to_pde(pdpe, va);
+ if ((*pde & PG_V) == 0)
+ panic("pmap_demote_DMAP: invalid PDE");
+ if ((*pde & PG_PS) != 0) {
+ if (!pmap_demote_pde(kernel_pmap, pde, va))
+ panic("pmap_demote_DMAP: PDE failed");
+ changed = TRUE;
+ }
+ }
+ if (changed && invalidate)
+ pmap_invalidate_page(kernel_pmap, va);
+ PMAP_UNLOCK(kernel_pmap);
+ }
+}
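+
+/*
+ * Usage sketch (illustrative, not part of this change): a caller that
+ * needs 4KB granularity over one page of the direct map, with a TLB
+ * flush, would do
+ *
+ *	pmap_demote_DMAP(pa, PAGE_SIZE, TRUE);
+ *
+ * with pa page-aligned, satisfying the power-of-two size and alignment
+ * requirements asserted above.
+ */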
+
+/*
+ * Perform the pmap work for mincore().
+ */
+int
+pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
+{
+ pd_entry_t *pdep;
+ pt_entry_t pte;
+ vm_paddr_t pa;
+ int val;
+
+ PMAP_LOCK(pmap);
+retry:
+ pdep = pmap_pde(pmap, addr);
+ if (pdep != NULL && (*pdep & PG_V)) {
+ if (*pdep & PG_PS) {
+ pte = *pdep;
+ /* Compute the physical address of the 4KB page. */
+ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
+ PG_FRAME;
+ val = MINCORE_SUPER;
+ } else {
+ pte = *pmap_pde_to_pte(pdep, addr);
+ pa = pte & PG_FRAME;
+ val = 0;
+ }
+ } else {
+ pte = 0;
+ pa = 0;
+ val = 0;
+ }
+ if ((pte & PG_V) != 0) {
+ val |= MINCORE_INCORE;
+ if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
+ if ((pte & PG_A) != 0)
+ val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
+ }
+ if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
+ (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
+ (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
+ /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
+ if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
+ goto retry;
+ } else
+ PA_UNLOCK_COND(*locked_pa);
+ PMAP_UNLOCK(pmap);
+ return (val);
+}
+
+void
+pmap_activate(struct thread *td)
+{
+ pmap_t pmap, oldpmap;
+ u_int cpuid;
+ u_int64_t cr3;
+
+ critical_enter();
+ pmap = vmspace_pmap(td->td_proc->p_vmspace);
+ oldpmap = PCPU_GET(curpmap);
+ cpuid = PCPU_GET(cpuid);
+#ifdef SMP
+ CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
+ CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
+#else
+ CPU_CLR(cpuid, &oldpmap->pm_active);
+ CPU_SET(cpuid, &pmap->pm_active);
+#endif
+ cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
+ td->td_pcb->pcb_cr3 = cr3;
+ load_cr3(cr3);
+ PCPU_SET(curpmap, pmap);
+ critical_exit();
+}
+
+void
+pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
+{
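+ /*
+ * Nothing to do: on amd64 the instruction cache is kept coherent
+ * with data stores by hardware, so no explicit sync is required.
+ */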
+}
+
+/*
+ * Increase the starting virtual address of the given mapping if a
+ * different alignment might result in more superpage mappings.
+ */
+void
+pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
+ vm_offset_t *addr, vm_size_t size)
+{
+ vm_offset_t superpage_offset;
+
+ if (size < NBPDR)
+ return;
+ if (object != NULL && (object->flags & OBJ_COLORED) != 0)
+ offset += ptoa(object->pg_color);
+ superpage_offset = offset & PDRMASK;
+ if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
+ (*addr & PDRMASK) == superpage_offset)
+ return;
+ if ((*addr & PDRMASK) < superpage_offset)
+ *addr = (*addr & ~PDRMASK) + superpage_offset;
+ else
+ *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
+}
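+
+/*
+ * Worked example (illustrative): with 2MB superpages (NBPDR == 2MB,
+ * PDRMASK == NBPDR - 1), an object offset whose residue within a
+ * superpage is 0x1000 and a candidate *addr of 0x200000 give
+ *
+ *	superpage_offset = 0x1000;
+ *	(*addr & PDRMASK) == 0, which is below superpage_offset, so
+ *	*addr = (0x200000 & ~PDRMASK) + 0x1000 == 0x201000;
+ *
+ * after which addr and offset are congruent modulo NBPDR, the
+ * precondition for promoting the mapping to superpages.
+ */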
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <ddb/ddb.h>
+
+DB_SHOW_COMMAND(pte, pmap_print_pte)
+{
+ pmap_t pmap;
+ pml4_entry_t *pml4;
+ pdp_entry_t *pdp;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_offset_t va;
+
+ if (have_addr) {
+ va = (vm_offset_t)addr;
+ pmap = PCPU_GET(curpmap); /* XXX */
+ } else {
+ db_printf("show pte addr\n");
+ return;
+ }
+ pml4 = pmap_pml4e(pmap, va);
+ db_printf("VA %#016lx pml4e %#016lx", va, *pml4);
+ if ((*pml4 & PG_V) == 0) {
+ db_printf("\n");
+ return;
+ }
+ pdp = pmap_pml4e_to_pdpe(pml4, va);
+ db_printf(" pdpe %#016lx", *pdp);
+ if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) {
+ db_printf("\n");
+ return;
+ }
+ pde = pmap_pdpe_to_pde(pdp, va);
+ db_printf(" pde %#016lx", *pde);
+ if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) {
+ db_printf("\n");
+ return;
+ }
+ pte = pmap_pde_to_pte(pde, va);
+ db_printf(" pte %#016lx\n", *pte);
+}
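+
+/*
+ * For reference (illustrative, mirroring the walk above), the four
+ * page-table indices are extracted from a canonical VA as
+ *
+ *	pml4 index: (va >> PML4SHIFT) & 0x1ff
+ *	pdp index:  (va >> PDPSHIFT) & 0x1ff
+ *	pd index:   (va >> PDRSHIFT) & 0x1ff
+ *	pt index:   (va >> PAGE_SHIFT) & 0x1ff
+ *
+ * with each level holding 512 eight-byte entries.
+ */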
+#endif
diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c
new file mode 100644
index 0000000..273c833
--- /dev/null
+++ b/sys/amd64/amd64/prof_machdep.c
@@ -0,0 +1,391 @@
+/*-
+ * Copyright (c) 1996 Bruce D. Evans.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef GUPROF
+#if 0
+#include "opt_i586_guprof.h"
+#include "opt_perfmon.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/gmon.h>
+#include <sys/kernel.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <machine/clock.h>
+#if 0
+#include <machine/perfmon.h>
+#endif
+#include <machine/timerreg.h>
+
+#define CPUTIME_CLOCK_UNINITIALIZED 0
+#define CPUTIME_CLOCK_I8254 1
+#define CPUTIME_CLOCK_TSC 2
+#define CPUTIME_CLOCK_I586_PMC 3
+#define CPUTIME_CLOCK_I8254_SHIFT 7
+
+int cputime_bias = 1; /* initialize for locality of reference */
+
+static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF;
+static int cputime_clock_pmc_init;
+static struct gmonparam saved_gmp;
+#endif
+static int cputime_prof_active;
+#endif /* GUPROF */
+
+#ifdef __GNUCLIKE_ASM
+__asm(" \n\
+GM_STATE = 0 \n\
+GMON_PROF_OFF = 3 \n\
+ \n\
+ .text \n\
+ .p2align 4,0x90 \n\
+ .globl __mcount \n\
+ .type __mcount,@function \n\
+__mcount: \n\
+ # \n\
+ # Check that we are profiling. Do it early for speed. \n\
+ # \n\
+ cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\
+ je .mcount_exit \n\
+ # \n\
+ # __mcount is the same as [.]mcount, except that the caller \n\
+ # has not changed the stack other than to call here, so the \n\
+ # caller's raddr is above our raddr. \n\
+ # \n\
+ pushq %rax \n\
+ pushq %rdx \n\
+ pushq %rcx \n\
+ pushq %rsi \n\
+ pushq %rdi \n\
+ pushq %r8 \n\
+ pushq %r9 \n\
+ movq 7*8+8(%rsp),%rdi \n\
+ jmp .got_frompc \n\
+ \n\
+ .p2align 4,0x90 \n\
+ .globl .mcount \n\
+.mcount: \n\
+ cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\
+ je .mcount_exit \n\
+ # \n\
+ # The caller's stack frame has already been built, so \n\
+ # %rbp is the caller's frame pointer. The caller's \n\
+ # raddr is in the caller's frame following the caller's \n\
+ # caller's frame pointer. \n\
+ # \n\
+ pushq %rax \n\
+ pushq %rdx \n\
+ pushq %rcx \n\
+ pushq %rsi \n\
+ pushq %rdi \n\
+ pushq %r8 \n\
+ pushq %r9 \n\
+ movq 8(%rbp),%rdi \n\
+.got_frompc: \n\
+ # \n\
+ # Our raddr is the caller's pc. \n\
+ # \n\
+ movq 7*8(%rsp),%rsi \n\
+ \n\
+ pushfq \n\
+ cli \n\
+ call mcount \n\
+ popfq \n\
+ popq %r9 \n\
+ popq %r8 \n\
+ popq %rdi \n\
+ popq %rsi \n\
+ popq %rcx \n\
+ popq %rdx \n\
+ popq %rax \n\
+.mcount_exit: \n\
+ ret $0 \n\
+");
+#else /* !__GNUCLIKE_ASM */
+#error "this file needs to be ported to your compiler"
+#endif /* __GNUCLIKE_ASM */
+
+#ifdef GUPROF
+/*
+ * [.]mexitcount saves the return register(s), loads selfpc and calls
+ * mexitcount(selfpc) to do the work. Someday it should be in a machine
+ * dependent file together with cputime(), __mcount and [.]mcount. cputime()
+ * can't just be put in machdep.c because it has to be compiled without -pg.
+ */
+#ifdef __GNUCLIKE_ASM
+__asm(" \n\
+ .text \n\
+# \n\
+# Dummy label to be seen when gprof -u hides [.]mexitcount. \n\
+# \n\
+ .p2align 4,0x90 \n\
+ .globl __mexitcount \n\
+ .type __mexitcount,@function \n\
+__mexitcount: \n\
+ nop \n\
+ \n\
+GMON_PROF_HIRES = 4 \n\
+ \n\
+ .p2align 4,0x90 \n\
+ .globl .mexitcount \n\
+.mexitcount: \n\
+ cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\
+ jne .mexitcount_exit \n\
+ pushq %rax \n\
+ pushq %rdx \n\
+ pushq %rcx \n\
+ pushq %rsi \n\
+ pushq %rdi \n\
+ pushq %r8 \n\
+ pushq %r9 \n\
+ movq 7*8(%rsp),%rdi \n\
+ pushfq \n\
+ cli \n\
+ call mexitcount \n\
+ popfq \n\
+ popq %r9 \n\
+ popq %r8 \n\
+ popq %rdi \n\
+ popq %rsi \n\
+ popq %rcx \n\
+ popq %rdx \n\
+ popq %rax \n\
+.mexitcount_exit: \n\
+ ret $0 \n\
+");
+#endif /* __GNUCLIKE_ASM */
+
+/*
+ * Return the time elapsed since the last call. The units are machine-
+ * dependent.
+ */
+int
+cputime(void)
+{
+ u_int count;
+ int delta;
+#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
+ u_quad_t event_count;
+#endif
+ u_char high, low;
+ static u_int prev_count;
+
+ if (cputime_clock == CPUTIME_CLOCK_TSC) {
+ /*
+ * Scale the TSC a little to make cputime()'s frequency
+ * fit in an int, assuming that the TSC frequency fits
+ * in a u_int. Use a fixed scale since dynamic scaling
+ * would be slower and we can't really use the low bit
+ * of precision.
+ */
+ count = (u_int)rdtsc() & ~1u;
+ delta = (int)(count - prev_count) >> 1;
+ prev_count = count;
+ return (delta);
+ }
+#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
+ if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
+ /*
+ * XXX perfmon_read() should be inlined so that the
+ * perfmon module doesn't need to be compiled with
+ * profiling disabled and so that it is fast.
+ */
+ perfmon_read(0, &event_count);
+
+ count = (u_int)event_count;
+ delta = (int)(count - prev_count);
+ prev_count = count;
+ return (delta);
+ }
+#endif /* PERFMON && I586_PMC_GUPROF && !SMP */
+
+ /*
+ * Read the current value of the 8254 timer counter 0.
+ */
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+ low = inb(TIMER_CNTR0);
+ high = inb(TIMER_CNTR0);
+ count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
+
+ /*
+ * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
+ * While profiling is enabled, this routine is called at least twice
+ * per timer reset (for mcounting and mexitcounting hardclock()),
+ * so at most one reset has occurred since the last call, and one
+ * has occurred iff the current count is larger than the previous
+ * count. This allows counter underflow to be detected faster
+ * than in microtime().
+ */
+ delta = prev_count - count;
+ prev_count = count;
+ if ((int) delta <= 0)
+ return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT));
+ return (delta);
+}
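+
+/*
+ * The wrap handling above, in outline (illustrative; the i8254 is a
+ * free-running down-counter):
+ *
+ *	delta = prev - cur;
+ *	if (delta <= 0)
+ *		delta += max_count;	// at most one wrap since last call
+ *
+ * which is valid because this routine runs at least twice per counter
+ * period while profiling is active.
+ */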
+
+static int
+sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS)
+{
+ int clock;
+ int error;
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ int event;
+ struct pmc pmc;
+#endif
+
+ clock = cputime_clock;
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ if (clock == CPUTIME_CLOCK_I586_PMC) {
+ pmc.pmc_val = cputime_clock_pmc_conf;
+ clock += pmc.pmc_event;
+ }
+#endif
+ error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req);
+ if (error == 0 && req->newptr != NULL) {
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ if (clock >= CPUTIME_CLOCK_I586_PMC) {
+ event = clock - CPUTIME_CLOCK_I586_PMC;
+ if (event >= 256)
+ return (EINVAL);
+ pmc.pmc_num = 0;
+ pmc.pmc_event = event;
+ pmc.pmc_unit = 0;
+ pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR;
+ pmc.pmc_mask = 0;
+ cputime_clock_pmc_conf = pmc.pmc_val;
+ cputime_clock = CPUTIME_CLOCK_I586_PMC;
+ } else
+#endif
+ {
+ if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC)
+ return (EINVAL);
+ cputime_clock = clock;
+ }
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW,
+ 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", "");
+
+/*
+ * The start and stop routines need not be here since we turn off profiling
+ * before calling them. They are here for convenience.
+ */
+
+void
+startguprof(struct gmonparam *gp)
+{
+ uint64_t freq;
+
+ freq = atomic_load_acq_64(&tsc_freq);
+ if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
+ if (freq != 0 && mp_ncpus == 1)
+ cputime_clock = CPUTIME_CLOCK_TSC;
+ else
+ cputime_clock = CPUTIME_CLOCK_I8254;
+ }
+ if (cputime_clock == CPUTIME_CLOCK_TSC) {
+ gp->profrate = freq >> 1;
+ cputime_prof_active = 1;
+ } else
+ gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT;
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
+ if (perfmon_avail() &&
+ perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
+ if (perfmon_start(0) != 0)
+ perfmon_fini(0);
+ else {
+ /* XXX 1 event == 1 us. */
+ gp->profrate = 1000000;
+
+ saved_gmp = *gp;
+
+ /* Zap overheads. They are invalid. */
+ gp->cputime_overhead = 0;
+ gp->mcount_overhead = 0;
+ gp->mcount_post_overhead = 0;
+ gp->mcount_pre_overhead = 0;
+ gp->mexitcount_overhead = 0;
+ gp->mexitcount_post_overhead = 0;
+ gp->mexitcount_pre_overhead = 0;
+
+ cputime_clock_pmc_init = TRUE;
+ }
+ }
+ }
+#endif /* PERFMON && I586_PMC_GUPROF */
+ cputime_bias = 0;
+ cputime();
+}
+
+void
+stopguprof(struct gmonparam *gp)
+{
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ if (cputime_clock_pmc_init) {
+ *gp = saved_gmp;
+ perfmon_fini(0);
+ cputime_clock_pmc_init = FALSE;
+ }
+#endif
+ if (cputime_clock == CPUTIME_CLOCK_TSC)
+ cputime_prof_active = 0;
+}
+
+/* If the cpu frequency changed while profiling, report a warning. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+
+ /*
+ * If there was an error during the transition or
+ * TSC is P-state invariant, don't do anything.
+ */
+ if (status != 0 || tsc_is_invariant)
+ return;
+ if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC)
+ printf("warning: cpu freq changed while profiling active\n");
+}
+
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+ EVENTHANDLER_PRI_ANY);
+
+#endif /* GUPROF */
diff --git a/sys/amd64/amd64/ptrace_machdep.c b/sys/amd64/amd64/ptrace_machdep.c
new file mode 100644
index 0000000..9fa1917
--- /dev/null
+++ b/sys/amd64/amd64/ptrace_machdep.c
@@ -0,0 +1,152 @@
+/*-
+ * Copyright (c) 2011 Konstantin Belousov <kib@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/ptrace.h>
+#include <sys/sysent.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+
+static int
+cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
+{
+ char *savefpu;
+ int error;
+
+ if (!use_xsave)
+ return (EOPNOTSUPP);
+
+ switch (req) {
+ case PT_GETXSTATE:
+ fpugetregs(td);
+ savefpu = (char *)(get_pcb_user_save_td(td) + 1);
+ error = copyout(savefpu, addr,
+ cpu_max_ext_state_size - sizeof(struct savefpu));
+ break;
+
+ case PT_SETXSTATE:
+ if (data > cpu_max_ext_state_size - sizeof(struct savefpu)) {
+ error = EINVAL;
+ break;
+ }
+ savefpu = malloc(data, M_TEMP, M_WAITOK);
+ error = copyin(addr, savefpu, data);
+ if (error == 0) {
+ fpugetregs(td);
+ error = fpusetxstate(td, savefpu, data);
+ }
+ free(savefpu, M_TEMP);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+#ifdef COMPAT_FREEBSD32
+#define PT_I386_GETXMMREGS (PT_FIRSTMACH + 0)
+#define PT_I386_SETXMMREGS (PT_FIRSTMACH + 1)
+#define PT_I386_GETXSTATE (PT_FIRSTMACH + 2)
+#define PT_I386_SETXSTATE (PT_FIRSTMACH + 3)
+
+static int
+cpu32_ptrace(struct thread *td, int req, void *addr, int data)
+{
+ struct savefpu *fpstate;
+ int error;
+
+ switch (req) {
+ case PT_I386_GETXMMREGS:
+ fpugetregs(td);
+ error = copyout(get_pcb_user_save_td(td), addr,
+ sizeof(*fpstate));
+ break;
+
+ case PT_I386_SETXMMREGS:
+ fpugetregs(td);
+ fpstate = get_pcb_user_save_td(td);
+ error = copyin(addr, fpstate, sizeof(*fpstate));
+ fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
+ break;
+
+ case PT_I386_GETXSTATE:
+ error = cpu_ptrace_xstate(td, PT_GETXSTATE, addr, data);
+ break;
+
+ case PT_I386_SETXSTATE:
+ error = cpu_ptrace_xstate(td, PT_SETXSTATE, addr, data);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+#endif
+
+int
+cpu_ptrace(struct thread *td, int req, void *addr, int data)
+{
+ int error;
+
+#ifdef COMPAT_FREEBSD32
+ if (SV_CURPROC_FLAG(SV_ILP32))
+ return (cpu32_ptrace(td, req, addr, data));
+#endif
+
+ /* Support old values of PT_GETXSTATE and PT_SETXSTATE. */
+ if (req == PT_FIRSTMACH + 0)
+ req = PT_GETXSTATE;
+ if (req == PT_FIRSTMACH + 1)
+ req = PT_SETXSTATE;
+
+ switch (req) {
+ case PT_GETXSTATE:
+ case PT_SETXSTATE:
+ error = cpu_ptrace_xstate(td, req, addr, data);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
diff --git a/sys/amd64/amd64/sigtramp.S b/sys/amd64/amd64/sigtramp.S
new file mode 100644
index 0000000..a05ea85
--- /dev/null
+++ b/sys/amd64/amd64/sigtramp.S
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm <peter@freeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/syscall.h>
+
+#include <machine/asmacros.h>
+
+#include "assym.s"
+
+ .text
+/**********************************************************************
+ *
+ * Signal trampoline, copied to top of user stack
+ *
+ */
+NON_GPROF_ENTRY(sigcode)
+ call *SIGF_HANDLER(%rsp) /* call signal handler */
+ lea SIGF_UC(%rsp),%rdi /* get ucontext_t */
+ pushq $0 /* junk to fake return addr. */
+ movq $SYS_sigreturn,%rax
+ syscall /* enter kernel with args */
+0: hlt /* trap privileged instruction */
+ jmp 0b
+
+ ALIGN_TEXT
+esigcode:
+
+ .data
+ .globl szsigcode
+szsigcode:
+ .long esigcode-sigcode
diff --git a/sys/amd64/amd64/stack_machdep.c b/sys/amd64/amd64/stack_machdep.c
new file mode 100644
index 0000000..57908e2
--- /dev/null
+++ b/sys/amd64/amd64/stack_machdep.c
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 2005 Antoine Brodin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/stack.h>
+
+#include <machine/pcb.h>
+#include <machine/stack.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+static void
+stack_capture(struct stack *st, register_t rbp)
+{
+ struct amd64_frame *frame;
+ vm_offset_t callpc;
+
+ stack_zero(st);
+ frame = (struct amd64_frame *)rbp;
+ while (1) {
+ if (!INKERNEL((long)frame))
+ break;
+ callpc = frame->f_retaddr;
+ if (!INKERNEL(callpc))
+ break;
+ if (stack_put(st, callpc) == -1)
+ break;
+ if (frame->f_frame <= frame ||
+ (vm_offset_t)frame->f_frame >=
+ (vm_offset_t)rbp + KSTACK_PAGES * PAGE_SIZE)
+ break;
+ frame = frame->f_frame;
+ }
+}
+
+void
+stack_save_td(struct stack *st, struct thread *td)
+{
+ register_t rbp;
+
+ if (TD_IS_SWAPPED(td))
+ panic("stack_save_td: swapped");
+ if (TD_IS_RUNNING(td))
+ panic("stack_save_td: running");
+
+ rbp = td->td_pcb->pcb_rbp;
+ stack_capture(st, rbp);
+}
+
+void
+stack_save(struct stack *st)
+{
+ register_t rbp;
+
+ __asm __volatile("movq %%rbp,%0" : "=r" (rbp));
+ stack_capture(st, rbp);
+}
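+
+/*
+ * The walk in stack_capture() relies on the standard amd64 frame layout
+ * (illustrative sketch of the layout assumed via <machine/stack.h>):
+ *
+ *	struct amd64_frame {
+ *		struct amd64_frame *f_frame;	// saved %rbp
+ *		long f_retaddr;			// return address
+ *	};
+ *
+ * Each iteration records f_retaddr and steps to f_frame until the chain
+ * leaves kernel addresses or the kernel stack bounds.
+ */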
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
new file mode 100644
index 0000000..fed852c
--- /dev/null
+++ b/sys/amd64/amd64/support.S
@@ -0,0 +1,732 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_ddb.h"
+
+#include <machine/asmacros.h>
+#include <machine/intr_machdep.h>
+#include <machine/pmap.h>
+
+#include "assym.s"
+
+ .text
+
+/*
+ * bcopy family
+ * void bzero(void *buf, u_int len)
+ */
+
+ENTRY(bzero)
+ movq %rsi,%rcx
+ xorl %eax,%eax
+ shrq $3,%rcx
+ cld
+ rep
+ stosq
+ movq %rsi,%rcx
+ andq $7,%rcx
+ rep
+ stosb
+ ret
+END(bzero)
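+
+/*
+ * C equivalent of bzero above (illustrative only): clear eight bytes at
+ * a time (rep stosq), then finish the sub-word tail (rep stosb).
+ *
+ *	void
+ *	bzero(void *buf, size_t len)
+ *	{
+ *		uint64_t *p = buf;
+ *		char *c;
+ *
+ *		while (len >= 8) {
+ *			*p++ = 0;
+ *			len -= 8;
+ *		}
+ *		for (c = (char *)p; len != 0; len--)
+ *			*c++ = 0;
+ *	}
+ */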
+
+/* Address: %rdi */
+ENTRY(pagezero)
+ movq $-PAGE_SIZE,%rdx
+ subq %rdx,%rdi
+ xorl %eax,%eax
+1:
+ movnti %rax,(%rdi,%rdx)
+ movnti %rax,8(%rdi,%rdx)
+ movnti %rax,16(%rdi,%rdx)
+ movnti %rax,24(%rdi,%rdx)
+ addq $32,%rdx
+ jne 1b
+ sfence
+ ret
+END(pagezero)
+
+ENTRY(bcmp)
+ movq %rdx,%rcx
+ shrq $3,%rcx
+ cld /* compare forwards */
+ repe
+ cmpsq
+ jne 1f
+
+ movq %rdx,%rcx
+ andq $7,%rcx
+ repe
+ cmpsb
+1:
+ setne %al
+ movsbl %al,%eax
+ ret
+END(bcmp)
+
+/*
+ * bcopy(src, dst, cnt)
+ * rdi, rsi, rdx
+ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
+ENTRY(bcopy)
+ xchgq %rsi,%rdi
+ movq %rdx,%rcx
+
+ movq %rdi,%rax
+ subq %rsi,%rax
+ cmpq %rcx,%rax /* overlapping && src < dst? */
+ jb 1f
+
+ shrq $3,%rcx /* copy by 64-bit words */
+ cld /* nope, copy forwards */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andq $7,%rcx /* any bytes left? */
+ rep
+ movsb
+ ret
+
+ /* ALIGN_TEXT */
+1:
+ addq %rcx,%rdi /* copy backwards */
+ addq %rcx,%rsi
+ decq %rdi
+ decq %rsi
+ andq $7,%rcx /* any fractional bytes? */
+ std
+ rep
+ movsb
+ movq %rdx,%rcx /* copy remainder by 64-bit words */
+ shrq $3,%rcx
+ subq $7,%rsi
+ subq $7,%rdi
+ rep
+ movsq
+ cld
+ ret
+END(bcopy)
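+
+/*
+ * The overlap test above, in C terms (illustrative): copy forwards
+ * unless the destination lies inside the source range,
+ *
+ *	if ((uintptr_t)dst - (uintptr_t)src >= len)
+ *		copy forwards
+ *	else
+ *		copy backwards, starting from the last byte
+ *
+ * so that overlapping source bytes are read before being overwritten.
+ */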
+
+/*
+ * Note: memcpy does not support overlapping copies
+ */
+ENTRY(memcpy)
+ movq %rdx,%rcx
+ shrq $3,%rcx /* copy by 64-bit words */
+ cld /* copy forwards */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andq $7,%rcx /* any bytes left? */
+ rep
+ movsb
+ ret
+END(memcpy)
+
+/*
+ * pagecopy(%rdi=from, %rsi=to)
+ */
+ENTRY(pagecopy)
+ movq $-PAGE_SIZE,%rax
+ movq %rax,%rdx
+ subq %rax,%rdi
+ subq %rax,%rsi
+1:
+ prefetchnta (%rdi,%rax)
+ addq $64,%rax
+ jne 1b
+2:
+ movq (%rdi,%rdx),%rax
+ movnti %rax,(%rsi,%rdx)
+ movq 8(%rdi,%rdx),%rax
+ movnti %rax,8(%rsi,%rdx)
+ movq 16(%rdi,%rdx),%rax
+ movnti %rax,16(%rsi,%rdx)
+ movq 24(%rdi,%rdx),%rax
+ movnti %rax,24(%rsi,%rdx)
+ addq $32,%rdx
+ jne 2b
+ sfence
+ ret
+END(pagecopy)
+
+/* fillw(pat, base, cnt) */
+/* %rdi,%rsi, %rdx */
+ENTRY(fillw)
+ movq %rdi,%rax
+ movq %rsi,%rdi
+ movq %rdx,%rcx
+ cld
+ rep
+ stosw
+ ret
+END(fillw)
+
+/*****************************************************************************/
+/* copyout and fubyte family */
+/*****************************************************************************/
+/*
+ * Access user memory from inside the kernel. These routines should be
+ * the only places that do this.
+ *
+ * These routines set curpcb->onfault for the time they execute. When a
+ * protection violation occurs inside the functions, the trap handler
+ * returns to *curpcb->onfault instead of the function.
+ */
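+
+/*
+ * In C terms the recovery protocol is (illustrative):
+ *
+ *	pcb->pcb_onfault = handler;	// arm recovery
+ *	... touch user memory ...
+ *	pcb->pcb_onfault = NULL;	// disarm on success
+ *
+ * The trap handler resumes execution at 'handler', which returns EFAULT
+ * to the caller.
+ */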
+
+/*
+ * copyout(from_kernel, to_user, len) - MP SAFE
+ * %rdi, %rsi, %rdx
+ */
+ENTRY(copyout)
+ movq PCPU(CURPCB),%rax
+ movq $copyout_fault,PCB_ONFAULT(%rax)
+ testq %rdx,%rdx /* anything to do? */
+ jz done_copyout
+
+ /*
+ * Check explicitly for non-user addresses. If 486 write protection
+ * is being used, this check is essential because we are in kernel
+ * mode so the h/w does not provide any protection against writing
+ * kernel addresses.
+ */
+
+ /*
+ * First, prevent address wrapping.
+ */
+ movq %rsi,%rax
+ addq %rdx,%rax
+ jc copyout_fault
+/*
+ * XXX STOP USING VM_MAXUSER_ADDRESS.
+ * It is an end address, not a max, so every time it is used correctly it
+ * looks like there is an off by one error, and of course it caused an off
+ * by one error in several places.
+ */
+ movq $VM_MAXUSER_ADDRESS,%rcx
+ cmpq %rcx,%rax
+ ja copyout_fault
+
+ xchgq %rdi,%rsi
+ /* bcopy(%rsi, %rdi, %rdx) */
+ movq %rdx,%rcx
+
+ shrq $3,%rcx
+ cld
+ rep
+ movsq
+ movb %dl,%cl
+ andb $7,%cl
+ rep
+ movsb
+
+done_copyout:
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rdx
+ movq %rax,PCB_ONFAULT(%rdx)
+ ret
+
+ ALIGN_TEXT
+copyout_fault:
+ movq PCPU(CURPCB),%rdx
+ movq $0,PCB_ONFAULT(%rdx)
+ movq $EFAULT,%rax
+ ret
+END(copyout)
+
+/*
+ * copyin(from_user, to_kernel, len) - MP SAFE
+ * %rdi, %rsi, %rdx
+ */
+ENTRY(copyin)
+ movq PCPU(CURPCB),%rax
+ movq $copyin_fault,PCB_ONFAULT(%rax)
+ testq %rdx,%rdx /* anything to do? */
+ jz done_copyin
+
+ /*
+ * make sure address is valid
+ */
+ movq %rdi,%rax
+ addq %rdx,%rax
+ jc copyin_fault
+ movq $VM_MAXUSER_ADDRESS,%rcx
+ cmpq %rcx,%rax
+ ja copyin_fault
+
+ xchgq %rdi,%rsi
+ movq %rdx,%rcx
+ movb %cl,%al
+ shrq $3,%rcx /* copy longword-wise */
+ cld
+ rep
+ movsq
+ movb %al,%cl
+ andb $7,%cl /* copy remaining bytes */
+ rep
+ movsb
+
+done_copyin:
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rdx
+ movq %rax,PCB_ONFAULT(%rdx)
+ ret
+
+ ALIGN_TEXT
+copyin_fault:
+ movq PCPU(CURPCB),%rdx
+ movq $0,PCB_ONFAULT(%rdx)
+ movq $EFAULT,%rax
+ ret
+END(copyin)
+
+/*
+ * casuword32. Compare and set user integer. Returns -1 or the current value.
+ * dst = %rdi, old = %rsi, new = %rdx
+ */
+ENTRY(casuword32)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ movl %esi,%eax /* old */
+#ifdef SMP
+ lock
+#endif
+ cmpxchgl %edx,(%rdi) /* new = %edx */
+
+ /*
+ * The old value is in %eax. If the store succeeded it will be the
+ * value we expected (old) from before the store, otherwise it will
+ * be the current value.
+ */
+
+ movq PCPU(CURPCB),%rcx
+ movq $0,PCB_ONFAULT(%rcx)
+ ret
+END(casuword32)
+
+/*
+ * casuword. Compare and set user word. Returns -1 or the current value.
+ * dst = %rdi, old = %rsi, new = %rdx
+ */
+ENTRY(casuword)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-8,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ movq %rsi,%rax /* old */
+#ifdef SMP
+ lock
+#endif
+ cmpxchgq %rdx,(%rdi) /* new = %rdx */
+
+ /*
+ * The old value is in %rax. If the store succeeded it will be the
+ * value we expected (old) from before the store, otherwise it will
+ * be the current value.
+ */
+
+ movq PCPU(CURPCB),%rcx
+ movq $0,PCB_ONFAULT(%rcx)
+ ret
+END(casuword)
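+
+/*
+ * Both casu* routines implement the usual compare-and-swap contract
+ * (illustrative C, fault handling omitted):
+ *
+ *	old = *dst;
+ *	if (old == expected)
+ *		*dst = new;
+ *	return (old);
+ *
+ * The caller detects success by comparing the return value with the
+ * expected value; -1 is returned on a fault.
+ */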
+
+/*
+ * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
+ * byte from user memory. All these functions are MPSAFE.
+ * addr = %rdi
+ */
+
+ALTENTRY(fuword64)
+ENTRY(fuword)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-8,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ movq (%rdi),%rax
+ movq $0,PCB_ONFAULT(%rcx)
+ ret
+END(fuword64)
+END(fuword)
+
+ENTRY(fuword32)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address is valid */
+ ja fusufault
+
+ movl (%rdi),%eax
+ movq $0,PCB_ONFAULT(%rcx)
+ ret
+END(fuword32)
+
+/*
+ * fuswintr() and suswintr() are specialized variants of fuword16() and
+ * suword16(), respectively. They are called from the profiling code,
+ * potentially at interrupt time. If they fail, that's okay; good things
+ * will happen later. They always fail for now, until the trap code is
+ * able to deal with this.
+ */
+ALTENTRY(suswintr)
+ENTRY(fuswintr)
+ movq $-1,%rax
+ ret
+END(suswintr)
+END(fuswintr)
+
+ENTRY(fuword16)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-2,%rax
+ cmpq %rax,%rdi
+ ja fusufault
+
+ movzwl (%rdi),%eax
+ movq $0,PCB_ONFAULT(%rcx)
+ ret
+END(fuword16)
+
+ENTRY(fubyte)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-1,%rax
+ cmpq %rax,%rdi
+ ja fusufault
+
+ movzbl (%rdi),%eax
+ movq $0,PCB_ONFAULT(%rcx)
+ ret
+END(fubyte)
+
+ ALIGN_TEXT
+fusufault:
+ movq PCPU(CURPCB),%rcx
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%rcx)
+ decq %rax
+ ret
+
+/*
+ * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
+ * user memory. All these functions are MPSAFE.
+ * addr = %rdi, value = %rsi
+ */
+ALTENTRY(suword64)
+ENTRY(suword)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-8,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movq %rsi,(%rdi)
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx
+ movq %rax,PCB_ONFAULT(%rcx)
+ ret
+END(suword64)
+END(suword)
+
+ENTRY(suword32)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movl %esi,(%rdi)
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx
+ movq %rax,PCB_ONFAULT(%rcx)
+ ret
+END(suword32)
+
+ENTRY(suword16)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-2,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movw %si,(%rdi)
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx /* restore trashed register */
+ movq %rax,PCB_ONFAULT(%rcx)
+ ret
+END(suword16)
+
+ENTRY(subyte)
+ movq PCPU(CURPCB),%rcx
+ movq $fusufault,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS-1,%rax
+ cmpq %rax,%rdi /* verify address validity */
+ ja fusufault
+
+ movl %esi,%eax
+ movb %al,(%rdi)
+ xorl %eax,%eax
+ movq PCPU(CURPCB),%rcx /* restore trashed register */
+ movq %rax,PCB_ONFAULT(%rcx)
+ ret
+END(subyte)
+
+/*
+ * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
+ * %rdi, %rsi, %rdx, %rcx
+ *
+ * Copy a string from 'from' to 'to', stopping when a NUL character is
+ * reached. Return ENAMETOOLONG if the string is longer than maxlen, and
+ * EFAULT on protection violations. If lencopied is non-NULL, return
+ * the actual length in *lencopied.
+ */
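+
+/*
+ * Illustrative C equivalent of the copy loop below (bounds clamping and
+ * fault handling omitted):
+ *
+ *	for (i = 0; i < maxlen; i++) {
+ *		if ((to[i] = from[i]) == '\0') {
+ *			if (lencopied != NULL)
+ *				*lencopied = i + 1;
+ *			return (0);
+ *		}
+ *	}
+ *	return (ENAMETOOLONG);
+ */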
+ENTRY(copyinstr)
+ movq %rdx,%r8 /* %r8 = maxlen */
+ movq %rcx,%r9 /* %r9 = *len */
+ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */
+ movq PCPU(CURPCB),%rcx
+ movq $cpystrflt,PCB_ONFAULT(%rcx)
+
+ movq $VM_MAXUSER_ADDRESS,%rax
+
+ /* make sure 'from' is within bounds */
+ subq %rsi,%rax
+ jbe cpystrflt
+
+ /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
+ cmpq %rdx,%rax
+ jae 1f
+ movq %rax,%rdx
+ movq %rax,%r8
+1:
+ incq %rdx
+ cld
+
+2:
+ decq %rdx
+ jz 3f
+
+ lodsb
+ stosb
+ orb %al,%al
+ jnz 2b
+
+ /* Success -- 0 byte reached */
+ decq %rdx
+ xorl %eax,%eax
+ jmp cpystrflt_x
+3:
+ /* rdx is zero - return ENAMETOOLONG or EFAULT */
+ movq $VM_MAXUSER_ADDRESS,%rax
+ cmpq %rax,%rsi
+ jae cpystrflt
+4:
+ movq $ENAMETOOLONG,%rax
+ jmp cpystrflt_x
+
+cpystrflt:
+ movq $EFAULT,%rax
+
+cpystrflt_x:
+ /* set *lencopied and return %eax */
+ movq PCPU(CURPCB),%rcx
+ movq $0,PCB_ONFAULT(%rcx)
+
+ testq %r9,%r9
+ jz 1f
+ subq %rdx,%r8
+ movq %r8,(%r9)
+1:
+ ret
+END(copyinstr)
+
+/*
+ * copystr(from, to, maxlen, int *lencopied) - MP SAFE
+ * %rdi, %rsi, %rdx, %rcx
+ */
+ENTRY(copystr)
+ movq %rdx,%r8 /* %r8 = maxlen */
+
+ xchgq %rdi,%rsi
+ incq %rdx
+ cld
+1:
+ decq %rdx
+ jz 4f
+ lodsb
+ stosb
+ orb %al,%al
+ jnz 1b
+
+ /* Success -- 0 byte reached */
+ decq %rdx
+ xorl %eax,%eax
+ jmp 6f
+4:
+ /* rdx is zero -- return ENAMETOOLONG */
+ movq $ENAMETOOLONG,%rax
+
+6:
+
+ testq %rcx,%rcx
+ jz 7f
+ /* set *lencopied and return %rax */
+ subq %rdx,%r8
+ movq %r8,(%rcx)
+7:
+ ret
+END(copystr)
+
+/*
+ * Handling of special amd64 registers and descriptor tables etc
+ * %rdi
+ */
+/* void lgdt(struct region_descriptor *rdp); */
+ENTRY(lgdt)
+ /* reload the descriptor table */
+ lgdt (%rdi)
+
+ /* flush the prefetch q */
+ jmp 1f
+ nop
+1:
+ movl $KDSEL,%eax
+ movl %eax,%ds
+ movl %eax,%es
+ movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */
+ movl %eax,%gs
+ movl %eax,%ss
+
+ /* reload code selector by turning return into intersegmental return */
+ popq %rax
+ pushq $KCSEL
+ pushq %rax
+ MEXITCOUNT
+ lretq
+END(lgdt)
+
+/*****************************************************************************/
+/* setjump, longjump */
+/*****************************************************************************/
+
+ENTRY(setjmp)
+ movq %rbx,0(%rdi) /* save rbx */
+ movq %rsp,8(%rdi) /* save rsp */
+ movq %rbp,16(%rdi) /* save rbp */
+ movq %r12,24(%rdi) /* save r12 */
+ movq %r13,32(%rdi) /* save r13 */
+ movq %r14,40(%rdi) /* save r14 */
+ movq %r15,48(%rdi) /* save r15 */
+ movq 0(%rsp),%rdx /* get rta */
+ movq %rdx,56(%rdi) /* save rip */
+ xorl %eax,%eax /* return(0); */
+ ret
+END(setjmp)
+
+ENTRY(longjmp)
+ movq 0(%rdi),%rbx /* restore rbx */
+ movq 8(%rdi),%rsp /* restore rsp */
+ movq 16(%rdi),%rbp /* restore rbp */
+ movq 24(%rdi),%r12 /* restore r12 */
+ movq 32(%rdi),%r13 /* restore r13 */
+ movq 40(%rdi),%r14 /* restore r14 */
+ movq 48(%rdi),%r15 /* restore r15 */
+ movq 56(%rdi),%rdx /* get rta */
+ movq %rdx,0(%rsp) /* put in return frame */
+ xorl %eax,%eax /* return(1); */
+ incl %eax
+ ret
+END(longjmp)
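+
+/*
+ * Usage sketch (the standard idiom; the jmp_buf is the eight-register
+ * save area filled in by setjmp above):
+ *
+ *	if (setjmp(&jb) == 0) {
+ *		// normal path; may later call longjmp(&jb)
+ *	} else {
+ *		// resumed here by longjmp, with return value 1
+ *	}
+ */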
+
+/*
+ * Support for reading MSRs in the safe manner.
+ */
+ENTRY(rdmsr_safe)
+/* int rdmsr_safe(u_int msr, uint64_t *data) */
+ movq PCPU(CURPCB),%r8
+ movq $msr_onfault,PCB_ONFAULT(%r8)
+ movl %edi,%ecx
+ rdmsr /* Read MSR selected by %ecx. Returns
+ high 32 bits in %edx, low in %eax */
+ salq $32,%rdx /* shift high half into place */
+ movl %eax,%eax /* zero-extend %eax -> %rax */
+ orq %rdx,%rax
+ movq %rax,(%rsi)
+ xorq %rax,%rax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
+END(rdmsr_safe)
+
+/*
+ * Support for writing MSRs in the safe manner.
+ */
+ENTRY(wrmsr_safe)
+/* int wrmsr_safe(u_int msr, uint64_t data) */
+ movq PCPU(CURPCB),%r8
+ movq $msr_onfault,PCB_ONFAULT(%r8)
+ movl %edi,%ecx
+ movl %esi,%eax
+ sarq $32,%rsi
+ movl %esi,%edx
+ wrmsr /* Write MSR selected by %ecx. Takes
+ high 32 bits in %edx, low in %eax. */
+ xorq %rax,%rax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
+END(wrmsr_safe)
+
+/*
+ * MSR operations fault handler
+ */
+ ALIGN_TEXT
+msr_onfault:
+ movq $0,PCB_ONFAULT(%r8)
+ movl $EFAULT,%eax
+ ret
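+
+/*
+ * For reference, the 64-bit composition performed by rdmsr_safe above,
+ * in C terms (illustrative):
+ *
+ *	*data = ((uint64_t)hi << 32) | lo;	// hi = %edx, lo = %eax
+ *
+ * and the inverse split done by wrmsr_safe:
+ *
+ *	lo = data & 0xffffffff;
+ *	hi = data >> 32;
+ */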
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
new file mode 100644
index 0000000..2f136ab
--- /dev/null
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -0,0 +1,753 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_capsicum.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/capability.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/sysproto.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h> /* for kernel_map */
+#include <vm/vm_extern.h>
+
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <machine/tss.h>
+#include <machine/vmparam.h>
+
+#include <security/audit/audit.h>
+
+#define MAX_LD 8192
+
+int max_ldt_segment = 1024;
+SYSCTL_INT(_machdep, OID_AUTO, max_ldt_segment, CTLFLAG_RDTUN,
+ &max_ldt_segment, 0,
+ "Maximum number of allowed LDT segments in the single address space");
+
+static void
+max_ldt_segment_init(void *arg __unused)
+{
+
+ TUNABLE_INT_FETCH("machdep.max_ldt_segment", &max_ldt_segment);
+ if (max_ldt_segment <= 0)
+ max_ldt_segment = 1;
+ if (max_ldt_segment > MAX_LD)
+ max_ldt_segment = MAX_LD;
+}
+SYSINIT(maxldt, SI_SUB_VM_CONF, SI_ORDER_ANY, max_ldt_segment_init, NULL);
+
+#ifdef notyet
+#ifdef SMP
+static void set_user_ldt_rv(struct vmspace *vmsp);
+#endif
+#endif
+static void user_ldt_derefl(struct proc_ldt *pldt);
+
+#ifndef _SYS_SYSPROTO_H_
+struct sysarch_args {
+ int op;
+ char *parms;
+};
+#endif
+
+int
+sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
+{
+ struct i386_ldt_args *largs, la;
+ struct user_segment_descriptor *lp;
+ int error = 0;
+
+ /*
+ * XXXKIB check that the BSM generation code knows to encode
+ * the op argument.
+ */
+ AUDIT_ARG_CMD(uap->op);
+ if (uap_space == UIO_USERSPACE) {
+ error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args));
+ if (error != 0)
+ return (error);
+ largs = &la;
+ } else
+ largs = (struct i386_ldt_args *)uap->parms;
+
+ switch (uap->op) {
+ case I386_GET_LDT:
+ error = amd64_get_ldt(td, largs);
+ break;
+ case I386_SET_LDT:
+ if (largs->descs != NULL && largs->num > max_ldt_segment)
+ return (EINVAL);
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ if (largs->descs != NULL) {
+ lp = malloc(largs->num *
+     sizeof(struct user_segment_descriptor), M_TEMP, M_WAITOK);
+ error = copyin(largs->descs, lp, largs->num *
+ sizeof(struct user_segment_descriptor));
+ if (error == 0)
+ error = amd64_set_ldt(td, largs, lp);
+ free(lp, M_TEMP);
+ } else {
+ error = amd64_set_ldt(td, largs, NULL);
+ }
+ break;
+ }
+ return (error);
+}
+
+void
+update_gdt_gsbase(struct thread *td, uint32_t base)
+{
+ struct user_segment_descriptor *sd;
+
+ if (td != curthread)
+ return;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ critical_enter();
+ sd = PCPU_GET(gs32p);
+ sd->sd_lobase = base & 0xffffff;
+ sd->sd_hibase = (base >> 24) & 0xff;
+ critical_exit();
+}
+
+void
+update_gdt_fsbase(struct thread *td, uint32_t base)
+{
+ struct user_segment_descriptor *sd;
+
+ if (td != curthread)
+ return;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ critical_enter();
+ sd = PCPU_GET(fs32p);
+ sd->sd_lobase = base & 0xffffff;
+ sd->sd_hibase = (base >> 24) & 0xff;
+ critical_exit();
+}
+
+int
+sysarch(struct thread *td, struct sysarch_args *uap)
+{
+ int error = 0;
+ struct pcb *pcb = curthread->td_pcb;
+ uint32_t i386base;
+ uint64_t a64base;
+ struct i386_ioperm_args iargs;
+ struct i386_get_xfpustate i386xfpu;
+ struct amd64_get_xfpustate a64xfpu;
+
+#ifdef CAPABILITY_MODE
+ /*
+ * When adding new operations, add a new case statement here to
+ * explicitly indicate whether or not the operation is safe to
+ * perform in capability mode.
+ */
+ if (IN_CAPABILITY_MODE(td)) {
+ switch (uap->op) {
+ case I386_GET_LDT:
+ case I386_SET_LDT:
+ case I386_GET_IOPERM:
+ case I386_GET_FSBASE:
+ case I386_SET_FSBASE:
+ case I386_GET_GSBASE:
+ case I386_SET_GSBASE:
+ case I386_GET_XFPUSTATE:
+ case AMD64_GET_FSBASE:
+ case AMD64_SET_FSBASE:
+ case AMD64_GET_GSBASE:
+ case AMD64_SET_GSBASE:
+ case AMD64_GET_XFPUSTATE:
+ break;
+
+ case I386_SET_IOPERM:
+ default:
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CAPFAIL))
+ ktrcapfail(CAPFAIL_SYSCALL, 0, 0);
+#endif
+ return (ECAPMODE);
+ }
+ }
+#endif
+
+ if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
+ return (sysarch_ldt(td, uap, UIO_USERSPACE));
+ /*
+ * XXXKIB check that the BSM generation code knows to encode
+ * the op argument.
+ */
+ AUDIT_ARG_CMD(uap->op);
+ switch (uap->op) {
+ case I386_GET_IOPERM:
+ case I386_SET_IOPERM:
+ if ((error = copyin(uap->parms, &iargs,
+ sizeof(struct i386_ioperm_args))) != 0)
+ return (error);
+ break;
+ case I386_GET_XFPUSTATE:
+ if ((error = copyin(uap->parms, &i386xfpu,
+ sizeof(struct i386_get_xfpustate))) != 0)
+ return (error);
+ a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
+ a64xfpu.len = i386xfpu.len;
+ break;
+ case AMD64_GET_XFPUSTATE:
+ if ((error = copyin(uap->parms, &a64xfpu,
+ sizeof(struct amd64_get_xfpustate))) != 0)
+ return (error);
+ break;
+ default:
+ break;
+ }
+
+ switch (uap->op) {
+ case I386_GET_IOPERM:
+ error = amd64_get_ioperm(td, &iargs);
+ if (error == 0)
+ error = copyout(&iargs, uap->parms,
+ sizeof(struct i386_ioperm_args));
+ break;
+ case I386_SET_IOPERM:
+ error = amd64_set_ioperm(td, &iargs);
+ break;
+ case I386_GET_FSBASE:
+ i386base = pcb->pcb_fsbase;
+ error = copyout(&i386base, uap->parms, sizeof(i386base));
+ break;
+ case I386_SET_FSBASE:
+ error = copyin(uap->parms, &i386base, sizeof(i386base));
+ if (!error) {
+ pcb->pcb_fsbase = i386base;
+ td->td_frame->tf_fs = _ufssel;
+ update_gdt_fsbase(td, i386base);
+ }
+ break;
+ case I386_GET_GSBASE:
+ i386base = pcb->pcb_gsbase;
+ error = copyout(&i386base, uap->parms, sizeof(i386base));
+ break;
+ case I386_SET_GSBASE:
+ error = copyin(uap->parms, &i386base, sizeof(i386base));
+ if (!error) {
+ pcb->pcb_gsbase = i386base;
+ td->td_frame->tf_gs = _ugssel;
+ update_gdt_gsbase(td, i386base);
+ }
+ break;
+ case AMD64_GET_FSBASE:
+ error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase));
+ break;
+
+ case AMD64_SET_FSBASE:
+ error = copyin(uap->parms, &a64base, sizeof(a64base));
+ if (!error) {
+ if (a64base < VM_MAXUSER_ADDRESS) {
+ pcb->pcb_fsbase = a64base;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ td->td_frame->tf_fs = _ufssel;
+ } else
+ error = EINVAL;
+ }
+ break;
+
+ case AMD64_GET_GSBASE:
+ error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase));
+ break;
+
+ case AMD64_SET_GSBASE:
+ error = copyin(uap->parms, &a64base, sizeof(a64base));
+ if (!error) {
+ if (a64base < VM_MAXUSER_ADDRESS) {
+ pcb->pcb_gsbase = a64base;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ td->td_frame->tf_gs = _ugssel;
+ } else
+ error = EINVAL;
+ }
+ break;
+
+ case I386_GET_XFPUSTATE:
+ case AMD64_GET_XFPUSTATE:
+ if (a64xfpu.len > cpu_max_ext_state_size -
+ sizeof(struct savefpu))
+ return (EINVAL);
+ fpugetregs(td);
+ error = copyout((char *)(get_pcb_user_save_td(td) + 1),
+ a64xfpu.addr, a64xfpu.len);
+ return (error);
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
+int
+amd64_set_ioperm(struct thread *td, struct i386_ioperm_args *uap)
+{
+ int i, error;
+ char *iomap;
+ struct amd64tss *tssp;
+ struct system_segment_descriptor *tss_sd;
+ u_long *addr;
+ struct pcb *pcb;
+
+ if ((error = priv_check(td, PRIV_IO)) != 0)
+ return (error);
+ if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
+ return (error);
+ if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
+ return (EINVAL);
+
+ /*
+ * XXX
+ * While this is restricted to root, we should probably figure out
+ * whether any other driver is using this i/o address, so as not to
+ * cause confusion. This probably requires a global 'usage registry'.
+ */
+ pcb = td->td_pcb;
+ if (pcb->pcb_tssp == NULL) {
+ tssp = (struct amd64tss *)kmem_alloc(kernel_map,
+ ctob(IOPAGES+1));
+ if (tssp == NULL)
+ return (ENOMEM);
+ iomap = (char *)&tssp[1];
+ addr = (u_long *)iomap;
+ for (i = 0; i < (ctob(IOPAGES) + 1) / sizeof(u_long); i++)
+ *addr++ = ~0;
+ critical_enter();
+ /* Takes care of tss_rsp0. */
+ memcpy(tssp, &common_tss[PCPU_GET(cpuid)],
+ sizeof(struct amd64tss));
+ tssp->tss_iobase = sizeof(*tssp);
+ pcb->pcb_tssp = tssp;
+ tss_sd = PCPU_GET(tss);
+ tss_sd->sd_lobase = (u_long)tssp & 0xffffff;
+ tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful;
+ tss_sd->sd_type = SDT_SYSTSS;
+ ltr(GSEL(GPROC0_SEL, SEL_KPL));
+ PCPU_SET(tssp, tssp);
+ critical_exit();
+ } else
+ iomap = (char *)&pcb->pcb_tssp[1];
+ for (i = uap->start; i < uap->start + uap->length; i++) {
+ if (uap->enable)
+ iomap[i >> 3] &= ~(1 << (i & 7));
+ else
+ iomap[i >> 3] |= (1 << (i & 7));
+ }
+ return (error);
+}
+
+int
+amd64_get_ioperm(struct thread *td, struct i386_ioperm_args *uap)
+{
+ int i, state;
+ char *iomap;
+
+ if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
+ return (EINVAL);
+ if (td->td_pcb->pcb_tssp == NULL) {
+ uap->length = 0;
+ goto done;
+ }
+
+ iomap = (char *)&td->td_pcb->pcb_tssp[1];
+
+ i = uap->start;
+ state = (iomap[i >> 3] >> (i & 7)) & 1;
+ uap->enable = !state;
+ uap->length = 1;
+
+ for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
+ if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
+ break;
+ uap->length++;
+ }
+
+done:
+ return (0);
+}
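+
+/*
+ * The I/O permission bitmap convention used above (illustrative): port
+ * i is accessible when its bit is clear,
+ *
+ *	enabled = ((iomap[i >> 3] >> (i & 7)) & 1) == 0;
+ *
+ * so amd64_set_ioperm() clears bits to grant access and sets them to
+ * revoke it, matching the hardware TSS bitmap semantics.
+ */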
+
+/*
+ * Update the GDT entry pointing to the LDT to point to the LDT of the
+ * current process.
+ */
+void
+set_user_ldt(struct mdproc *mdp)
+{
+
+ critical_enter();
+ *PCPU_GET(ldt) = mdp->md_ldt_sd;
+ lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
+ critical_exit();
+}
+
+#ifdef notyet
+#ifdef SMP
+static void
+set_user_ldt_rv(struct vmspace *vmsp)
+{
+ struct thread *td;
+
+ td = curthread;
+ if (vmsp != td->td_proc->p_vmspace)
+ return;
+
+ set_user_ldt(&td->td_proc->p_md);
+}
+#endif
+#endif
+
+struct proc_ldt *
+user_ldt_alloc(struct proc *p, int force)
+{
+ struct proc_ldt *pldt, *new_ldt;
+ struct mdproc *mdp;
+ struct soft_segment_descriptor sldt;
+
+ mtx_assert(&dt_lock, MA_OWNED);
+ mdp = &p->p_md;
+ if (!force && mdp->md_ldt != NULL)
+ return (mdp->md_ldt);
+ mtx_unlock(&dt_lock);
+ new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
+ new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
+ max_ldt_segment * sizeof(struct user_segment_descriptor));
+ if (new_ldt->ldt_base == NULL) {
+ free(new_ldt, M_SUBPROC);
+ mtx_lock(&dt_lock);
+ return (NULL);
+ }
+ new_ldt->ldt_refcnt = 1;
+ sldt.ssd_base = (uint64_t)new_ldt->ldt_base;
+ sldt.ssd_limit = max_ldt_segment *
+ sizeof(struct user_segment_descriptor) - 1;
+ sldt.ssd_type = SDT_SYSLDT;
+ sldt.ssd_dpl = SEL_KPL;
+ sldt.ssd_p = 1;
+ sldt.ssd_long = 0;
+ sldt.ssd_def32 = 0;
+ sldt.ssd_gran = 0;
+ mtx_lock(&dt_lock);
+ pldt = mdp->md_ldt;
+ if (pldt != NULL && !force) {
+ kmem_free(kernel_map, (vm_offset_t)new_ldt->ldt_base,
+ max_ldt_segment * sizeof(struct user_segment_descriptor));
+ free(new_ldt, M_SUBPROC);
+ return (pldt);
+ }
+
+ if (pldt != NULL) {
+ bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment *
+ sizeof(struct user_segment_descriptor));
+ user_ldt_derefl(pldt);
+ }
+ ssdtosyssd(&sldt, &p->p_md.md_ldt_sd);
+ atomic_store_rel_ptr((volatile uintptr_t *)&mdp->md_ldt,
+ (uintptr_t)new_ldt);
+ if (p == curproc)
+ set_user_ldt(mdp);
+
+ return (mdp->md_ldt);
+}
+
+void
+user_ldt_free(struct thread *td)
+{
+ struct proc *p = td->td_proc;
+ struct mdproc *mdp = &p->p_md;
+ struct proc_ldt *pldt;
+
+ mtx_assert(&dt_lock, MA_OWNED);
+ if ((pldt = mdp->md_ldt) == NULL) {
+ mtx_unlock(&dt_lock);
+ return;
+ }
+
+ mdp->md_ldt = NULL;
+ bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd));
+ if (td == curthread)
+ lldt(GSEL(GNULL_SEL, SEL_KPL));
+ user_ldt_deref(pldt);
+}
+
+static void
+user_ldt_derefl(struct proc_ldt *pldt)
+{
+
+ if (--pldt->ldt_refcnt == 0) {
+ kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
+ max_ldt_segment * sizeof(struct user_segment_descriptor));
+ free(pldt, M_SUBPROC);
+ }
+}
+
+void
+user_ldt_deref(struct proc_ldt *pldt)
+{
+
+ mtx_assert(&dt_lock, MA_OWNED);
+ user_ldt_derefl(pldt);
+ mtx_unlock(&dt_lock);
+}
+
+/*
+ * Note for the authors of compat layers (linux, etc): copyout() in
+ * the function below is not a problem since it presents data in
+ * arch-specific format (i.e. i386-specific in this case), not in
+ * the OS-specific one.
+ */
+int
+amd64_get_ldt(struct thread *td, struct i386_ldt_args *uap)
+{
+ int error = 0;
+ struct proc_ldt *pldt;
+ int num;
+ struct user_segment_descriptor *lp;
+
+#ifdef DEBUG
+ printf("amd64_get_ldt: start=%d num=%d descs=%p\n",
+ uap->start, uap->num, (void *)uap->descs);
+#endif
+
+ if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
+ lp = &((struct user_segment_descriptor *)(pldt->ldt_base))
+ [uap->start];
+ num = min(uap->num, max_ldt_segment);
+ } else
+ return (EINVAL);
+
+ if ((uap->start > (unsigned int)max_ldt_segment) ||
+ ((unsigned int)num > (unsigned int)max_ldt_segment) ||
+ ((unsigned int)(uap->start + num) > (unsigned int)max_ldt_segment))
+ return(EINVAL);
+
+ error = copyout(lp, uap->descs, num *
+ sizeof(struct user_segment_descriptor));
+ if (!error)
+ td->td_retval[0] = num;
+
+ return(error);
+}
+
+int
+amd64_set_ldt(struct thread *td, struct i386_ldt_args *uap,
+    struct user_segment_descriptor *descs)
+{
+ int error = 0, i;
+ int largest_ld;
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt;
+ struct user_segment_descriptor *dp;
+ struct proc *p;
+
+#ifdef DEBUG
+ printf("amd64_set_ldt: start=%d num=%d descs=%p\n",
+ uap->start, uap->num, (void *)uap->descs);
+#endif
+
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ p = td->td_proc;
+ if (descs == NULL) {
+ /* Free descriptors */
+ if (uap->start == 0 && uap->num == 0)
+ uap->num = max_ldt_segment;
+ if (uap->num == 0)
+ return (EINVAL);
+ if ((pldt = mdp->md_ldt) == NULL ||
+ uap->start >= max_ldt_segment)
+ return (0);
+ largest_ld = uap->start + uap->num;
+ if (largest_ld > max_ldt_segment)
+ largest_ld = max_ldt_segment;
+ i = largest_ld - uap->start;
+ mtx_lock(&dt_lock);
+ bzero(&((struct user_segment_descriptor *)(pldt->ldt_base))
+ [uap->start], sizeof(struct user_segment_descriptor) * i);
+ mtx_unlock(&dt_lock);
+ return (0);
+ }
+
+ if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
+ /* verify range of descriptors to modify */
+ largest_ld = uap->start + uap->num;
+ if (uap->start >= max_ldt_segment ||
+ largest_ld > max_ldt_segment)
+ return (EINVAL);
+ }
+
+ /* Check descriptors for access violations */
+ for (i = 0; i < uap->num; i++) {
+ dp = &descs[i];
+
+ switch (dp->sd_type) {
+ case SDT_SYSNULL: /* system null */
+ dp->sd_p = 0;
+ break;
+ case SDT_SYS286TSS:
+ case SDT_SYSLDT:
+ case SDT_SYS286BSY:
+ case SDT_SYS286CGT:
+ case SDT_SYSTASKGT:
+ case SDT_SYS286IGT:
+ case SDT_SYS286TGT:
+ case SDT_SYSNULL2:
+ case SDT_SYSTSS:
+ case SDT_SYSNULL3:
+ case SDT_SYSBSY:
+ case SDT_SYSCGT:
+ case SDT_SYSNULL4:
+ case SDT_SYSIGT:
+ case SDT_SYSTGT:
+ /*
+ * I can't think of any reason to allow a user proc
+ * to create a segment of these types. They are
+ * for OS use only.
+ */
+ return (EACCES);
+ /*NOTREACHED*/
+
+ /* memory segment types */
+ case SDT_MEMEC: /* memory execute only conforming */
+ case SDT_MEMEAC: /* memory execute only accessed conforming */
+ case SDT_MEMERC: /* memory execute read conforming */
+ case SDT_MEMERAC: /* memory execute read accessed conforming */
+ /* Must be "present" if executable and conforming. */
+ if (dp->sd_p == 0)
+ return (EACCES);
+ break;
+ case SDT_MEMRO: /* memory read only */
+ case SDT_MEMROA: /* memory read only accessed */
+ case SDT_MEMRW: /* memory read write */
+ case SDT_MEMRWA: /* memory read write accessed */
+ case SDT_MEMROD: /* memory read only expand dwn limit */
+ case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
+ case SDT_MEMRWD: /* memory read write expand dwn limit */
+		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
+ case SDT_MEME: /* memory execute only */
+ case SDT_MEMEA: /* memory execute only accessed */
+ case SDT_MEMER: /* memory execute read */
+ case SDT_MEMERA: /* memory execute read accessed */
+ break;
+ default:
+			return (EINVAL);
+ /*NOTREACHED*/
+ }
+
+ /* Only user (ring-3) descriptors may be present. */
+ if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL))
+ return (EACCES);
+ }
+
+ if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
+ /* Allocate a free slot */
+ mtx_lock(&dt_lock);
+ pldt = user_ldt_alloc(p, 0);
+ if (pldt == NULL) {
+ mtx_unlock(&dt_lock);
+ return (ENOMEM);
+ }
+
+ /*
+		 * Start scanning a bit up to leave room for NVidia and
+		 * Wine, which still use the "Blat" method of allocation.
+ */
+ i = 16;
+ dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i];
+ for (; i < max_ldt_segment; ++i, ++dp) {
+ if (dp->sd_type == SDT_SYSNULL)
+ break;
+ }
+ if (i >= max_ldt_segment) {
+ mtx_unlock(&dt_lock);
+ return (ENOSPC);
+ }
+ uap->start = i;
+ error = amd64_set_ldt_data(td, i, 1, descs);
+ mtx_unlock(&dt_lock);
+ } else {
+ largest_ld = uap->start + uap->num;
+ if (largest_ld > max_ldt_segment)
+ return (EINVAL);
+ mtx_lock(&dt_lock);
+ if (user_ldt_alloc(p, 0) != NULL) {
+ error = amd64_set_ldt_data(td, uap->start, uap->num,
+ descs);
+ }
+ mtx_unlock(&dt_lock);
+ }
+ if (error == 0)
+ td->td_retval[0] = uap->start;
+ return (error);
+}
+
+int
+amd64_set_ldt_data(struct thread *td, int start, int num,
+ struct user_segment_descriptor *descs)
+{
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt = mdp->md_ldt;
+
+ mtx_assert(&dt_lock, MA_OWNED);
+
+ /* Fill in range */
+ bcopy(descs,
+ &((struct user_segment_descriptor *)(pldt->ldt_base))[start],
+ num * sizeof(struct user_segment_descriptor));
+ return (0);
+}
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
new file mode 100644
index 0000000..6fcca81
--- /dev/null
+++ b/sys/amd64/amd64/trap.c
@@ -0,0 +1,1006 @@
+/*-
+ * Copyright (C) 1994, David Greenman
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the University of Utah, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * AMD64 Trap and System call handling
+ */
+
+#include "opt_clock.h"
+#include "opt_cpu.h"
+#include "opt_hwpmc_hooks.h"
+#include "opt_isa.h"
+#include "opt_kdb.h"
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/pioctl.h>
+#include <sys/ptrace.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/uio.h>
+#include <sys/vmmeter.h>
+#ifdef HWPMC_HOOKS
+#include <sys/pmckern.h>
+PMC_SOFT_DEFINE( , , page_fault, all);
+PMC_SOFT_DEFINE( , , page_fault, read);
+PMC_SOFT_DEFINE( , , page_fault, write);
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/intr_machdep.h>
+#include <x86/mca.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+#include <machine/tss.h>
+
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+
+/*
+ * This is a hook which is initialised by the dtrace module
+ * to handle traps which might occur during DTrace probe
+ * execution.
+ */
+dtrace_trap_func_t dtrace_trap_func;
+
+dtrace_doubletrap_func_t dtrace_doubletrap_func;
+
+/*
+ * This is a hook which is initialised by the systrace module
+ * when it is loaded. This keeps the DTrace syscall provider
+ * implementation opaque.
+ */
+systrace_probe_func_t systrace_probe_func;
+
+/*
+ * These hooks are necessary for the pid, usdt and fasttrap providers.
+ */
+dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr;
+dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr;
+dtrace_return_probe_ptr_t dtrace_return_probe_ptr;
+#endif
+
+extern void trap(struct trapframe *frame);
+extern void syscall(struct trapframe *frame);
+void dblfault_handler(struct trapframe *frame);
+
+static int trap_pfault(struct trapframe *, int);
+static void trap_fatal(struct trapframe *, vm_offset_t);
+
+#define MAX_TRAP_MSG 33
+static char *trap_msg[] = {
+ "", /* 0 unused */
+ "privileged instruction fault", /* 1 T_PRIVINFLT */
+ "", /* 2 unused */
+ "breakpoint instruction fault", /* 3 T_BPTFLT */
+ "", /* 4 unused */
+ "", /* 5 unused */
+ "arithmetic trap", /* 6 T_ARITHTRAP */
+ "", /* 7 unused */
+ "", /* 8 unused */
+ "general protection fault", /* 9 T_PROTFLT */
+ "trace trap", /* 10 T_TRCTRAP */
+ "", /* 11 unused */
+ "page fault", /* 12 T_PAGEFLT */
+ "", /* 13 unused */
+ "alignment fault", /* 14 T_ALIGNFLT */
+ "", /* 15 unused */
+ "", /* 16 unused */
+ "", /* 17 unused */
+ "integer divide fault", /* 18 T_DIVIDE */
+ "non-maskable interrupt trap", /* 19 T_NMI */
+ "overflow trap", /* 20 T_OFLOW */
+ "FPU bounds check fault", /* 21 T_BOUND */
+ "FPU device not available", /* 22 T_DNA */
+ "double fault", /* 23 T_DOUBLEFLT */
+ "FPU operand fetch fault", /* 24 T_FPOPFLT */
+ "invalid TSS fault", /* 25 T_TSSFLT */
+ "segment not present fault", /* 26 T_SEGNPFLT */
+ "stack fault", /* 27 T_STKFLT */
+ "machine check trap", /* 28 T_MCHK */
+ "SIMD floating-point exception", /* 29 T_XMMFLT */
+ "reserved (unknown) fault", /* 30 T_RESERVED */
+ "", /* 31 unused (reserved) */
+ "DTrace pid return trap", /* 32 T_DTRACE_RET */
+ "DTrace fasttrap probe trap", /* 33 T_DTRACE_PROBE */
+};
+
+#ifdef KDB
+static int kdb_on_nmi = 1;
+SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
+ &kdb_on_nmi, 0, "Go to KDB on NMI");
+TUNABLE_INT("machdep.kdb_on_nmi", &kdb_on_nmi);
+#endif
+static int panic_on_nmi = 1;
+SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
+ &panic_on_nmi, 0, "Panic on NMI");
+TUNABLE_INT("machdep.panic_on_nmi", &panic_on_nmi);
+static int prot_fault_translation;
+SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
+ &prot_fault_translation, 0,
+ "Select signal to deliver on protection fault");
+static int uprintf_signal;
+SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
+ &uprintf_signal, 0,
+ "Print debugging information on trap signal to ctty");
+
+/*
+ * Exception, fault, and trap interface to the FreeBSD kernel.
+ * This common code is called from assembly language IDT gate entry
+ * routines that prepare a suitable stack frame, and restore this
+ * frame after the exception has been processed.
+ */
+
+void
+trap(struct trapframe *frame)
+{
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ int i = 0, ucode = 0, code;
+ u_int type;
+ register_t addr = 0;
+ ksiginfo_t ksi;
+
+ PCPU_INC(cnt.v_trap);
+ type = frame->tf_trapno;
+
+#ifdef SMP
+ /* Handler for NMI IPIs used for stopping CPUs. */
+ if (type == T_NMI) {
+ if (ipi_nmi_handler() == 0)
+ goto out;
+ }
+#endif /* SMP */
+
+#ifdef KDB
+ if (kdb_active) {
+ kdb_reenter();
+ goto out;
+ }
+#endif
+
+ if (type == T_RESERVED) {
+ trap_fatal(frame, 0);
+ goto out;
+ }
+
+#ifdef HWPMC_HOOKS
+ /*
+ * CPU PMCs interrupt using an NMI. If the PMC module is
+ * active, pass the 'rip' value to the PMC module's interrupt
+ * handler. A return value of '1' from the handler means that
+ * the NMI was handled by it and we can return immediately.
+ */
+ if (type == T_NMI && pmc_intr &&
+ (*pmc_intr)(PCPU_GET(cpuid), frame))
+ goto out;
+#endif
+
+ if (type == T_MCHK) {
+ mca_intr();
+ goto out;
+ }
+
+#ifdef KDTRACE_HOOKS
+ /*
+ * A trap can occur while DTrace executes a probe. Before
+ * executing the probe, DTrace blocks re-scheduling and sets
+ * a flag in its per-cpu flags to indicate that it doesn't
+ * want to fault. On returning from the probe, the no-fault
+ * flag is cleared and finally re-scheduling is enabled.
+ *
+ * If the DTrace kernel module has registered a trap handler,
+ * call it and if it returns non-zero, assume that it has
+ * handled the trap and modified the trap frame so that this
+ * function can return normally.
+ */
+ if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
+ type == T_BPTFLT) {
+ struct reg regs;
+
+ fill_frame_regs(frame, &regs);
+ if (type == T_DTRACE_PROBE &&
+ dtrace_fasttrap_probe_ptr != NULL &&
+ dtrace_fasttrap_probe_ptr(&regs) == 0)
+ goto out;
+ else if (type == T_BPTFLT &&
+ dtrace_pid_probe_ptr != NULL &&
+ dtrace_pid_probe_ptr(&regs) == 0)
+ goto out;
+ else if (type == T_DTRACE_RET &&
+ dtrace_return_probe_ptr != NULL &&
+ dtrace_return_probe_ptr(&regs) == 0)
+ goto out;
+ }
+ if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
+ goto out;
+#endif
+
+ if ((frame->tf_rflags & PSL_I) == 0) {
+ /*
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+ * enabled later.
+ */
+ if (ISPL(frame->tf_cs) == SEL_UPL)
+ uprintf(
+ "pid %ld (%s): trap %d with interrupts disabled\n",
+ (long)curproc->p_pid, curthread->td_name, type);
+ else if (type != T_NMI && type != T_BPTFLT &&
+ type != T_TRCTRAP) {
+ /*
+ * XXX not quite right, since this may be for a
+ * multiple fault in user mode.
+ */
+ printf("kernel trap %d with interrupts disabled\n",
+ type);
+
+ /*
+ * We shouldn't enable interrupts while holding a
+ * spin lock.
+ */
+ if (td->td_md.md_spinlock_count == 0)
+ enable_intr();
+ }
+ }
+
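+	/*
+	 * The error code pushed by the processor for this trap; the entry
+	 * stubs push a dummy value for traps that do not supply one.
+	 */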
+ code = frame->tf_err;
+
+ if (ISPL(frame->tf_cs) == SEL_UPL) {
+ /* user trap */
+
+ td->td_pticks = 0;
+ td->td_frame = frame;
+ addr = frame->tf_rip;
+ if (td->td_ucred != p->p_ucred)
+ cred_update_thread(td);
+
+ switch (type) {
+ case T_PRIVINFLT: /* privileged instruction fault */
+ i = SIGILL;
+ ucode = ILL_PRVOPC;
+ break;
+
+ case T_BPTFLT: /* bpt instruction fault */
+ case T_TRCTRAP: /* trace trap */
+ enable_intr();
+ frame->tf_rflags &= ~PSL_T;
+ i = SIGTRAP;
+ ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
+ break;
+
+ case T_ARITHTRAP: /* arithmetic trap */
+ ucode = fputrap_x87();
+ if (ucode == -1)
+ goto userout;
+ i = SIGFPE;
+ break;
+
+ case T_PROTFLT: /* general protection fault */
+ i = SIGBUS;
+ ucode = BUS_OBJERR;
+ break;
+ case T_STKFLT: /* stack fault */
+ case T_SEGNPFLT: /* segment not present fault */
+ i = SIGBUS;
+ ucode = BUS_ADRERR;
+ break;
+ case T_TSSFLT: /* invalid TSS fault */
+ i = SIGBUS;
+ ucode = BUS_OBJERR;
+ break;
+ case T_DOUBLEFLT: /* double fault */
+ default:
+ i = SIGBUS;
+ ucode = BUS_OBJERR;
+ break;
+
+ case T_PAGEFLT: /* page fault */
+ addr = frame->tf_addr;
+ i = trap_pfault(frame, TRUE);
+ if (i == -1)
+ goto userout;
+ if (i == 0)
+ goto user;
+
+ if (i == SIGSEGV)
+ ucode = SEGV_MAPERR;
+ else {
+ if (prot_fault_translation == 0) {
+ /*
+ * Autodetect.
+ * This check also covers the images
+ * without the ABI-tag ELF note.
+ */
+				if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
+				    p->p_osrel >= P_OSREL_SIGSEGV) {
+ i = SIGSEGV;
+ ucode = SEGV_ACCERR;
+ } else {
+ i = SIGBUS;
+ ucode = BUS_PAGE_FAULT;
+ }
+ } else if (prot_fault_translation == 1) {
+ /*
+ * Always compat mode.
+ */
+ i = SIGBUS;
+ ucode = BUS_PAGE_FAULT;
+ } else {
+ /*
+ * Always SIGSEGV mode.
+ */
+ i = SIGSEGV;
+ ucode = SEGV_ACCERR;
+ }
+ }
+ break;
+
+ case T_DIVIDE: /* integer divide fault */
+ ucode = FPE_INTDIV;
+ i = SIGFPE;
+ break;
+
+#ifdef DEV_ISA
+ case T_NMI:
+ /* machine/parity/power fail/"kitchen sink" faults */
+ if (isa_nmi(code) == 0) {
+#ifdef KDB
+ /*
+ * NMI can be hooked up to a pushbutton
+ * for debugging.
+ */
+ if (kdb_on_nmi) {
+					printf("NMI ... going to debugger\n");
+ kdb_trap(type, 0, frame);
+ }
+#endif /* KDB */
+ goto userout;
+ } else if (panic_on_nmi)
+ panic("NMI indicates hardware failure");
+ break;
+#endif /* DEV_ISA */
+
+ case T_OFLOW: /* integer overflow fault */
+ ucode = FPE_INTOVF;
+ i = SIGFPE;
+ break;
+
+ case T_BOUND: /* bounds check fault */
+ ucode = FPE_FLTSUB;
+ i = SIGFPE;
+ break;
+
+ case T_DNA:
+ /* transparent fault (due to context switch "late") */
+ KASSERT(PCB_USER_FPU(td->td_pcb),
+ ("kernel FPU ctx has leaked"));
+ fpudna();
+ goto userout;
+
+ case T_FPOPFLT: /* FPU operand fetch fault */
+ ucode = ILL_COPROC;
+ i = SIGILL;
+ break;
+
+ case T_XMMFLT: /* SIMD floating-point exception */
+ ucode = fputrap_sse();
+ if (ucode == -1)
+ goto userout;
+ i = SIGFPE;
+ break;
+ }
+ } else {
+ /* kernel trap */
+
+ KASSERT(cold || td->td_ucred != NULL,
+ ("kernel trap doesn't have ucred"));
+ switch (type) {
+ case T_PAGEFLT: /* page fault */
+ (void) trap_pfault(frame, FALSE);
+ goto out;
+
+ case T_DNA:
+ KASSERT(!PCB_USER_FPU(td->td_pcb),
+ ("Unregistered use of FPU in kernel"));
+ fpudna();
+ goto out;
+
+ case T_ARITHTRAP: /* arithmetic trap */
+ case T_XMMFLT: /* SIMD floating-point exception */
+ case T_FPOPFLT: /* FPU operand fetch fault */
+ /*
+			 * XXXKIB for now disable any FPU traps in the kernel;
+			 * handler registration seems to be overkill.
+ */
+ trap_fatal(frame, 0);
+ goto out;
+
+ case T_STKFLT: /* stack fault */
+ break;
+
+ case T_PROTFLT: /* general protection fault */
+ case T_SEGNPFLT: /* segment not present fault */
+ if (td->td_intr_nesting_level != 0)
+ break;
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %rip's and %rsp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame->tf_rip == (long)doreti_iret) {
+ frame->tf_rip = (long)doreti_iret_fault;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_ds) {
+ frame->tf_rip = (long)ds_load_fault;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_es) {
+ frame->tf_rip = (long)es_load_fault;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_fs) {
+ frame->tf_rip = (long)fs_load_fault;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_gs) {
+ frame->tf_rip = (long)gs_load_fault;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_gsbase) {
+ frame->tf_rip = (long)gsbase_load_fault;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_fsbase) {
+ frame->tf_rip = (long)fsbase_load_fault;
+ goto out;
+ }
+ if (curpcb->pcb_onfault != NULL) {
+ frame->tf_rip = (long)curpcb->pcb_onfault;
+ goto out;
+ }
+ break;
+
+ case T_TSSFLT:
+ /*
+ * PSL_NT can be set in user mode and isn't cleared
+ * automatically when the kernel is entered. This
+ * causes a TSS fault when the kernel attempts to
+ * `iret' because the TSS link is uninitialized. We
+ * want to get this fault so that we can fix the
+ * problem here and not every time the kernel is
+ * entered.
+ */
+ if (frame->tf_rflags & PSL_NT) {
+ frame->tf_rflags &= ~PSL_NT;
+ goto out;
+ }
+ break;
+
+ case T_TRCTRAP: /* trace trap */
+ /*
+ * Ignore debug register trace traps due to
+ * accesses in the user's address space, which
+ * can happen under several conditions such as
+ * if a user sets a watchpoint on a buffer and
+ * then passes that buffer to a system call.
+ * We still want to get TRCTRAPS for addresses
+ * in kernel space because that is useful when
+ * debugging the kernel.
+ */
+ if (user_dbreg_trap()) {
+ /*
+ * Reset breakpoint bits because the
+				 * processor doesn't clear them itself.
+ */
+ /* XXX check upper bits here */
+ load_dr6(rdr6() & 0xfffffff0);
+ goto out;
+ }
+ /*
+ * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
+ */
+ case T_BPTFLT:
+ /*
+ * If KDB is enabled, let it handle the debugger trap.
+ * Otherwise, debugger traps "can't happen".
+ */
+#ifdef KDB
+ if (kdb_trap(type, 0, frame))
+ goto out;
+#endif
+ break;
+
+#ifdef DEV_ISA
+ case T_NMI:
+ /* machine/parity/power fail/"kitchen sink" faults */
+ if (isa_nmi(code) == 0) {
+#ifdef KDB
+ /*
+ * NMI can be hooked up to a pushbutton
+ * for debugging.
+ */
+ if (kdb_on_nmi) {
+					printf("NMI ... going to debugger\n");
+ kdb_trap(type, 0, frame);
+ }
+#endif /* KDB */
+ goto out;
+ } else if (panic_on_nmi == 0)
+ goto out;
+ /* FALLTHROUGH */
+#endif /* DEV_ISA */
+ }
+
+ trap_fatal(frame, 0);
+ goto out;
+ }
+
+ /* Translate fault for emulators (e.g. Linux) */
+ if (*p->p_sysent->sv_transtrap)
+ i = (*p->p_sysent->sv_transtrap)(i, type);
+
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = i;
+ ksi.ksi_code = ucode;
+ ksi.ksi_trapno = type;
+ ksi.ksi_addr = (void *)addr;
+ if (uprintf_signal) {
+ uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
+ "addr 0x%lx rip 0x%lx "
+ "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
+ p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
+ frame->tf_rip,
+ fubyte((void *)(frame->tf_rip + 0)),
+ fubyte((void *)(frame->tf_rip + 1)),
+ fubyte((void *)(frame->tf_rip + 2)),
+ fubyte((void *)(frame->tf_rip + 3)),
+ fubyte((void *)(frame->tf_rip + 4)),
+ fubyte((void *)(frame->tf_rip + 5)),
+ fubyte((void *)(frame->tf_rip + 6)),
+ fubyte((void *)(frame->tf_rip + 7)));
+ }
+ trapsignal(td, &ksi);
+
+user:
+ userret(td, frame);
+ KASSERT(PCB_USER_FPU(td->td_pcb),
+ ("Return from trap with kernel FPU ctx leaked"));
+userout:
+out:
+ return;
+}
+
+static int
+trap_pfault(frame, usermode)
+ struct trapframe *frame;
+ int usermode;
+{
+ vm_offset_t va;
+ struct vmspace *vm;
+ vm_map_t map;
+ int rv = 0;
+ vm_prot_t ftype;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ vm_offset_t eva = frame->tf_addr;
+
+ if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+ /*
+ * Due to both processor errata and lazy TLB invalidation when
+ * access restrictions are removed from virtual pages, memory
+ * accesses that are allowed by the physical mapping layer may
+ * nonetheless cause one spurious page fault per virtual page.
+ * When the thread is executing a "no faulting" section that
+ * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+ * every page fault is treated as a spurious page fault,
+ * unless it accesses the same virtual address as the most
+ * recent page fault within the same "no faulting" section.
+ */
+ if (td->td_md.md_spurflt_addr != eva ||
+ (td->td_pflags & TDP_RESETSPUR) != 0) {
+ /*
+ * Do nothing to the TLB. A stale TLB entry is
+ * flushed automatically by a page fault.
+ */
+ td->td_md.md_spurflt_addr = eva;
+ td->td_pflags &= ~TDP_RESETSPUR;
+ return (0);
+ }
+ } else {
+ /*
+ * If we get a page fault while in a critical section, then
+ * it is most likely a fatal kernel page fault. The kernel
+ * is already going to panic trying to get a sleep lock to
+ * do the VM lookup, so just consider it a fatal trap so the
+ * kernel can print out a useful trap message and even get
+ * to the debugger.
+ *
+ * If we get a page fault while holding a non-sleepable
+ * lock, then it is most likely a fatal kernel page fault.
+ * If WITNESS is enabled, then it's going to whine about
+ * bogus LORs with various VM locks, so just skip to the
+ * fatal trap handling directly.
+ */
+ if (td->td_critnest != 0 ||
+ WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+ "Kernel page fault") != 0) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ }
+ va = trunc_page(eva);
+ if (va >= VM_MIN_KERNEL_ADDRESS) {
+ /*
+ * Don't allow user-mode faults in kernel address space.
+ */
+ if (usermode)
+ goto nogo;
+
+ map = kernel_map;
+ } else {
+ /*
+ * This is a fault on non-kernel virtual memory. If either
+ * p or p->p_vmspace is NULL, then the fault is fatal.
+ */
+ if (p == NULL || (vm = p->p_vmspace) == NULL)
+ goto nogo;
+
+ map = &vm->vm_map;
+
+ /*
+ * When accessing a usermode address, kernel must be
+ * ready to accept the page fault, and provide a
+ * handling routine. Since accessing the address
+ * without the handler is a bug, do not try to handle
+ * it normally, and panic immediately.
+ */
+ if (!usermode && (td->td_intr_nesting_level != 0 ||
+ curpcb->pcb_onfault == NULL)) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ }
+
+ /*
+ * PGEX_I is defined only if the execute disable bit capability is
+ * supported and enabled.
+ */
+ if (frame->tf_err & PGEX_W)
+ ftype = VM_PROT_WRITE;
+ else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
+ ftype = VM_PROT_EXECUTE;
+ else
+ ftype = VM_PROT_READ;
+
+ if (map != kernel_map) {
+ /*
+ * Keep swapout from messing with us during this
+ * critical time.
+ */
+ PROC_LOCK(p);
+ ++p->p_lock;
+ PROC_UNLOCK(p);
+
+ /* Fault in the user page: */
+ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+
+ PROC_LOCK(p);
+ --p->p_lock;
+ PROC_UNLOCK(p);
+ } else {
+ /*
+ * Don't have to worry about process locking or stacks in the
+ * kernel.
+ */
+ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ }
+ if (rv == KERN_SUCCESS) {
+#ifdef HWPMC_HOOKS
+ if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
+ PMC_SOFT_CALL_TF( , , page_fault, all, frame);
+ if (ftype == VM_PROT_READ)
+ PMC_SOFT_CALL_TF( , , page_fault, read,
+ frame);
+ else
+ PMC_SOFT_CALL_TF( , , page_fault, write,
+ frame);
+ }
+#endif
+ return (0);
+ }
+nogo:
+ if (!usermode) {
+ if (td->td_intr_nesting_level == 0 &&
+ curpcb->pcb_onfault != NULL) {
+ frame->tf_rip = (long)curpcb->pcb_onfault;
+ return (0);
+ }
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+}
+
+static void
+trap_fatal(frame, eva)
+ struct trapframe *frame;
+ vm_offset_t eva;
+{
+ int code, ss;
+ u_int type;
+ long esp;
+ struct soft_segment_descriptor softseg;
+ char *msg;
+
+ code = frame->tf_err;
+ type = frame->tf_trapno;
+ sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)],
+ &softseg);
+
+ if (type <= MAX_TRAP_MSG)
+ msg = trap_msg[type];
+ else
+ msg = "UNKNOWN";
+ printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
+ ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
+#ifdef SMP
+ /* two separate prints in case of a trap on an unmapped page */
+ printf("cpuid = %d; ", PCPU_GET(cpuid));
+ printf("apic id = %02x\n", PCPU_GET(apic_id));
+#endif
+ if (type == T_PAGEFLT) {
+ printf("fault virtual address = 0x%lx\n", eva);
+ printf("fault code = %s %s %s, %s\n",
+ code & PGEX_U ? "user" : "supervisor",
+ code & PGEX_W ? "write" : "read",
+ code & PGEX_I ? "instruction" : "data",
+ code & PGEX_P ? "protection violation" : "page not present");
+ }
+ printf("instruction pointer = 0x%lx:0x%lx\n",
+ frame->tf_cs & 0xffff, frame->tf_rip);
+ if (ISPL(frame->tf_cs) == SEL_UPL) {
+ ss = frame->tf_ss & 0xffff;
+ esp = frame->tf_rsp;
+ } else {
+ ss = GSEL(GDATA_SEL, SEL_KPL);
+ esp = (long)&frame->tf_rsp;
+ }
+ printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
+ printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
+ printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
+ softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
+ printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
+ softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
+ softseg.ssd_gran);
+ printf("processor eflags = ");
+ if (frame->tf_rflags & PSL_T)
+ printf("trace trap, ");
+ if (frame->tf_rflags & PSL_I)
+ printf("interrupt enabled, ");
+ if (frame->tf_rflags & PSL_NT)
+ printf("nested task, ");
+ if (frame->tf_rflags & PSL_RF)
+ printf("resume, ");
+ printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
+ printf("current process = ");
+ if (curproc) {
+ printf("%lu (%s)\n",
+ (u_long)curproc->p_pid, curthread->td_name ?
+ curthread->td_name : "");
+ } else {
+ printf("Idle\n");
+ }
+
+#ifdef KDB
+ if (debugger_on_panic || kdb_active)
+ if (kdb_trap(type, 0, frame))
+ return;
+#endif
+ printf("trap number = %d\n", type);
+ if (type <= MAX_TRAP_MSG)
+ panic("%s", trap_msg[type]);
+ else
+ panic("unknown/reserved trap");
+}
+
+/*
+ * Double fault handler. Called when a fault occurs while writing
+ * a frame for a trap/exception onto the stack. This usually occurs
+ * when the stack overflows (such is the case with infinite recursion,
+ * for example).
+ */
+void
+dblfault_handler(struct trapframe *frame)
+{
+#ifdef KDTRACE_HOOKS
+ if (dtrace_doubletrap_func != NULL)
+ (*dtrace_doubletrap_func)();
+#endif
+ printf("\nFatal double fault\n");
+ printf("rip = 0x%lx\n", frame->tf_rip);
+ printf("rsp = 0x%lx\n", frame->tf_rsp);
+ printf("rbp = 0x%lx\n", frame->tf_rbp);
+#ifdef SMP
+ /* two separate prints in case of a trap on an unmapped page */
+ printf("cpuid = %d; ", PCPU_GET(cpuid));
+ printf("apic id = %02x\n", PCPU_GET(apic_id));
+#endif
+ panic("double fault");
+}
+
+int
+cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+{
+ struct proc *p;
+ struct trapframe *frame;
+ register_t *argp;
+ caddr_t params;
+ int reg, regcnt, error;
+
+ p = td->td_proc;
+ frame = td->td_frame;
+ reg = 0;
+ regcnt = 6;
+
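+	/*
+	 * On amd64 the first six system call arguments arrive in registers
+	 * (%rdi, %rsi, %rdx, %r10, %r8 and %r9); any further arguments
+	 * spill onto the user stack just above the return address.
+	 */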
+ params = (caddr_t)frame->tf_rsp + sizeof(register_t);
+ sa->code = frame->tf_rax;
+
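+	/*
+	 * For the indirect syscall(2) and __syscall(2) entry points the
+	 * actual syscall number is in the first argument register, so
+	 * shift the remaining register arguments down by one.
+	 */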
+ if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
+ sa->code = frame->tf_rdi;
+ reg++;
+ regcnt--;
+ }
+ if (p->p_sysent->sv_mask)
+ sa->code &= p->p_sysent->sv_mask;
+
+ if (sa->code >= p->p_sysent->sv_size)
+ sa->callp = &p->p_sysent->sv_table[0];
+ else
+ sa->callp = &p->p_sysent->sv_table[sa->code];
+
+ sa->narg = sa->callp->sy_narg;
+ KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]),
+ ("Too many syscall arguments!"));
+ error = 0;
+ argp = &frame->tf_rdi;
+ argp += reg;
+ bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt);
+ if (sa->narg > regcnt) {
+ KASSERT(params != NULL, ("copyin args with no params!"));
+ error = copyin(params, &sa->args[regcnt],
+ (sa->narg - regcnt) * sizeof(sa->args[0]));
+ }
+
+ if (error == 0) {
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame->tf_rdx;
+ }
+
+ return (error);
+}
+
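+/*
+ * Pull in the MI syscallenter()/syscallret() implementations so that
+ * they can be inlined into amd64_syscall() below.
+ */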
+#include "../../kern/subr_syscall.c"
+
+/*
+ * System call handler for native binaries. The trap frame is already
+ * set up by the assembler trampoline and a pointer to it is saved in
+ * td_frame.
+ */
+void
+amd64_syscall(struct thread *td, int traced)
+{
+ struct syscall_args sa;
+ int error;
+ ksiginfo_t ksi;
+
+#ifdef DIAGNOSTIC
+ if (ISPL(td->td_frame->tf_cs) != SEL_UPL) {
+ panic("syscall");
+ /* NOT REACHED */
+ }
+#endif
+ error = syscallenter(td, &sa);
+
+ /*
+ * Traced syscall.
+ */
+ if (__predict_false(traced)) {
+ td->td_frame->tf_rflags &= ~PSL_T;
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGTRAP;
+ ksi.ksi_code = TRAP_TRACE;
+ ksi.ksi_addr = (void *)td->td_frame->tf_rip;
+ trapsignal(td, &ksi);
+ }
+
+ KASSERT(PCB_USER_FPU(td->td_pcb),
+	    ("System call %s returning with kernel FPU ctx leaked",
+ syscallname(td->td_proc, sa.code)));
+ KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
+ ("System call %s returning with mangled pcb_save",
+ syscallname(td->td_proc, sa.code)));
+
+ syscallret(td, error, &sa);
+
+ /*
+ * If the user-supplied value of %rip is not a canonical
+ * address, then some CPUs will trigger a ring 0 #GP during
+ * the sysret instruction. However, the fault handler would
+ * execute in ring 0 with the user's %gs and %rsp which would
+ * not be safe. Instead, use the full return path which
+ * catches the problem safely.
+ */
+ if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+}
diff --git a/sys/amd64/amd64/uio_machdep.c b/sys/amd64/amd64/uio_machdep.c
new file mode 100644
index 0000000..2d24c7c
--- /dev/null
+++ b/sys/amd64/amd64/uio_machdep.c
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu>
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+#include <machine/vmparam.h>
+
+/*
+ * Implement uiomove(9) from physical memory using the direct map to
+ * avoid the creation and destruction of ephemeral mappings.
+ */
+int
+uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
+{
+ struct thread *td = curthread;
+ struct iovec *iov;
+ void *cp;
+ vm_offset_t page_offset;
+ size_t cnt;
+ int error = 0;
+ int save = 0;
+
+ KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
+ ("uiomove_fromphys: mode"));
+ KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
+ ("uiomove_fromphys proc"));
+ save = td->td_pflags & TDP_DEADLKTREAT;
+ td->td_pflags |= TDP_DEADLKTREAT;
+ while (n > 0 && uio->uio_resid) {
+ iov = uio->uio_iov;
+ cnt = iov->iov_len;
+ if (cnt == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ continue;
+ }
+ if (cnt > n)
+ cnt = n;
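+		/* Clip the copy so that it does not cross a page boundary. */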
+ page_offset = offset & PAGE_MASK;
+ cnt = min(cnt, PAGE_SIZE - page_offset);
+ cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
+ page_offset;
+ switch (uio->uio_segflg) {
+ case UIO_USERSPACE:
+ maybe_yield();
+ if (uio->uio_rw == UIO_READ)
+ error = copyout(cp, iov->iov_base, cnt);
+ else
+ error = copyin(iov->iov_base, cp, cnt);
+ if (error)
+ goto out;
+ break;
+ case UIO_SYSSPACE:
+ if (uio->uio_rw == UIO_READ)
+ bcopy(cp, iov->iov_base, cnt);
+ else
+ bcopy(iov->iov_base, cp, cnt);
+ break;
+ case UIO_NOCOPY:
+ break;
+ }
+ iov->iov_base = (char *)iov->iov_base + cnt;
+ iov->iov_len -= cnt;
+ uio->uio_resid -= cnt;
+ uio->uio_offset += cnt;
+ offset += cnt;
+ n -= cnt;
+ }
+out:
+ if (save == 0)
+ td->td_pflags &= ~TDP_DEADLKTREAT;
+ return (error);
+}
diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c
new file mode 100644
index 0000000..c4ca677
--- /dev/null
+++ b/sys/amd64/amd64/uma_machdep.c
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/uma.h>
+#include <vm/uma_int.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+
+void *
+uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+ vm_page_t m;
+ vm_paddr_t pa;
+ void *va;
+ int pflags;
+
+ *flags = UMA_SLAB_PRIV;
+ pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
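+	/* Retry until the allocation succeeds; only M_NOWAIT callers fail. */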
+ for (;;) {
+ m = vm_page_alloc(NULL, 0, pflags);
+ if (m == NULL) {
+ if (wait & M_NOWAIT)
+ return (NULL);
+ else
+ VM_WAIT;
+ } else
+ break;
+ }
+ pa = m->phys_addr;
+ if ((wait & M_NODUMP) == 0)
+ dump_add_page(pa);
+ va = (void *)PHYS_TO_DMAP(pa);
+ if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
+ pagezero(va);
+ return (va);
+}
+
+void
+uma_small_free(void *mem, int size, u_int8_t flags)
+{
+ vm_page_t m;
+ vm_paddr_t pa;
+
+ pa = DMAP_TO_PHYS((vm_offset_t)mem);
+ dump_drop_page(pa);
+ m = PHYS_TO_VM_PAGE(pa);
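+	/* Unwire and free the page, updating the global wired-page count. */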
+ m->wire_count--;
+ vm_page_free(m);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
new file mode 100644
index 0000000..9883715
--- /dev/null
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -0,0 +1,750 @@
+/*-
+ * Copyright (c) 1982, 1986 The Regents of the University of California.
+ * Copyright (c) 1989, 1990 William Jolitz
+ * Copyright (c) 1994 John Dyson
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
+ * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_isa.h"
+#include "opt_cpu.h"
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/pioctl.h>
+#include <sys/proc.h>
+#include <sys/sf_buf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+#include <sys/vmmeter.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/specialreg.h>
+#include <machine/tss.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_param.h>
+
+#include <x86/isa/isa.h>
+
+static void cpu_reset_real(void);
+#ifdef SMP
+static void cpu_reset_proxy(void);
+static u_int cpu_reset_proxyid;
+static volatile u_int cpu_reset_proxy_active;
+#endif
+
+CTASSERT((struct thread **)OFFSETOF_CURTHREAD ==
+ &((struct pcpu *)NULL)->pc_curthread);
+CTASSERT((struct pcb **)OFFSETOF_CURPCB == &((struct pcpu *)NULL)->pc_curpcb);
+
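+/*
+ * The per-thread FPU/XSAVE save area lives at the very top of the kernel
+ * stack, with the pcb placed immediately below it; see get_pcb_td().
+ */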
+struct savefpu *
+get_pcb_user_save_td(struct thread *td)
+{
+ vm_offset_t p;
+
+ p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+ cpu_max_ext_state_size;
+ KASSERT((p % 64) == 0, ("Unaligned pcb_user_save area"));
+ return ((struct savefpu *)p);
+}
+
+struct savefpu *
+get_pcb_user_save_pcb(struct pcb *pcb)
+{
+ vm_offset_t p;
+
+ p = (vm_offset_t)(pcb + 1);
+ return ((struct savefpu *)p);
+}
+
+struct pcb *
+get_pcb_td(struct thread *td)
+{
+ vm_offset_t p;
+
+ p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+ cpu_max_ext_state_size - sizeof(struct pcb);
+ return ((struct pcb *)p);
+}
+
+void *
+alloc_fpusave(int flags)
+{
+ struct pcb *res;
+ struct savefpu_ymm *sf;
+
+ res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
+ if (use_xsave) {
+ sf = (struct savefpu_ymm *)res;
+ bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
+ sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
+ }
+ return (res);
+}
+
+/*
+ * Finish a fork operation, with process p2 nearly set up.
+ * Copy and update the pcb, and set up the stack so that the child is
+ * ready to run and return to user mode.
+ */
+void
+cpu_fork(td1, p2, td2, flags)
+ register struct thread *td1;
+ register struct proc *p2;
+ struct thread *td2;
+ int flags;
+{
+ register struct proc *p1;
+ struct pcb *pcb2;
+ struct mdproc *mdp1, *mdp2;
+ struct proc_ldt *pldt;
+ pmap_t pmap2;
+
+ p1 = td1->td_proc;
+ if ((flags & RFPROC) == 0) {
+ if ((flags & RFMEM) == 0) {
+ /* unshare user LDT */
+ mdp1 = &p1->p_md;
+ mtx_lock(&dt_lock);
+ if ((pldt = mdp1->md_ldt) != NULL &&
+ pldt->ldt_refcnt > 1 &&
+ user_ldt_alloc(p1, 1) == NULL)
+ panic("could not copy LDT");
+ mtx_unlock(&dt_lock);
+ }
+ return;
+ }
+
+ /* Ensure that td1's pcb is up to date. */
+ fpuexit(td1);
+
+ /* Point the pcb to the top of the stack */
+ pcb2 = get_pcb_td(td2);
+ td2->td_pcb = pcb2;
+
+ /* Copy td1's pcb */
+ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
+
+ /* Properly initialize pcb_save */
+ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+ bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
+ cpu_max_ext_state_size);
+
+ /* Point mdproc and then copy over td1's contents */
+ mdp2 = &p2->p_md;
+ bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
+
+ /*
+ * Create a new fresh stack for the new process.
+ * Copy the trap frame for the return to user mode as if from a
+ * syscall. This copies most of the user mode register values.
+ */
+ td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
+ bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
+
+ td2->td_frame->tf_rax = 0; /* Child returns zero */
+ td2->td_frame->tf_rflags &= ~PSL_C; /* success */
+ td2->td_frame->tf_rdx = 1;
+
+ /*
+ * If the parent process has the trap bit set (i.e. a debugger had
+ * single stepped the process to the system call), we need to clear
+ * the trap flag from the new frame unless the debugger had set PF_FORK
+ * on the parent. Otherwise, the child will receive a (likely
+ * unexpected) SIGTRAP when it executes the first instruction after
+ * returning to userland.
+ */
+ if ((p1->p_pfsflags & PF_FORK) == 0)
+ td2->td_frame->tf_rflags &= ~PSL_T;
+
+ /*
+ * Set registers for trampoline to user mode. Leave space for the
+ * return address on stack. These are the kernel mode register values.
+ */
+ pmap2 = vmspace_pmap(p2->p_vmspace);
+ pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
+ pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
+ pcb2->pcb_rbp = 0;
+ pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
+ pcb2->pcb_rbx = (register_t)td2; /* fork_trampoline argument */
+ pcb2->pcb_rip = (register_t)fork_trampoline;
+ /*-
+ * pcb2->pcb_dr*: cloned above.
+ * pcb2->pcb_savefpu: cloned above.
+ * pcb2->pcb_flags: cloned above.
+ * pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_[fg]sbase: cloned above
+ */
+
+ /* Setup to release spin count in fork_exit(). */
+ td2->td_md.md_spinlock_count = 1;
+ td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+
+	/* As on i386, do not copy the io permission bitmap. */
+ pcb2->pcb_tssp = NULL;
+
+ /* New segment registers. */
+ set_pcb_flags(pcb2, PCB_FULL_IRET);
+
+ /* Copy the LDT, if necessary. */
+ mdp1 = &td1->td_proc->p_md;
+ mdp2 = &p2->p_md;
+ mtx_lock(&dt_lock);
+ if (mdp1->md_ldt != NULL) {
+ if (flags & RFMEM) {
+ mdp1->md_ldt->ldt_refcnt++;
+ mdp2->md_ldt = mdp1->md_ldt;
+			bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd,
+			    sizeof(struct system_segment_descriptor));
+ } else {
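+			/*
+			 * md_ldt was copied from the parent above; clear it
+			 * so that user_ldt_alloc() allocates a fresh LDT
+			 * instead of returning the parent's.
+			 */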
+ mdp2->md_ldt = NULL;
+ mdp2->md_ldt = user_ldt_alloc(p2, 0);
+ if (mdp2->md_ldt == NULL)
+ panic("could not copy LDT");
+ amd64_set_ldt_data(td2, 0, max_ldt_segment,
+ (struct user_segment_descriptor *)
+ mdp1->md_ldt->ldt_base);
+ }
+ } else
+ mdp2->md_ldt = NULL;
+ mtx_unlock(&dt_lock);
+
+ /*
+ * Now, cpu_switch() can schedule the new process.
+ * pcb_rsp is loaded pointing to the cpu_switch() stack frame
+ * containing the return address when exiting cpu_switch.
+ * This will normally be to fork_trampoline(), which will have
+	 * %rbx loaded with the new thread's pointer.  fork_trampoline()
+ * will set up a stack to call fork_return(p, frame); to complete
+ * the return to user-mode.
+ */
+}
+
+/*
+ * Intercept the return address from a freshly forked process that has NOT
+ * been scheduled yet.
+ *
+ * This is needed to make kernel threads stay in kernel mode.
+ */
+void
+cpu_set_fork_handler(td, func, arg)
+ struct thread *td;
+ void (*func)(void *);
+ void *arg;
+{
+ /*
+ * Note that the trap frame follows the args, so the function
+ * is really called like this: func(arg, frame);
+ */
+ td->td_pcb->pcb_r12 = (long) func; /* function */
+ td->td_pcb->pcb_rbx = (long) arg; /* first arg */
+}
+
+void
+cpu_exit(struct thread *td)
+{
+
+ /*
+ * If this process has a custom LDT, release it.
+ */
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != 0)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+}
+
+void
+cpu_thread_exit(struct thread *td)
+{
+ struct pcb *pcb;
+
+ critical_enter();
+ if (td == PCPU_GET(fpcurthread))
+ fpudrop();
+ critical_exit();
+
+ pcb = td->td_pcb;
+
+ /* Disable any hardware breakpoints. */
+ if (pcb->pcb_flags & PCB_DBREGS) {
+ reset_dbregs();
+ clear_pcb_flags(pcb, PCB_DBREGS);
+ }
+}
+
+void
+cpu_thread_clean(struct thread *td)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+
+ /*
+ * Clean TSS/iomap
+ */
+ if (pcb->pcb_tssp != NULL) {
+ kmem_free(kernel_map, (vm_offset_t)pcb->pcb_tssp,
+ ctob(IOPAGES + 1));
+ pcb->pcb_tssp = NULL;
+ }
+}
+
+void
+cpu_thread_swapin(struct thread *td)
+{
+}
+
+void
+cpu_thread_swapout(struct thread *td)
+{
+}
+
+void
+cpu_thread_alloc(struct thread *td)
+{
+ struct pcb *pcb;
+ struct xstate_hdr *xhdr;
+
+ td->td_pcb = pcb = get_pcb_td(td);
+ td->td_frame = (struct trapframe *)pcb - 1;
+ pcb->pcb_save = get_pcb_user_save_pcb(pcb);
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
+ bzero(xhdr, sizeof(*xhdr));
+ xhdr->xstate_bv = xsave_mask;
+ }
+}
+
+void
+cpu_thread_free(struct thread *td)
+{
+
+ cpu_thread_clean(td);
+}
+
+void
+cpu_set_syscall_retval(struct thread *td, int error)
+{
+
+ switch (error) {
+ case 0:
+ td->td_frame->tf_rax = td->td_retval[0];
+ td->td_frame->tf_rdx = td->td_retval[1];
+ td->td_frame->tf_rflags &= ~PSL_C;
+ break;
+
+ case ERESTART:
+ /*
+		 * Reconstruct pc; we know that 'syscall' is 2 bytes,
+ * lcall $X,y is 7 bytes, int 0x80 is 2 bytes.
+ * We saved this in tf_err.
+ * %r10 (which was holding the value of %rcx) is restored
+ * for the next iteration.
+ * %r10 restore is only required for freebsd/amd64 processes,
+ * but shall be innocent for any ia32 ABI.
+ */
+ td->td_frame->tf_rip -= td->td_frame->tf_err;
+ td->td_frame->tf_r10 = td->td_frame->tf_rcx;
+ break;
+
+ case EJUSTRETURN:
+ break;
+
+ default:
+ if (td->td_proc->p_sysent->sv_errsize) {
+ if (error >= td->td_proc->p_sysent->sv_errsize)
+ error = -1; /* XXX */
+ else
+ error = td->td_proc->p_sysent->sv_errtbl[error];
+ }
+ td->td_frame->tf_rax = error;
+ td->td_frame->tf_rflags |= PSL_C;
+ break;
+ }
+}
+
+/*
+ * Initialize machine state (pcb and trap frame) for a new thread about to
+ * upcall.  Put enough state in the new thread's PCB to get it to go back to
+ * userret(), where we can intercept it again to set the return (upcall)
+ * address and stack, along with those from upcalls that come from other
+ * sources, such as those generated in thread_userret() itself.
+ */
+void
+cpu_set_upcall(struct thread *td, struct thread *td0)
+{
+ struct pcb *pcb2;
+
+ /* Point the pcb to the top of the stack. */
+ pcb2 = td->td_pcb;
+
+ /*
+ * Copy the upcall pcb. This loads kernel regs.
+ * Those not loaded individually below get their default
+ * values here.
+ */
+ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
+ clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE);
+ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+ bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
+ cpu_max_ext_state_size);
+ set_pcb_flags(pcb2, PCB_FULL_IRET);
+
+ /*
+ * Create a new fresh stack for the new thread.
+ */
+ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
+
+	/*
+	 * If the current thread has the trap bit set (i.e. a debugger had
+ * single stepped the process to the system call), we need to clear
+ * the trap flag from the new frame. Otherwise, the new thread will
+ * receive a (likely unexpected) SIGTRAP when it executes the first
+ * instruction after returning to userland.
+ */
+ td->td_frame->tf_rflags &= ~PSL_T;
+
+ /*
+ * Set registers for trampoline to user mode. Leave space for the
+ * return address on stack. These are the kernel mode register values.
+ */
+ pcb2->pcb_r12 = (register_t)fork_return; /* trampoline arg */
+ pcb2->pcb_rbp = 0;
+ pcb2->pcb_rsp = (register_t)td->td_frame - sizeof(void *); /* trampoline arg */
+ pcb2->pcb_rbx = (register_t)td; /* trampoline arg */
+ pcb2->pcb_rip = (register_t)fork_trampoline;
+ /*
+ * If we didn't copy the pcb, we'd need to do the following registers:
+ * pcb2->pcb_cr3: cloned above.
+ * pcb2->pcb_dr*: cloned above.
+ * pcb2->pcb_savefpu: cloned above.
+ * pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_[fg]sbase: cloned above
+ */
+
+ /* Setup to release spin count in fork_exit(). */
+ td->td_md.md_spinlock_count = 1;
+ td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+}
+
+/*
+ * Set the machine state for performing an upcall that has to
+ * be done in thread_userret() so that those upcalls generated
+ * in thread_userret() itself can be done as well.
+ */
+void
+cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
+ stack_t *stack)
+{
+
+ /*
+ * Do any extra cleaning that needs to be done.
+ * The thread may have optional components
+ * that are not present in a fresh thread.
+ * This may be a recycled thread so make it look
+ * as though it's newly allocated.
+ */
+ cpu_thread_clean(td);
+
+#ifdef COMPAT_FREEBSD32
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
+ /*
+ * Set the trap frame to point at the beginning of the uts
+ * function.
+ */
+ td->td_frame->tf_rbp = 0;
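+		/*
+		 * Align the 32-bit stack to 16 bytes and leave room for a
+		 * fake return address, with the argument stored just above.
+		 */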
+ td->td_frame->tf_rsp =
+ (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
+ td->td_frame->tf_rip = (uintptr_t)entry;
+
+ /*
+ * Pass the address of the mailbox for this kse to the uts
+ * function as a parameter on the stack.
+ */
+ suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)),
+ (uint32_t)(uintptr_t)arg);
+
+ return;
+ }
+#endif
+
+ /*
+ * Set the trap frame to point at the beginning of the uts
+ * function.
+ */
+ td->td_frame->tf_rbp = 0;
+ td->td_frame->tf_rsp =
+ ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
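+	/*
+	 * Reserve space for a fake return address so the entry point sees
+	 * the stack alignment the ABI mandates after a call instruction.
+	 */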
+ td->td_frame->tf_rsp -= 8;
+ td->td_frame->tf_rip = (register_t)entry;
+ td->td_frame->tf_ds = _udatasel;
+ td->td_frame->tf_es = _udatasel;
+ td->td_frame->tf_fs = _ufssel;
+ td->td_frame->tf_gs = _ugssel;
+ td->td_frame->tf_flags = TF_HASSEGS;
+
+ /*
+ * Pass the address of the mailbox for this kse to the uts
+ * function as a parameter on the stack.
+ */
+ td->td_frame->tf_rdi = (register_t)arg;
+}
+
+int
+cpu_set_user_tls(struct thread *td, void *tls_base)
+{
+ struct pcb *pcb;
+
+ if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+
+ pcb = td->td_pcb;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
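+	/* 32-bit processes address TLS via %gs; 64-bit processes use %fs. */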
+#ifdef COMPAT_FREEBSD32
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
+ pcb->pcb_gsbase = (register_t)tls_base;
+ return (0);
+ }
+#endif
+ pcb->pcb_fsbase = (register_t)tls_base;
+ return (0);
+}
+
+#ifdef SMP
+static void
+cpu_reset_proxy(void)
+{
+ cpuset_t tcrp;
+
+ cpu_reset_proxy_active = 1;
+ while (cpu_reset_proxy_active == 1)
+ ia32_pause(); /* Wait for other cpu to see that we've started */
+
+ CPU_SETOF(cpu_reset_proxyid, &tcrp);
+ stop_cpus(tcrp);
+ printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
+ DELAY(1000000);
+ cpu_reset_real();
+}
+#endif
+
+void
+cpu_reset(void)
+{
+#ifdef SMP
+ cpuset_t map;
+ u_int cnt;
+
+ if (smp_active) {
+ map = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &map);
+ CPU_NAND(&map, &stopped_cpus);
+ if (!CPU_EMPTY(&map)) {
+ printf("cpu_reset: Stopping other CPUs\n");
+ stop_cpus(map);
+ }
+
+ if (PCPU_GET(cpuid) != 0) {
+ cpu_reset_proxyid = PCPU_GET(cpuid);
+ cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
+ printf("cpu_reset: Restarting BSP\n");
+
+ /* Restart CPU #0. */
+ CPU_SETOF(0, &started_cpus);
+ wmb();
+
+ cnt = 0;
+ while (cpu_reset_proxy_active == 0 && cnt < 10000000) {
+ ia32_pause();
+ cnt++; /* Wait for BSP to announce restart */
+ }
+ if (cpu_reset_proxy_active == 0)
+ printf("cpu_reset: Failed to restart BSP\n");
+ enable_intr();
+ cpu_reset_proxy_active = 2;
+
+ while (1)
+ ia32_pause();
+ /* NOTREACHED */
+ }
+
+ DELAY(1000000);
+ }
+#endif
+ cpu_reset_real();
+ /* NOTREACHED */
+}
+
+static void
+cpu_reset_real()
+{
+ struct region_descriptor null_idt;
+ int b;
+
+ disable_intr();
+
+ /*
+ * Attempt to do a CPU reset via the keyboard controller,
+ * do not turn off GateA20, as any machine that fails
+ * to do the reset here would then end up in no man's land.
+ */
+ outb(IO_KBD + 4, 0xFE);
+ DELAY(500000); /* wait 0.5 sec to see if that did it */
+
+ /*
+ * Attempt to force a reset via the Reset Control register at
+ * I/O port 0xcf9. Bit 2 forces a system reset when it
+ * transitions from 0 to 1. Bit 1 selects the type of reset
+ * to attempt: 0 selects a "soft" reset, and 1 selects a
+ * "hard" reset. We try a "hard" reset. The first write sets
+ * bit 1 to select a "hard" reset and clears bit 2. The
+ * second write forces a 0 -> 1 transition in bit 2 to trigger
+ * a reset.
+ */
+ outb(0xcf9, 0x2);
+ outb(0xcf9, 0x6);
+ DELAY(500000); /* wait 0.5 sec to see if that did it */
+
+ /*
+ * Attempt to force a reset via the Fast A20 and Init register
+ * at I/O port 0x92. Bit 1 serves as an alternate A20 gate.
+ * Bit 0 asserts INIT# when set to 1. We are careful to only
+ * preserve bit 1 while setting bit 0. We also must clear bit
+ * 0 before setting it if it isn't already clear.
+ */
+ b = inb(0x92);
+ if (b != 0xff) {
+ if ((b & 0x1) != 0)
+ outb(0x92, b & 0xfe);
+ outb(0x92, b | 0x1);
+ DELAY(500000); /* wait 0.5 sec to see if that did it */
+ }
+
+ printf("No known reset method worked, attempting CPU shutdown\n");
+ DELAY(1000000); /* wait 1 sec for printf to complete */
+
+ /* Wipe the IDT. */
+ null_idt.rd_limit = 0;
+ null_idt.rd_base = 0;
+ lidt(&null_idt);
+
+ /* "good night, sweet prince .... <THUNK!>" */
+ breakpoint();
+
+ /* NOTREACHED */
+	while (1);
+}
+
+/*
+ * Allocate an sf_buf for the given vm_page. On this machine, however, there
+ * is no sf_buf object. Instead, an opaque pointer to the given vm_page is
+ * returned.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m, int pri)
+{
+
+ return ((struct sf_buf *)m);
+}
+
+/*
+ * Free the sf_buf. In fact, do nothing because there are no resources
+ * associated with the sf_buf.
+ */
+void
+sf_buf_free(struct sf_buf *sf)
+{
+}
+
+/*
+ * Software interrupt handler for queued VM system processing.
+ */
+void
+swi_vm(void *dummy)
+{
+ if (busdma_swi_pending != 0)
+ busdma_swi();
+}
+
+/*
+ * Tell whether this address is in some physical memory region.
+ * Currently used by the kernel coredump code in order to avoid
+ * dumping the ``ISA memory hole'' which could cause indefinite hangs,
+ * or other unpredictable behaviour.
+ */
+
+int
+is_physical_memory(vm_paddr_t addr)
+{
+
+#ifdef DEV_ISA
+ /* The ISA ``memory hole''. */
+ if (addr >= 0xa0000 && addr < 0x100000)
+ return 0;
+#endif
+
+ /*
+ * stuff other tests for known memory-mapped devices (PCI?)
+ * here
+ */
+
+ return 1;
+}
diff --git a/sys/amd64/compile/.cvsignore b/sys/amd64/compile/.cvsignore
new file mode 100644
index 0000000..232298e
--- /dev/null
+++ b/sys/amd64/compile/.cvsignore
@@ -0,0 +1 @@
+[A-Za-z0-9]*
diff --git a/sys/amd64/conf/.cvsignore b/sys/amd64/conf/.cvsignore
new file mode 100644
index 0000000..232298e
--- /dev/null
+++ b/sys/amd64/conf/.cvsignore
@@ -0,0 +1 @@
+[A-Za-z0-9]*
diff --git a/sys/amd64/conf/DEFAULTS b/sys/amd64/conf/DEFAULTS
new file mode 100644
index 0000000..2c221cb
--- /dev/null
+++ b/sys/amd64/conf/DEFAULTS
@@ -0,0 +1,24 @@
+#
+# DEFAULTS -- Default kernel configuration file for FreeBSD/amd64
+#
+# $FreeBSD$
+
+machine amd64
+
+# Bus support.
+device isa
+
+# Pseudo devices.
+device mem # Memory and kernel memory devices
+device io # I/O device
+
+# UART chips on this platform
+device uart_ns8250
+
+# Default partitioning schemes
+options GEOM_PART_BSD
+options GEOM_PART_EBR
+options GEOM_PART_EBR_COMPAT
+options GEOM_PART_MBR
+
+options NEW_PCIB
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
new file mode 100644
index 0000000..1043b2b
--- /dev/null
+++ b/sys/amd64/conf/GENERIC
@@ -0,0 +1,340 @@
+#
+# GENERIC -- Generic kernel configuration file for FreeBSD/amd64
+#
+# For more information on this file, please read the config(5) manual page,
+# and/or the handbook section on Kernel Configuration Files:
+#
+# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
+#
+# The handbook is also available locally in /usr/share/doc/handbook
+# if you've installed the doc distribution, otherwise always see the
+# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
+# latest information.
+#
+# An exhaustive list of options and more detailed explanations of the
+# device lines is also present in the ../../conf/NOTES and NOTES files.
+# If you are in doubt as to the purpose or necessity of a line, check first
+# in NOTES.
+#
+# $FreeBSD$
+
+cpu HAMMER
+ident GENERIC
+
+makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
+makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
+
+options SCHED_ULE # ULE scheduler
+options PREEMPTION # Enable kernel thread preemption
+options INET # InterNETworking
+options INET6 # IPv6 communications protocols
+options TCP_OFFLOAD # TCP offload
+options SCTP # Stream Control Transmission Protocol
+options FFS # Berkeley Fast Filesystem
+options SOFTUPDATES # Enable FFS soft updates support
+options UFS_ACL # Support for access control lists
+options UFS_DIRHASH # Improve performance on big directories
+options UFS_GJOURNAL # Enable gjournal-based UFS journaling
+options QUOTA # Enable disk quotas for UFS
+options MD_ROOT # MD is a potential root device
+options NFSCL # New Network Filesystem Client
+options NFSD # New Network Filesystem Server
+options NFSLOCKD # Network Lock Manager
+options NFS_ROOT # NFS usable as /, requires NFSCL
+options MSDOSFS # MSDOS Filesystem
+options CD9660 # ISO 9660 Filesystem
+options PROCFS # Process filesystem (requires PSEUDOFS)
+options PSEUDOFS # Pseudo-filesystem framework
+options GEOM_PART_GPT # GUID Partition Tables.
+options GEOM_RAID # Soft RAID functionality.
+options GEOM_LABEL # Provides labelization
+options COMPAT_FREEBSD32 # Compatible with i386 binaries
+options COMPAT_FREEBSD4 # Compatible with FreeBSD4
+options COMPAT_FREEBSD5 # Compatible with FreeBSD5
+options COMPAT_FREEBSD6 # Compatible with FreeBSD6
+options COMPAT_FREEBSD7 # Compatible with FreeBSD7
+options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI
+options KTRACE # ktrace(1) support
+options STACK # stack(9) support
+options SYSVSHM # SYSV-style shared memory
+options SYSVMSG # SYSV-style message queues
+options SYSVSEM # SYSV-style semaphores
+options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
+options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
+options KBD_INSTALL_CDEV # install a CDEV entry in /dev
+options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
+options AUDIT # Security event auditing
+options CAPABILITY_MODE # Capsicum capability mode
+options CAPABILITIES # Capsicum capabilities
+options MAC # TrustedBSD MAC Framework
+options KDTRACE_FRAME # Ensure frames are compiled in
+options KDTRACE_HOOKS # Kernel DTrace hooks
+options DDB_CTF # Kernel ELF linker loads CTF data
+options INCLUDE_CONFIG_FILE # Include this file in kernel
+
+# Debugging support. Always need this:
+options KDB # Enable kernel debugger support.
+# For minimum debugger support (stable branch) use:
+#options KDB_TRACE # Print a stack trace for a panic.
+# For full debugger support use this instead:
+options DDB # Support DDB.
+options GDB # Support remote GDB.
+options DEADLKRES # Enable the deadlock resolver
+options INVARIANTS # Enable calls of extra sanity checking
+options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
+options WITNESS # Enable checks to detect deadlocks and cycles
+options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
+options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones
+
+# Make an SMP-capable kernel by default
+options SMP # Symmetric MultiProcessor Kernel
+
+# CPU frequency control
+device cpufreq
+
+# Bus support.
+device acpi
+device pci
+
+# Floppy drives
+device fdc
+
+# ATA controllers
+device ahci # AHCI-compatible SATA controllers
+device ata # Legacy ATA/SATA controllers
+options ATA_STATIC_ID # Static device numbering
+device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA
+device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA
+
+# SCSI Controllers
+device ahc # AHA2940 and onboard AIC7xxx devices
+options AHC_REG_PRETTY_PRINT # Print register bitfields in debug
+ # output. Adds ~128k to driver.
+device ahd # AHA39320/29320 and onboard AIC79xx devices
+options AHD_REG_PRETTY_PRINT # Print register bitfields in debug
+ # output. Adds ~215k to driver.
+device esp # AMD Am53C974 (Tekram DC-390(T))
+device hptiop # Highpoint RocketRaid 3xxx series
+device isp # Qlogic family
+#device ispfw # Firmware for QLogic HBAs- normally a module
+device mpt # LSI-Logic MPT-Fusion
+device mps # LSI-Logic MPT-Fusion 2
+#device ncr # NCR/Symbios Logic
+device sym # NCR/Symbios Logic (newer chipsets + those of `ncr')
+device trm # Tekram DC395U/UW/F DC315U adapters
+
+device adv # Advansys SCSI adapters
+device adw # Advansys wide SCSI adapters
+device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60.
+device bt # Buslogic/Mylex MultiMaster SCSI adapters
+device isci # Intel C600 SAS controller
+
+# ATA/SCSI peripherals
+device scbus # SCSI bus (required for ATA/SCSI)
+device ch # SCSI media changers
+device da # Direct Access (disks)
+device sa # Sequential Access (tape etc)
+device cd # CD
+device pass # Passthrough device (direct ATA/SCSI access)
+device ses # Enclosure Services (SES and SAF-TE)
+#device ctl # CAM Target Layer
+
+# RAID controllers interfaced to the SCSI subsystem
+device amr # AMI MegaRAID
+device arcmsr # Areca SATA II RAID
+#XXX it is not 64-bit clean, -scottl
+#device asr # DPT SmartRAID V, VI and Adaptec SCSI RAID
+device ciss # Compaq Smart RAID 5*
+device dpt # DPT Smartcache III, IV - See NOTES for options
+device hptmv # Highpoint RocketRAID 182x
+device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx
+device hpt27xx # Highpoint RocketRAID 27xx
+device iir # Intel Integrated RAID
+device ips # IBM (Adaptec) ServeRAID
+device mly # Mylex AcceleRAID/eXtremeRAID
+device twa # 3ware 9000 series PATA/SATA RAID
+device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller
+
+# RAID controllers
+device aac # Adaptec FSA RAID
+device aacp # SCSI passthrough for aac (requires CAM)
+device ida # Compaq Smart RAID
+device mfi # LSI MegaRAID SAS
+device mlx # Mylex DAC960 family
+#XXX pointer/int warnings
+#device pst # Promise Supertrak SX6000
+device twe # 3ware ATA RAID
+
+# atkbdc0 controls both the keyboard and the PS/2 mouse
+device atkbdc # AT keyboard controller
+device atkbd # AT keyboard
+device psm # PS/2 mouse
+
+device kbdmux # keyboard multiplexer
+
+device vga # VGA video card driver
+options VESA # Add support for VESA BIOS Extensions (VBE)
+
+device splash # Splash screen and screen saver support
+
+# syscons is the default console driver, resembling an SCO console
+device sc
+options SC_PIXEL_MODE # add support for the raster text mode
+
+device agp # support several AGP chipsets
+
+# PCCARD (PCMCIA) support
+# PCMCIA and cardbus bridge support
+device cbb # cardbus (yenta) bridge
+device pccard # PC Card (16-bit) bus
+device cardbus # CardBus (32-bit) bus
+
+# Serial (COM) ports
+device uart # Generic UART driver
+
+# Parallel port
+device ppc
+device ppbus # Parallel port bus (required)
+device lpt # Printer
+device ppi # Parallel port interface device
+#device vpo # Requires scbus and da
+
+device puc # Multi I/O cards and multi-channel UARTs
+
+# PCI Ethernet NICs.
+device bxe # Broadcom BCM57710/BCM57711/BCM57711E 10Gb Ethernet
+device de # DEC/Intel DC21x4x (``Tulip'')
+device em # Intel PRO/1000 Gigabit Ethernet Family
+device igb # Intel PRO/1000 PCIE Server Gigabit Family
+device ixgbe # Intel PRO/10GbE PCIE Ethernet Family
+device le # AMD Am7900 LANCE and Am79C9xx PCnet
+device ti # Alteon Networks Tigon I/II gigabit Ethernet
+device txp # 3Com 3cR990 (``Typhoon'')
+device vx # 3Com 3c590, 3c595 (``Vortex'')
+
+# PCI Ethernet NICs that use the common MII bus controller code.
+# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
+device miibus # MII bus support
+device ae # Attansic/Atheros L2 FastEthernet
+device age # Attansic/Atheros L1 Gigabit Ethernet
+device alc # Atheros AR8131/AR8132 Ethernet
+device ale # Atheros AR8121/AR8113/AR8114 Ethernet
+device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet
+device bfe # Broadcom BCM440x 10/100 Ethernet
+device bge # Broadcom BCM570xx Gigabit Ethernet
+device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn
+device dc # DEC/Intel 21143 and various workalikes
+device et # Agere ET1310 10/100/Gigabit Ethernet
+device fxp # Intel EtherExpress PRO/100B (82557, 82558)
+device gem # Sun GEM/Sun ERI/Apple GMAC
+device hme # Sun HME (Happy Meal Ethernet)
+device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet
+device lge # Level 1 LXT1001 gigabit Ethernet
+device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet
+device nfe # nVidia nForce MCP on-board Ethernet
+device nge # NatSemi DP83820 gigabit Ethernet
+#device nve # nVidia nForce MCP on-board Ethernet Networking
+device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le')
+device re # RealTek 8139C+/8169/8169S/8110S
+device rl # RealTek 8129/8139
+device sf # Adaptec AIC-6915 (``Starfire'')
+device sge # Silicon Integrated Systems SiS190/191
+device sis # Silicon Integrated Systems SiS 900/SiS 7016
+device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet
+device ste # Sundance ST201 (D-Link DFE-550TX)
+device stge # Sundance/Tamarack TC9021 gigabit Ethernet
+device tl # Texas Instruments ThunderLAN
+device tx # SMC EtherPower II (83c170 ``EPIC'')
+device vge # VIA VT612x gigabit Ethernet
+device vr # VIA Rhine, Rhine II
+device wb # Winbond W89C840F
+device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'')
+
+# ISA Ethernet NICs. pccard NICs included.
+device cs # Crystal Semiconductor CS89x0 NIC
+# 'device ed' requires 'device miibus'
+device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards
+device ex # Intel EtherExpress Pro/10 and Pro/10+
+device ep # Etherlink III based cards
+device fe # Fujitsu MB8696x based cards
+device sn # SMC's 9000 series of Ethernet chips
+device xe # Xircom pccard Ethernet
+
+# Wireless NIC cards
+device wlan # 802.11 support
+options IEEE80211_DEBUG # enable debug msgs
+options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's
+options IEEE80211_SUPPORT_MESH # enable 802.11s draft support
+device wlan_wep # 802.11 WEP support
+device wlan_ccmp # 802.11 CCMP support
+device wlan_tkip # 802.11 TKIP support
+device wlan_amrr # AMRR transmit rate control algorithm
+device an # Aironet 4500/4800 802.11 wireless NICs.
+device ath # Atheros NICs
+device ath_pci # Atheros pci/cardbus glue
+device ath_hal # pci/cardbus chip support
+options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors
+options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation
+options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later
+device ath_rate_sample # SampleRate tx rate control for ath
+#device bwi # Broadcom BCM430x/BCM431x wireless NICs.
+#device bwn # Broadcom BCM43xx wireless NICs.
+device ipw # Intel 2100 wireless NICs.
+device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs.
+device iwn # Intel 4965/1000/5000/6000 wireless NICs.
+device malo # Marvell Libertas wireless NICs.
+device mwl # Marvell 88W8363 802.11n wireless NICs.
+device ral # Ralink Technology RT2500 wireless NICs.
+device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs.
+device wpi # Intel 3945ABG wireless NICs.
+
+# Pseudo devices.
+device loop # Network loopback
+device random # Entropy device
+options PADLOCK_RNG # VIA Padlock RNG
+options RDRAND_RNG # Intel Bull Mountain RNG
+device ether # Ethernet support
+device vlan # 802.1Q VLAN support
+device tun # Packet tunnel.
+device md # Memory "disks"
+device gif # IPv6 and IPv4 tunneling
+device faith # IPv6-to-IPv4 relaying (translation)
+device firmware # firmware assist module
+
+# The `bpf' device enables the Berkeley Packet Filter.
+# Be aware of the administrative consequences of enabling this!
+# Note that 'bpf' is required for DHCP.
+device bpf # Berkeley packet filter
+
+# USB support
+options USB_DEBUG # enable debug msgs
+device uhci # UHCI PCI->USB interface
+device ohci # OHCI PCI->USB interface
+device ehci # EHCI PCI->USB interface (USB 2.0)
+device xhci # XHCI PCI->USB interface (USB 3.0)
+device usb # USB Bus (required)
+device ukbd # Keyboard
+device umass # Disks/Mass storage - Requires scbus and da
+
+# Sound support
+device sound # Generic sound driver (required)
+device snd_cmi # CMedia CMI8338/CMI8738
+device snd_csa # Crystal Semiconductor CS461x/428x
+device snd_emu10kx # Creative SoundBlaster Live! and Audigy
+device snd_es137x # Ensoniq AudioPCI ES137x
+device snd_hda # Intel High Definition Audio
+device snd_ich # Intel, NVidia and other ICH AC'97 Audio
+device snd_via8233 # VIA VT8233x Audio
+
+# MMC/SD
+device mmc # MMC/SD bus
+device mmcsd # MMC/SD memory card
+device sdhci # Generic PCI SD Host Controller
+
+# VirtIO support
+device virtio # Generic VirtIO bus (required)
+device virtio_pci # VirtIO PCI device
+device vtnet # VirtIO Ethernet device
+device virtio_blk # VirtIO Block device
+device virtio_scsi # VirtIO SCSI device
+device virtio_balloon # VirtIO Memory Balloon device
diff --git a/sys/amd64/conf/GENERIC.hints b/sys/amd64/conf/GENERIC.hints
new file mode 100644
index 0000000..eacbbe8
--- /dev/null
+++ b/sys/amd64/conf/GENERIC.hints
@@ -0,0 +1,33 @@
+# $FreeBSD$
+hint.fdc.0.at="isa"
+hint.fdc.0.port="0x3F0"
+hint.fdc.0.irq="6"
+hint.fdc.0.drq="2"
+hint.fd.0.at="fdc0"
+hint.fd.0.drive="0"
+hint.fd.1.at="fdc0"
+hint.fd.1.drive="1"
+hint.atkbdc.0.at="isa"
+hint.atkbdc.0.port="0x060"
+hint.atkbd.0.at="atkbdc"
+hint.atkbd.0.irq="1"
+hint.psm.0.at="atkbdc"
+hint.psm.0.irq="12"
+hint.sc.0.at="isa"
+hint.sc.0.flags="0x100"
+hint.uart.0.at="isa"
+hint.uart.0.port="0x3F8"
+hint.uart.0.flags="0x10"
+hint.uart.0.irq="4"
+hint.uart.1.at="isa"
+hint.uart.1.port="0x2F8"
+hint.uart.1.irq="3"
+hint.ppc.0.at="isa"
+hint.ppc.0.irq="7"
+hint.atrtc.0.at="isa"
+hint.atrtc.0.port="0x70"
+hint.atrtc.0.irq="8"
+hint.attimer.0.at="isa"
+hint.attimer.0.port="0x40"
+hint.attimer.0.irq="0"
+hint.wbwd.0.at="isa"
diff --git a/sys/amd64/conf/Makefile b/sys/amd64/conf/Makefile
new file mode 100644
index 0000000..1d2513f
--- /dev/null
+++ b/sys/amd64/conf/Makefile
@@ -0,0 +1,5 @@
+# $FreeBSD$
+
+TARGET=amd64
+
+.include "${.CURDIR}/../../conf/makeLINT.mk"
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
new file mode 100644
index 0000000..7a41464
--- /dev/null
+++ b/sys/amd64/conf/NOTES
@@ -0,0 +1,626 @@
+#
+# NOTES -- Lines that can be cut/pasted into kernel and hints configs.
+#
+# This file contains machine dependent kernel configuration notes. For
+# machine independent notes, look in /sys/conf/NOTES.
+#
+# $FreeBSD$
+#
+
+#
+# We want LINT to cover profiling as well.
+profile 2
+
+#
+# Enable the kernel DTrace hooks which are required to load the DTrace
+# kernel modules.
+#
+options KDTRACE_HOOKS
+
+
+#####################################################################
+# SMP OPTIONS:
+#
+# Notes:
+#
+# IPI_PREEMPTION instructs the kernel to preempt threads running on other
+# CPUs if needed. Relies on the PREEMPTION option.
+
+# Optional:
+options IPI_PREEMPTION
+device atpic # Optional legacy pic support
+device mptable # Optional MPSPEC mptable support
+
+#
+# Watchdog routines.
+#
+options MP_WATCHDOG
+
+# Debugging options.
+#
+options COUNT_XINVLTLB_HITS # Counters for TLB events
+options COUNT_IPIS # Per-CPU IPI interrupt counters
+
+
+
+#####################################################################
+# CPU OPTIONS
+
+#
+# You must specify at least one CPU (the one you intend to run on);
+# deleting the specification for CPUs you don't need to use may make
+# parts of the system run faster.
+#
+cpu HAMMER # aka K8, aka Opteron & Athlon64
+
+#
+# Options for CPU features.
+#
+
+#
+# PERFMON causes the driver for Pentium/Pentium Pro performance counters
+# to be compiled. See perfmon(4) for more information.
+#
+#XXX#options PERFMON
+
+
+#####################################################################
+# NETWORKING OPTIONS
+
+#
+# DEVICE_POLLING adds support for mixed interrupt-polling handling
+# of network device drivers, which has significant benefits in terms
+# of robustness to overloads and responsiveness, as well as permitting
+# accurate scheduling of the CPU time between kernel network processing
+# and other activities. The drawback is a moderate (up to 1/HZ seconds)
+# potential increase in response times.
+# It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING
+# to achieve smoother behaviour.
+# Additionally, you can enable/disable polling at runtime with the help
+# of the ifconfig(8) utility, and select the CPU fraction reserved to
+# userland with the sysctl variable kern.polling.user_frac
+# (default 50, range 0..100).
+#
+# Not all device drivers support this mode of operation at the time of
+# this writing. See polling(4) for more details.
+
+options DEVICE_POLLING
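+# Example runtime usage (a sketch; the interface name is illustrative):
+#	ifconfig fxp0 polling			# enable polling on fxp0
+#	ifconfig fxp0 -polling			# revert to interrupt mode
+#	sysctl kern.polling.user_frac=30	# reserve 30% of CPU time for userland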
+
+# BPF_JITTER adds support for BPF just-in-time compiler.
+
+options BPF_JITTER
+
+# OpenFabrics Enterprise Distribution (Infiniband).
+options OFED
+options OFED_DEBUG_INIT
+
+# Sockets Direct Protocol
+options SDP
+options SDP_DEBUG
+
+# IP over Infiniband
+options IPOIB
+options IPOIB_DEBUG
+options IPOIB_CM
+
+
+#####################################################################
+# CLOCK OPTIONS
+
+# Provide read/write access to the memory in the clock chip.
+device nvram # Access to rtc cmos via /dev/nvram
+
+
+#####################################################################
+# MISCELLANEOUS DEVICES AND OPTIONS
+
+device speaker #Play IBM BASIC-style noises out your speaker
+hint.speaker.0.at="isa"
+hint.speaker.0.port="0x61"
+device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT!
+
+
+#####################################################################
+# HARDWARE BUS CONFIGURATION
+
+#
+# ISA bus
+#
+device isa
+
+#
+# Options for `isa':
+#
+# AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
+# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
+# This option breaks suspend/resume on some portables.
+#
+# AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
+# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
+# Automatic EOI is documented not to work for the slave with the
+# original i8259A, but it works for some clones and some integrated
+# versions.
+#
+# MAXMEM specifies the amount of RAM on the machine; if this is not
+# specified, FreeBSD will first read the amount of memory from the CMOS
+# RAM, so the amount of memory will initially be limited to 64MB or 16MB
+# depending on the BIOS. If the BIOS reports 64MB, a memory probe will
+# then attempt to detect the installed amount of RAM. If this probe
+# fails to detect >64MB RAM you will have to use the MAXMEM option.
+# The amount is in kilobytes, so for a machine with 128MB of RAM, it would
+# be 131072 (128 * 1024).
+#
+# BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
+# reset the CPU for reboot. This is needed on some systems with broken
+# keyboard controllers.
+
+options AUTO_EOI_1
+#options AUTO_EOI_2
+
+options MAXMEM=(128*1024)
+#options BROKEN_KEYBOARD_RESET
+
+#
+# PCI bus & PCI options:
+#
+device pci
+
+#
+# AGP GART support
+device agp
+
+#
+# AGP debugging.
+#
+options AGP_DEBUG
+
+
+#####################################################################
+# HARDWARE DEVICE CONFIGURATION
+
+# To include support for VGA VESA video modes
+options VESA
+
+# Turn on extra debugging checks and output for VESA support.
+options VESA_DEBUG
+
+device dpms # DPMS suspend & resume via VESA BIOS
+
+# x86 real mode BIOS emulator, required by atkbdc/dpms/vesa
+options X86BIOS
+
+#
+# Optional devices:
+#
+
+# PS/2 mouse
+device psm
+hint.psm.0.at="atkbdc"
+hint.psm.0.irq="12"
+
+# Options for psm:
+options PSM_HOOKRESUME #hook the system resume event, useful
+ #for some laptops
+options PSM_RESETAFTERSUSPEND #reset the device at the resume event
+
+# The keyboard controller; it controls the keyboard and the PS/2 mouse.
+device atkbdc
+hint.atkbdc.0.at="isa"
+hint.atkbdc.0.port="0x060"
+
+# The AT keyboard
+device atkbd
+hint.atkbd.0.at="atkbdc"
+hint.atkbd.0.irq="1"
+
+# Options for atkbd:
+options ATKBD_DFLT_KEYMAP # specify the built-in keymap
+makeoptions ATKBD_DFLT_KEYMAP=jp.106
+
+# `flags' for atkbd:
+# 0x01 Force detection of keyboard, else we always assume a keyboard
+# 0x02 Don't reset keyboard, useful for some newer ThinkPads
+# 0x03 Force detection and avoid reset, might help with certain
+# docking stations
+# 0x04 Old-style (XT) keyboard support, useful for older ThinkPads
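+# For example (an illustrative hint, following the syntax used in
+# GENERIC.hints), forcing keyboard detection would be:
+#	hint.atkbd.0.flags="0x1"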
+
+# Video card driver for VGA adapters.
+device vga
+hint.vga.0.at="isa"
+
+# Options for vga:
+# Try the following option if the mouse pointer is not drawn correctly
+# or font does not seem to be loaded properly. May cause flicker on
+# some systems.
+options VGA_ALT_SEQACCESS
+
+# If you can dispense with some vga driver features, you may want to
+# use the following options to save some memory.
+#options VGA_NO_FONT_LOADING # don't save/load font
+#options VGA_NO_MODE_CHANGE # don't change video modes
+
+# Older video cards may require this option for proper operation.
+options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs
+
+# The following option probably won't work with the LCD displays.
+options VGA_WIDTH90 # support 90 column modes
+
+# Debugging.
+options VGA_DEBUG
+
+# Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA.
+device s3pci
+
+# 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create
+# the /dev/3dfx0 device to work with glide implementations. This should get
+# linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as
+# the tdfx DRI module from XFree86 and is completely unrelated.
+#
+# To enable Linuxulator support, one must also include COMPAT_LINUX in the
+# config as well. The other option is to load both as modules.
+
+device tdfx # Enable 3Dfx Voodoo support
+#XXX#device tdfx_linux # Enable Linuxulator support
+
+#
+# ACPI support using the Intel ACPI Component Architecture reference
+# implementation.
+#
+# ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer
+# kernel environment variables to select initial debugging levels for the
+# Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER
+# defined when it is built).
+
+device acpi
+options ACPI_DEBUG
+
+# The cpufreq(4) driver provides support for non-ACPI CPU frequency control
+device cpufreq
+
+# Direct Rendering modules for 3D acceleration.
+device drm # DRM core module required by DRM drivers
+device i915drm # Intel i830 through i915
+device mach64drm # ATI Rage Pro, Rage Mobility P/M, Rage XL
+device mgadrm # AGP Matrox G200, G400, G450, G550
+device r128drm # ATI Rage 128
+device radeondrm # ATI Radeon
+device savagedrm # S3 Savage3D, Savage4
+device sisdrm # SiS 300/305, 540, 630
+device tdfxdrm # 3dfx Voodoo 3/4/5 and Banshee
+device viadrm # VIA
+options DRM_DEBUG # Include debug printfs (slow)
+
+#
+# Network interfaces:
+#
+
+# ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503
+# HP PC Lan+, various PC Card devices
+# (requires miibus)
+# ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter
+# Requires the ipw firmware module
+# iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters
+# Requires the iwi firmware module
+# iwn: Intel Wireless WiFi Link 4965/1000/5000/6000 802.11 network adapters
+# Requires the iwn firmware module
+# mlx4ib: Mellanox ConnectX HCA InfiniBand
+# mlxen: Mellanox ConnectX HCA Ethernet
+# mthca: Mellanox HCA InfiniBand
+# nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source)
+# nve: nVidia nForce MCP on-board Ethernet Networking
+# sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters
+# wpi: Intel 3945ABG Wireless LAN controller
+# Requires the wpi firmware module
+
+device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards
+options ED_3C503
+options ED_HPP
+options ED_SIC
+device ipw # Intel 2100 wireless NICs.
+device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs.
+device iwn # Intel 4965/1000/5000/6000 wireless NICs.
+device mlx4ib # Mellanox ConnectX HCA InfiniBand
+device mlxen # Mellanox ConnectX HCA Ethernet
+device mthca # Mellanox HCA InfiniBand
+device nfe # nVidia nForce MCP on-board Ethernet
+device nve # nVidia nForce MCP on-board Ethernet Networking
+device sfxge # Solarflare SFC9000 10Gb Ethernet
+device wpi # Intel 3945ABG wireless NICs.
+
+# IEEE 802.11 adapter firmware modules
+
+# Intel PRO/Wireless 2100 firmware:
+# ipwfw: BSS/IBSS/monitor mode firmware
+# ipwbssfw: BSS mode firmware
+# ipwibssfw: IBSS mode firmware
+# ipwmonitorfw: Monitor mode firmware
+# Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware:
+# iwifw: BSS/IBSS/monitor mode firmware
+# iwibssfw: BSS mode firmware
+# iwiibssfw: IBSS mode firmware
+# iwimonitorfw: Monitor mode firmware
+# Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware:
+# iwnfw: Single module to support the 4965/1000/5000/5150/6000
+# iwn4965fw: Specific module for the 4965 only
+# iwn1000fw: Specific module for the 1000 only
+# iwn5000fw: Specific module for the 5000 only
+# iwn5150fw: Specific module for the 5150 only
+# iwn6000fw: Specific module for the 6000 only
+# iwn6050fw: Specific module for the 6050 only
+# wpifw: Intel 3945ABG Wireless LAN Controller firmware
+
+device iwifw
+device iwibssfw
+device iwiibssfw
+device iwimonitorfw
+device ipwfw
+device ipwbssfw
+device ipwibssfw
+device ipwmonitorfw
+device iwnfw
+device iwn4965fw
+device iwn1000fw
+device iwn5000fw
+device iwn5150fw
+device iwn6000fw
+device iwn6050fw
+device wpifw
+
+#
+#XXX this stores pointers in a 32bit field that is defined by the hardware
+#device pst
+
+#
+# Areca 11xx and 12xx series of SATA II RAID controllers.
+# CAM is required.
+#
+device arcmsr # Areca SATA II RAID
+
+#
+# 3ware 9000 series PATA/SATA RAID controller driver and options.
+# The driver is implemented as a SIM, and so needs the CAM infrastructure.
+#
+options TWA_DEBUG # 0-10; 10 prints the most messages.
+options TWA_FLASH_FIRMWARE # firmware image bundled when defined.
+device twa # 3ware 9000 series PATA/SATA RAID
+
+#
+# SCSI host adapters:
+#
+# ncv: NCR 53C500 based SCSI host adapters.
+# nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters.
+# stg: TMC 18C30, 18C50 based SCSI host adapters.
+
+device ncv
+device nsp
+device stg
+
+#
+# Adaptec FSA RAID controllers, including integrated DELL controllers,
+# the Dell PERC 2/QC and the HP NetRAID-4M
+device aac
+device aacp # SCSI Passthrough interface (optional, CAM required)
+
+#
+# Highpoint RocketRAID 27xx.
+device hpt27xx
+
+#
+# Highpoint RocketRAID 182x.
+device hptmv
+
+#
+# Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340,
+# RR2210, RR174x, RR2522, RR231x, RR230x.
+device hptrr
+
+#
+# Highpoint RocketRaid 3xxx series SATA RAID
+device hptiop
+
+#
+# IBM (now Adaptec) ServeRAID controllers
+device ips
+
+#
+# Intel C600 (Patsburg) integrated SAS controller
+device isci
+options ISCI_LOGGING # enable debugging in isci HAL
+
+#
+# NVM Express (NVMe) support
+device nvme # base NVMe driver
+device nvd # expose NVMe namespaces as disks, depends on nvme
+
+#
+# SafeNet crypto driver: can be moved to the MI NOTES as soon as
+# it's tested on a big-endian machine
+#
+device safe # SafeNet 1141
+options SAFE_DEBUG # enable debugging support: hw.safe.debug
+options SAFE_RNDTEST # enable rndtest support
+
+#
+# VirtIO support
+#
+# The virtio entry provides a generic bus for use by the device drivers.
+# It must be combined with an interface that communicates with the host.
+# Multiple such interfaces are defined by the VirtIO specification. FreeBSD
+# only has support for PCI. Therefore, virtio_pci must be statically
+# compiled in or loaded as a module for the device drivers to function.
+#
+device virtio # Generic VirtIO bus (required)
+device virtio_pci # VirtIO PCI Interface
+device vtnet # VirtIO Ethernet device
+device virtio_blk # VirtIO Block device
+device virtio_scsi # VirtIO SCSI device
+device virtio_balloon # VirtIO Memory Balloon device
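+# As a sketch of the alternative, the same drivers can be loaded as
+# modules from loader.conf(5); the variable names below follow the usual
+# <module>_load convention and are illustrative:
+#	virtio_load="YES"
+#	virtio_pci_load="YES"
+#	virtio_blk_load="YES"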
+
+#####################################################################
+
+#
+# Miscellaneous hardware:
+#
+# ipmi: Intelligent Platform Management Interface
+# pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724)
+# smbios: DMI/SMBIOS entry point
+# vpd: Vital Product Data kernel interface
+# asmc: Apple System Management Controller
+# si: Specialix International SI/XIO or SX intelligent serial card
+# tpm: Trusted Platform Module
+
+# Notes on the Specialix SI/XIO driver:
+# The host card is memory-mapped, not I/O-mapped.
+# The Rev 1 host cards use a 64K chunk, on a 32K boundary.
+# The Rev 2 host cards use a 32K chunk, on a 32K boundary.
+# The cards can use an IRQ of 11, 12 or 15.
+
+device ipmi
+device pbio
+hint.pbio.0.at="isa"
+hint.pbio.0.port="0x360"
+device smbios
+device vpd
+device asmc
+#device si
+device tpm
+
+#
+# Laptop/Notebook options:
+#
+
+
+#
+# I2C Bus
+#
+
+#
+# Hardware watchdog timers:
+#
+# ichwd: Intel ICH watchdog timer
+# amdsbwd: AMD SB7xx watchdog timer
+# viawd: VIA south bridge watchdog timer
+# wbwd: Winbond watchdog timer
+#
+device ichwd
+device amdsbwd
+device viawd
+device wbwd
+
+#
+# Temperature sensors:
+#
+# coretemp: on-die sensor on Intel Core and newer CPUs
+# amdtemp: on-die sensor on AMD K8/K10/K11 CPUs
+#
+device coretemp
+device amdtemp
+
+#
+# CPU control pseudo-device. Provides access to MSRs, CPUID info and
+# microcode update feature.
+#
+device cpuctl
+
+#
+# System Management Bus (SMB)
+#
+options ENABLE_ALART # Control alarm on Intel intpm driver
+
+#
+# Number of initial kernel page table pages used for early bootstrap.
+# This number should include enough pages to map the kernel and any
+# modules or other data loaded with the kernel by the loader. Each
+# page table page maps 2MB.
+#
+options NKPT=31
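+# As a worked example: NKPT=31 provides 31 * 2MB = 62MB of bootstrap
+# mappings, which must cover the kernel image plus any modules or data
+# preloaded by the loader.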
+
+
+#####################################################################
+# ABI Emulation
+
+#XXX keep these here for now and reactivate when support for emulating
+#XXX these 32 bit binaries is added.
+
+# Enable 32-bit runtime support for FreeBSD/i386 binaries.
+options COMPAT_FREEBSD32
+
+# Enable iBCS2 runtime support for SCO and ISC binaries
+#XXX#options IBCS2
+
+# Emulate spx device for client side of SVR3 local X interface
+#XXX#options SPX_HACK
+
+# Enable Linux ABI emulation
+#XXX#options COMPAT_LINUX
+
+# Enable 32-bit Linux ABI emulation (requires COMPAT_43 and COMPAT_FREEBSD32)
+options COMPAT_LINUX32
+
+# Enable the linux-like proc filesystem support (requires COMPAT_LINUX32
+# and PSEUDOFS)
+options LINPROCFS
+
+# Enable the linux-like sys filesystem support (requires COMPAT_LINUX32
+# and PSEUDOFS)
+options LINSYSFS
+
+#
+# SysVR4 ABI emulation
+#
+# The svr4 ABI emulator can be statically compiled into the kernel or loaded as
+# a KLD module.
+# The STREAMS network emulation code can also be compiled statically or as a
+# module. If loaded as a module, it must be loaded before the svr4 module
+# (the /usr/sbin/svr4 script does this for you). If compiling statically,
+# the `streams' device must be configured into any kernel which also
+# specifies COMPAT_SVR4. It is possible to have a statically-configured
+# STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4
+# script understands that it doesn't need to load the `streams' module under
+# those circumstances.
+# Caveat: At this time, `options KTRACE' is required for the svr4 emulator
+# (whether static or dynamic).
+#
+#XXX#options COMPAT_SVR4 # build emulator statically
+#XXX#options DEBUG_SVR4 # enable verbose debugging
+#XXX#device streams # STREAMS network driver (required for svr4).
+
+
+#####################################################################
+# VM OPTIONS
+
+# KSTACK_PAGES is the number of memory pages to assign to the kernel
+# stack of each thread.
+
+options KSTACK_PAGES=5
+
+# Enable detailed accounting by the PV entry allocator.
+
+options PV_STATS
+
+#####################################################################
+
+# More undocumented options for linting.
+# Note that documenting these is not considered an affront.
+
+options FB_INSTALL_CDEV # install a CDEV entry in /dev
+
+options KBDIO_DEBUG=2
+options KBD_MAXRETRY=4
+options KBD_MAXWAIT=6
+options KBD_RESETDELAY=201
+
+options PSM_DEBUG=1
+
+options TIMER_FREQ=((14318182+6)/12)
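+# The expression above divides the 14.31818 MHz ISA reference clock by 12,
+# with the +6 rounding to the nearest integer: (14318182+6)/12 = 1193182 Hz,
+# the input frequency of the i8254 timer.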
+
+options VM_KMEM_SIZE
+options VM_KMEM_SIZE_MAX
+options VM_KMEM_SIZE_SCALE
+
+# Enable NDIS binary driver support
+options NDISAPI
+device ndis
+
+# Linux-specific pseudo devices support
+device lindev
diff --git a/sys/amd64/conf/XENHVM b/sys/amd64/conf/XENHVM
new file mode 100644
index 0000000..ee745ec
--- /dev/null
+++ b/sys/amd64/conf/XENHVM
@@ -0,0 +1,22 @@
+#
+# XENHVM -- Xen HVM kernel configuration file for FreeBSD/amd64
+#
+# $FreeBSD$
+#
+include GENERIC
+ident XENHVM
+
+#
+# Adaptive locks rely on a lock-free pointer read to determine the run state
+# of the thread holding a lock when under contention; under a virtualisation
+# system, the thread run state may not accurately reflect whether the thread
+# (or rather its host VCPU) is actually executing. As such, disable this
+# optimisation.
+#
+options NO_ADAPTIVE_MUTEXES
+options NO_ADAPTIVE_RWLOCKS
+options NO_ADAPTIVE_SX
+
+# Xen HVM support
+options XENHVM
+device xenpci
diff --git a/sys/amd64/ia32/ia32_exception.S b/sys/amd64/ia32/ia32_exception.S
new file mode 100644
index 0000000..fe1a676
--- /dev/null
+++ b/sys/amd64/ia32/ia32_exception.S
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+
+#include "assym.s"
+
+ .text
+/*
+ * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ *
+ * This is a SDT_SYSIDT entry point (unlike the i386 port) so that we
+ * can do a swapgs before enabling interrupts. This is critical because
+ * if we took an interrupt before swapgs, the interrupt code would see
+ * that it originated in supervisor mode and skip the swapgs.
+ */
+ SUPERALIGN_TEXT
+IDTVEC(int0x80_syscall)
+ swapgs
+ pushq $2 /* sizeof "int 0x80" */
+ subq $TF_ERR,%rsp /* skip over tf_trapno */
+ movq %rdi,TF_RDI(%rsp)
+ movq PCPU(CURPCB),%rdi
+ andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ sti
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp, %rdi
+ call ia32_syscall
+ MEXITCOUNT
+ jmp doreti
diff --git a/sys/amd64/ia32/ia32_misc.c b/sys/amd64/ia32/ia32_misc.c
new file mode 100644
index 0000000..5a8a721
--- /dev/null
+++ b/sys/amd64/ia32/ia32_misc.c
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+
+#include <machine/cpu.h>
+#include <machine/sysarch.h>
+
+#include <compat/freebsd32/freebsd32_util.h>
+#include <compat/freebsd32/freebsd32.h>
+#include <compat/freebsd32/freebsd32_proto.h>
+
+int
+freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
+{
+ struct sysarch_args uap1;
+ struct i386_ldt_args uapl;
+ struct i386_ldt_args32 uapl32;
+ int error;
+
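+ /*
+ * The LDT operations take a pointer to an argument structure whose
+ * layout differs between 32-bit and 64-bit processes; repack the
+ * 32-bit form into the native one before calling sysarch_ldt().
+ */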
+ if (uap->op == I386_SET_LDT || uap->op == I386_GET_LDT) {
+ if ((error = copyin(uap->parms, &uapl32, sizeof(uapl32))) != 0)
+ return (error);
+ uap1.op = uap->op;
+ uap1.parms = (char *)&uapl;
+ uapl.start = uapl32.start;
+ uapl.descs = (struct user_segment_descriptor *)(uintptr_t)
+ uapl32.descs;
+ uapl.num = uapl32.num;
+ return (sysarch_ldt(td, &uap1, UIO_SYSSPACE));
+ } else {
+ uap1.op = uap->op;
+ uap1.parms = uap->parms;
+ return (sysarch(td, &uap1));
+ }
+}
+
+#ifdef COMPAT_43
+int
+ofreebsd32_getpagesize(struct thread *td,
+ struct ofreebsd32_getpagesize_args *uap)
+{
+
+ td->td_retval[0] = IA32_PAGE_SIZE;
+ return (0);
+}
+#endif
diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c
new file mode 100644
index 0000000..5bc18f1
--- /dev/null
+++ b/sys/amd64/ia32/ia32_reg.c
@@ -0,0 +1,235 @@
+/*-
+ * Copyright (c) 2005 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/exec.h>
+#include <sys/fcntl.h>
+#include <sys/imgact.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/mman.h>
+#include <sys/namei.h>
+#include <sys/pioctl.h>
+#include <sys/proc.h>
+#include <sys/procfs.h>
+#include <sys/resourcevar.h>
+#include <sys/systm.h>
+#include <sys/signalvar.h>
+#include <sys/stat.h>
+#include <sys/sx.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+
+#include <compat/freebsd32/freebsd32_util.h>
+#include <compat/freebsd32/freebsd32_proto.h>
+#include <machine/fpu.h>
+#include <machine/psl.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/cpufunc.h>
+
+#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
+#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+
+int
+fill_regs32(struct thread *td, struct reg32 *regs)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+ pcb = td->td_pcb;
+ if (tp->tf_flags & TF_HASSEGS) {
+ regs->r_gs = tp->tf_gs;
+ regs->r_fs = tp->tf_fs;
+ regs->r_es = tp->tf_es;
+ regs->r_ds = tp->tf_ds;
+ } else {
+ regs->r_gs = _ugssel;
+ regs->r_fs = _ufssel;
+ regs->r_es = _udatasel;
+ regs->r_ds = _udatasel;
+ }
+ regs->r_edi = tp->tf_rdi;
+ regs->r_esi = tp->tf_rsi;
+ regs->r_ebp = tp->tf_rbp;
+ regs->r_ebx = tp->tf_rbx;
+ regs->r_edx = tp->tf_rdx;
+ regs->r_ecx = tp->tf_rcx;
+ regs->r_eax = tp->tf_rax;
+ regs->r_eip = tp->tf_rip;
+ regs->r_cs = tp->tf_cs;
+ regs->r_eflags = tp->tf_rflags;
+ regs->r_esp = tp->tf_rsp;
+ regs->r_ss = tp->tf_ss;
+ return (0);
+}
+
+int
+set_regs32(struct thread *td, struct reg32 *regs)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+ if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
+ return (EINVAL);
+ pcb = td->td_pcb;
+ tp->tf_gs = regs->r_gs;
+ tp->tf_fs = regs->r_fs;
+ tp->tf_es = regs->r_es;
+ tp->tf_ds = regs->r_ds;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ tp->tf_flags = TF_HASSEGS;
+ tp->tf_rdi = regs->r_edi;
+ tp->tf_rsi = regs->r_esi;
+ tp->tf_rbp = regs->r_ebp;
+ tp->tf_rbx = regs->r_ebx;
+ tp->tf_rdx = regs->r_edx;
+ tp->tf_rcx = regs->r_ecx;
+ tp->tf_rax = regs->r_eax;
+ tp->tf_rip = regs->r_eip;
+ tp->tf_cs = regs->r_cs;
+ tp->tf_rflags = regs->r_eflags;
+ tp->tf_rsp = regs->r_esp;
+ tp->tf_ss = regs->r_ss;
+ return (0);
+}
+
+int
+fill_fpregs32(struct thread *td, struct fpreg32 *regs)
+{
+ struct savefpu *sv_fpu;
+ struct save87 *sv_87;
+ struct env87 *penv_87;
+ struct envxmm *penv_xmm;
+ int i;
+
+ bzero(regs, sizeof(*regs));
+ sv_87 = (struct save87 *)regs;
+ penv_87 = &sv_87->sv_env;
+ fpugetregs(td);
+ sv_fpu = get_pcb_user_save_td(td);
+ penv_xmm = &sv_fpu->sv_env;
+
+ /* FPU control/status */
+ penv_87->en_cw = penv_xmm->en_cw;
+ penv_87->en_sw = penv_xmm->en_sw;
+ penv_87->en_tw = penv_xmm->en_tw;
+ /*
+ * XXX for en_fip/fcs/foo/fos, check if the fxsave format
+ * uses the old-style layout for 32 bit user apps. If so,
+ * read the ip and operand segment registers from there.
+ * For now, use the process's %cs/%ds.
+ */
+ penv_87->en_fip = penv_xmm->en_rip;
+ penv_87->en_fcs = td->td_frame->tf_cs;
+ penv_87->en_opcode = penv_xmm->en_opcode;
+ penv_87->en_foo = penv_xmm->en_rdp;
+ /* Entry into the kernel always sets TF_HASSEGS */
+ penv_87->en_fos = td->td_frame->tf_ds;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ sv_87->sv_ac[i] = sv_fpu->sv_fp[i].fp_acc;
+
+ return (0);
+}
+
+int
+set_fpregs32(struct thread *td, struct fpreg32 *regs)
+{
+ struct save87 *sv_87 = (struct save87 *)regs;
+ struct env87 *penv_87 = &sv_87->sv_env;
+ struct savefpu *sv_fpu = get_pcb_user_save_td(td);
+ struct envxmm *penv_xmm = &sv_fpu->sv_env;
+ int i;
+
+ /* FPU control/status */
+ penv_xmm->en_cw = penv_87->en_cw;
+ penv_xmm->en_sw = penv_87->en_sw;
+ penv_xmm->en_tw = penv_87->en_tw;
+ penv_xmm->en_rip = penv_87->en_fip;
+ /* penv_87->en_fcs and en_fos ignored, see above */
+ penv_xmm->en_opcode = penv_87->en_opcode;
+ penv_xmm->en_rdp = penv_87->en_foo;
+
+ /* FPU registers */
+ for (i = 0; i < 8; ++i)
+ sv_fpu->sv_fp[i].fp_acc = sv_87->sv_ac[i];
+ for (i = 8; i < 16; ++i)
+ bzero(&sv_fpu->sv_fp[i].fp_acc, sizeof(sv_fpu->sv_fp[i].fp_acc));
+ fpuuserinited(td);
+
+ return (0);
+}
+
+int
+fill_dbregs32(struct thread *td, struct dbreg32 *regs)
+{
+ struct dbreg dr;
+ int err, i;
+
+ err = fill_dbregs(td, &dr);
+ for (i = 0; i < 8; i++)
+ regs->dr[i] = dr.dr[i];
+ return (err);
+}
+
+int
+set_dbregs32(struct thread *td, struct dbreg32 *regs)
+{
+ struct dbreg dr;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ dr.dr[i] = regs->dr[i];
+ for (i = 8; i < 16; i++)
+ dr.dr[i] = 0;
+ return (set_dbregs(td, &dr));
+}
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
new file mode 100644
index 0000000..09ec7ab
--- /dev/null
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -0,0 +1,1006 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/exec.h>
+#include <sys/fcntl.h>
+#include <sys/imgact.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/mman.h>
+#include <sys/namei.h>
+#include <sys/pioctl.h>
+#include <sys/proc.h>
+#include <sys/procfs.h>
+#include <sys/resourcevar.h>
+#include <sys/systm.h>
+#include <sys/signalvar.h>
+#include <sys/stat.h>
+#include <sys/sx.h>
+#include <sys/syscall.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+
+#include <compat/freebsd32/freebsd32_signal.h>
+#include <compat/freebsd32/freebsd32_util.h>
+#include <compat/freebsd32/freebsd32_proto.h>
+#include <compat/freebsd32/freebsd32.h>
+#include <compat/ia32/ia32_signal.h>
+#include <machine/psl.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/cpufunc.h>
+
+#ifdef COMPAT_FREEBSD4
+static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *);
+#endif
+
+#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
+#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+
+static void
+ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp,
+ char *xfpusave, size_t xfpusave_len)
+{
+ size_t max_len, len;
+
+ /*
+ * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE
+ * in 32bit mode saves %cs and %ds, while on 64bit it saves
+ * 64bit instruction and data pointers. Ignore the difference
+ * for now, it should be irrelevant for most applications.
+ */
+ mcp->mc_ownedfp = fpugetregs(td);
+ bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate,
+ sizeof(mcp->mc_fpstate));
+ mcp->mc_fpformat = fpuformat();
+ if (!use_xsave || xfpusave_len == 0)
+ return;
+ max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+ len = xfpusave_len;
+ if (len > max_len) {
+ len = max_len;
+ /* Zero the tail of the buffer that cannot be filled. */
+ bzero(xfpusave + max_len, xfpusave_len - max_len);
+ }
+ mcp->mc_flags |= _MC_HASFPXSTATE;
+ mcp->mc_xfpustate_len = len;
+ bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
+}
+
+static int
+ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp,
+ char *xfpustate, size_t xfpustate_len)
+{
+ int error;
+
+ if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
+ return (0);
+ else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
+ return (EINVAL);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
+ /* We don't care what state is left in the FPU or PCB. */
+ fpstate_drop(td);
+ error = 0;
+ } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
+ error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
+ xfpustate, xfpustate_len);
+ } else
+ return (EINVAL);
+ return (error);
+}
+
+/*
+ * Get machine context.
+ */
+static int
+ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
+{
+ struct pcb *pcb;
+ struct trapframe *tp;
+
+ pcb = td->td_pcb;
+ tp = td->td_frame;
+
+ PROC_LOCK(curthread->td_proc);
+ mcp->mc_onstack = sigonstack(tp->tf_rsp);
+ PROC_UNLOCK(curthread->td_proc);
+ /* Entry into kernel always sets TF_HASSEGS */
+ mcp->mc_gs = tp->tf_gs;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_ds = tp->tf_ds;
+ mcp->mc_edi = tp->tf_rdi;
+ mcp->mc_esi = tp->tf_rsi;
+ mcp->mc_ebp = tp->tf_rbp;
+ mcp->mc_isp = tp->tf_rsp;
+ mcp->mc_eflags = tp->tf_rflags;
+ if (flags & GET_MC_CLEAR_RET) {
+ mcp->mc_eax = 0;
+ mcp->mc_edx = 0;
+ mcp->mc_eflags &= ~PSL_C;
+ } else {
+ mcp->mc_eax = tp->tf_rax;
+ mcp->mc_edx = tp->tf_rdx;
+ }
+ mcp->mc_ebx = tp->tf_rbx;
+ mcp->mc_ecx = tp->tf_rcx;
+ mcp->mc_eip = tp->tf_rip;
+ mcp->mc_cs = tp->tf_cs;
+ mcp->mc_esp = tp->tf_rsp;
+ mcp->mc_ss = tp->tf_ss;
+ mcp->mc_len = sizeof(*mcp);
+ mcp->mc_flags = tp->tf_flags;
+ ia32_get_fpcontext(td, mcp, NULL, 0);
+ mcp->mc_fsbase = pcb->pcb_fsbase;
+ mcp->mc_gsbase = pcb->pcb_gsbase;
+ mcp->mc_xfpustate = 0;
+ mcp->mc_xfpustate_len = 0;
+ bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+ return (0);
+}
+
+/*
+ * Set machine context.
+ *
+ * However, we don't set any but the user modifiable flags, and we won't
+ * touch the cs selector.
+ */
+static int
+ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
+{
+ struct trapframe *tp;
+ char *xfpustate;
+ long rflags;
+ int ret;
+
+ tp = td->td_frame;
+ if (mcp->mc_len != sizeof(*mcp))
+ return (EINVAL);
+ rflags = (mcp->mc_eflags & PSL_USERCHANGE) |
+ (tp->tf_rflags & ~PSL_USERCHANGE);
+ if (mcp->mc_flags & _MC_IA32_HASFPXSTATE) {
+ if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+ sizeof(struct savefpu))
+ return (EINVAL);
+ xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+ ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
+ mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ } else
+ xfpustate = NULL;
+ ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ tp->tf_gs = mcp->mc_gs;
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_flags = TF_HASSEGS;
+ tp->tf_rdi = mcp->mc_edi;
+ tp->tf_rsi = mcp->mc_esi;
+ tp->tf_rbp = mcp->mc_ebp;
+ tp->tf_rbx = mcp->mc_ebx;
+ tp->tf_rdx = mcp->mc_edx;
+ tp->tf_rcx = mcp->mc_ecx;
+ tp->tf_rax = mcp->mc_eax;
+ /* trapno, err */
+ tp->tf_rip = mcp->mc_eip;
+ tp->tf_rflags = rflags;
+ tp->tf_rsp = mcp->mc_esp;
+ tp->tf_ss = mcp->mc_ss;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ return (0);
+}
+
+/*
+ * The first two fields of a ucontext_t are the signal mask and
+ * the machine context. The next field is uc_link; we want to
+ * avoid destroying the link when copying out contexts.
+ */
+#define UC_COPY_SIZE offsetof(struct ia32_ucontext, uc_link)
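+/*
+ * A sketch of why this size is safe: given the layout described above,
+ *
+ *	struct ia32_ucontext {
+ *		sigset_t		uc_sigmask;
+ *		struct ia32_mcontext	uc_mcontext;
+ *		uint32_t		uc_link;
+ *		...
+ *	};
+ *
+ * copyout(&uc, ucp, UC_COPY_SIZE) writes only the bytes before uc_link,
+ * leaving the user's link pointer intact.  The member types shown are
+ * illustrative, not the exact declarations.
+ */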
+
+int
+freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap)
+{
+ struct ia32_ucontext uc;
+ int ret;
+
+ if (uap->ucp == NULL)
+ ret = EINVAL;
+ else {
+ ia32_get_mcontext(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
+ PROC_LOCK(td->td_proc);
+ uc.uc_sigmask = td->td_sigmask;
+ PROC_UNLOCK(td->td_proc);
+ bzero(&uc.__spare__, sizeof(uc.__spare__));
+ ret = copyout(&uc, uap->ucp, UC_COPY_SIZE);
+ }
+ return (ret);
+}
+
+int
+freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap)
+{
+ struct ia32_ucontext uc;
+ int ret;
+
+ if (uap->ucp == NULL)
+ ret = EINVAL;
+ else {
+ ret = copyin(uap->ucp, &uc, UC_COPY_SIZE);
+ if (ret == 0) {
+ ret = ia32_set_mcontext(td, &uc.uc_mcontext);
+ if (ret == 0) {
+ kern_sigprocmask(td, SIG_SETMASK,
+ &uc.uc_sigmask, NULL, 0);
+ }
+ }
+ }
+ return (ret == 0 ? EJUSTRETURN : ret);
+}
+
+int
+freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap)
+{
+ struct ia32_ucontext uc;
+ int ret;
+
+ if (uap->oucp == NULL || uap->ucp == NULL)
+ ret = EINVAL;
+ else {
+ ia32_get_mcontext(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
+ PROC_LOCK(td->td_proc);
+ uc.uc_sigmask = td->td_sigmask;
+ PROC_UNLOCK(td->td_proc);
+ ret = copyout(&uc, uap->oucp, UC_COPY_SIZE);
+ if (ret == 0) {
+ ret = copyin(uap->ucp, &uc, UC_COPY_SIZE);
+ if (ret == 0) {
+ ret = ia32_set_mcontext(td, &uc.uc_mcontext);
+ if (ret == 0) {
+ kern_sigprocmask(td, SIG_SETMASK,
+ &uc.uc_sigmask, NULL, 0);
+ }
+ }
+ }
+ }
+ return (ret == 0 ? EJUSTRETURN : ret);
+}
+
+/*
+ * Send a signal to a process.
+ *
+ * The stack is set up so that the sigcode stored at its top calls the
+ * handler routine, followed by a call into the sigreturn routine below.
+ * After sigreturn resets the signal mask, the stack, and the frame
+ * pointer, it returns to the user-specified pc and psl.
+ */
+
+#ifdef COMPAT_43
+static void
+ia32_osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct ia32_sigframe3 sf, *fp;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ struct trapframe *regs;
+ int sig;
+ int oonstack;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = ksi->ksi_signo;
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_rsp);
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ fp = (struct ia32_sigframe3 *)(td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(sf));
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+ } else
+ fp = (struct ia32_sigframe3 *)regs->tf_rsp - 1;
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ sf.sf_signum = sig;
+ sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ sf.sf_arg2 = (register_t)&fp->sf_siginfo;
+ sf.sf_siginfo.si_signo = sig;
+ sf.sf_siginfo.si_code = ksi->ksi_code;
+ sf.sf_ah = (uintptr_t)catcher;
+ } else {
+ /* Old FreeBSD-style arguments. */
+ sf.sf_arg2 = ksi->ksi_code;
+ sf.sf_addr = (register_t)ksi->ksi_addr;
+ sf.sf_ah = (uintptr_t)catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(p);
+
+ /* Save most if not all of trap frame. */
+ sf.sf_siginfo.si_sc.sc_eax = regs->tf_rax;
+ sf.sf_siginfo.si_sc.sc_ebx = regs->tf_rbx;
+ sf.sf_siginfo.si_sc.sc_ecx = regs->tf_rcx;
+ sf.sf_siginfo.si_sc.sc_edx = regs->tf_rdx;
+ sf.sf_siginfo.si_sc.sc_esi = regs->tf_rsi;
+ sf.sf_siginfo.si_sc.sc_edi = regs->tf_rdi;
+ sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
+ sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
+ sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
+ sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
+ sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
+ sf.sf_siginfo.si_sc.sc_gs = regs->tf_gs;
+ sf.sf_siginfo.si_sc.sc_isp = regs->tf_rsp;
+
+ /* Build the signal context to be used by osigreturn(). */
+ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
+ SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
+ sf.sf_siginfo.si_sc.sc_esp = regs->tf_rsp;
+ sf.sf_siginfo.si_sc.sc_ebp = regs->tf_rbp;
+ sf.sf_siginfo.si_sc.sc_eip = regs->tf_rip;
+ sf.sf_siginfo.si_sc.sc_eflags = regs->tf_rflags;
+ sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
+ sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
+
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, fp, sizeof(*fp)) != 0) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_rsp = (uintptr_t)fp;
+ regs->tf_rip = p->p_sysent->sv_psstrings - sz_ia32_osigcode;
+ regs->tf_rflags &= ~(PSL_T | PSL_D);
+ regs->tf_cs = _ucode32sel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _udatasel;
+ regs->tf_ss = _udatasel;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+#endif
+
+#ifdef COMPAT_FREEBSD4
+static void
+freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct ia32_sigframe4 sf, *sfp;
+ struct siginfo32 siginfo;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ struct trapframe *regs;
+ int oonstack;
+ int sig;
+
+ td = curthread;
+ p = td->td_proc;
+ siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = siginfo.si_signo;
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_rsp);
+
+ /* Save user context. */
+ bzero(&sf, sizeof(sf));
+ sf.sf_uc.uc_sigmask = *mask;
+ sf.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
+ sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
+ sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
+ sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
+ sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
+ sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
+ sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
+ sf.sf_uc.uc_mcontext.mc_isp = regs->tf_rsp; /* XXX */
+ sf.sf_uc.uc_mcontext.mc_ebx = regs->tf_rbx;
+ sf.sf_uc.uc_mcontext.mc_edx = regs->tf_rdx;
+ sf.sf_uc.uc_mcontext.mc_ecx = regs->tf_rcx;
+ sf.sf_uc.uc_mcontext.mc_eax = regs->tf_rax;
+ sf.sf_uc.uc_mcontext.mc_trapno = regs->tf_trapno;
+ sf.sf_uc.uc_mcontext.mc_err = regs->tf_err;
+ sf.sf_uc.uc_mcontext.mc_eip = regs->tf_rip;
+ sf.sf_uc.uc_mcontext.mc_cs = regs->tf_cs;
+ sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
+ sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
+ sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
+ sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
+ sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
+ sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
+ sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
+ bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
+ sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
+ bzero(sf.sf_uc.uc_mcontext.__spare__,
+ sizeof(sf.sf_uc.uc_mcontext.__spare__));
+ bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ sfp = (struct ia32_sigframe4 *)(td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(sf));
+ } else
+ sfp = (struct ia32_sigframe4 *)regs->tf_rsp - 1;
+ PROC_UNLOCK(p);
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ sf.sf_signum = sig;
+ sf.sf_ucontext = (register_t)&sfp->sf_uc;
+ bzero(&sf.sf_si, sizeof(sf.sf_si));
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ sf.sf_siginfo = (u_int32_t)(uintptr_t)&sfp->sf_si;
+ sf.sf_ah = (u_int32_t)(uintptr_t)catcher;
+
+ /* Fill in POSIX parts */
+ sf.sf_si = siginfo;
+ sf.sf_si.si_signo = sig;
+ } else {
+ /* Old FreeBSD-style arguments. */
+ sf.sf_siginfo = siginfo.si_code;
+ sf.sf_addr = (u_int32_t)siginfo.si_addr;
+ sf.sf_ah = (u_int32_t)(uintptr_t)catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_rsp = (uintptr_t)sfp;
+ regs->tf_rip = p->p_sysent->sv_sigcode_base + sz_ia32_sigcode -
+ sz_freebsd4_ia32_sigcode;
+ regs->tf_rflags &= ~(PSL_T | PSL_D);
+ regs->tf_cs = _ucode32sel;
+ regs->tf_ss = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ /* leave user %fs and %gs untouched */
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+#endif /* COMPAT_FREEBSD4 */
+
+void
+ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct ia32_sigframe sf, *sfp;
+ struct siginfo32 siginfo;
+ struct proc *p;
+ struct thread *td;
+ struct sigacts *psp;
+ char *sp;
+ struct trapframe *regs;
+ char *xfpusave;
+ size_t xfpusave_len;
+ int oonstack;
+ int sig;
+
+ siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ sig = siginfo.si_signo;
+ psp = p->p_sigacts;
+#ifdef COMPAT_FREEBSD4
+ if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
+ freebsd4_ia32_sendsig(catcher, ksi, mask);
+ return;
+ }
+#endif
+#ifdef COMPAT_43
+ if (SIGISMEMBER(psp->ps_osigset, sig)) {
+ ia32_osendsig(catcher, ksi, mask);
+ return;
+ }
+#endif
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_rsp);
+
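+	/*
+	 * With XSAVE in use, the extended FPU state does not fit in the
+	 * fixed-size mcontext; stage it in a temporary buffer that is
+	 * copied out next to the sigframe below.
+	 */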
+ if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
+ xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+ xfpusave = __builtin_alloca(xfpusave_len);
+ } else {
+ xfpusave_len = 0;
+ xfpusave = NULL;
+ }
+
+ /* Save user context. */
+ bzero(&sf, sizeof(sf));
+ sf.sf_uc.uc_sigmask = *mask;
+ sf.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
+ sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
+ sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
+ sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
+ sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
+ sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
+ sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
+ sf.sf_uc.uc_mcontext.mc_isp = regs->tf_rsp; /* XXX */
+ sf.sf_uc.uc_mcontext.mc_ebx = regs->tf_rbx;
+ sf.sf_uc.uc_mcontext.mc_edx = regs->tf_rdx;
+ sf.sf_uc.uc_mcontext.mc_ecx = regs->tf_rcx;
+ sf.sf_uc.uc_mcontext.mc_eax = regs->tf_rax;
+ sf.sf_uc.uc_mcontext.mc_trapno = regs->tf_trapno;
+ sf.sf_uc.uc_mcontext.mc_err = regs->tf_err;
+ sf.sf_uc.uc_mcontext.mc_eip = regs->tf_rip;
+ sf.sf_uc.uc_mcontext.mc_cs = regs->tf_cs;
+ sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
+ sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
+ sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
+ sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
+ sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
+ sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
+ sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
+ sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
+ ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
+ fpstate_drop(td);
+ sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
+ sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
+ bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
+
+ /* Allocate space for the signal handler context. */
+ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig))
+ sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
+ else
+ sp = (char *)regs->tf_rsp;
+ if (xfpusave != NULL) {
+ sp -= xfpusave_len;
+ sp = (char *)((unsigned long)sp & ~0x3Ful);
+ sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+ }
+ sp -= sizeof(sf);
+ /* Align to 16 bytes. */
+ sfp = (struct ia32_sigframe *)((uintptr_t)sp & ~0xF);
+ PROC_UNLOCK(p);
+
+ /* Translate the signal if appropriate. */
+ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ /* Build the argument list for the signal handler. */
+ sf.sf_signum = sig;
+ sf.sf_ucontext = (register_t)&sfp->sf_uc;
+ bzero(&sf.sf_si, sizeof(sf.sf_si));
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ sf.sf_siginfo = (u_int32_t)(uintptr_t)&sfp->sf_si;
+ sf.sf_ah = (u_int32_t)(uintptr_t)catcher;
+
+ /* Fill in POSIX parts */
+ sf.sf_si = siginfo;
+ sf.sf_si.si_signo = sig;
+ } else {
+ /* Old FreeBSD-style arguments. */
+ sf.sf_siginfo = siginfo.si_code;
+ sf.sf_addr = (u_int32_t)siginfo.si_addr;
+ sf.sf_ah = (u_int32_t)(uintptr_t)catcher;
+ }
+ mtx_unlock(&psp->ps_mtx);
+
+ /*
+ * Copy the sigframe out to the user's stack.
+ */
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+ (xfpusave != NULL && copyout(xfpusave,
+ PTRIN(sf.sf_uc.uc_mcontext.mc_xfpustate), xfpusave_len)
+ != 0)) {
+#ifdef DEBUG
+ printf("process %ld has trashed its stack\n", (long)p->p_pid);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ regs->tf_rsp = (uintptr_t)sfp;
+ regs->tf_rip = p->p_sysent->sv_sigcode_base;
+ regs->tf_rflags &= ~(PSL_T | PSL_D);
+ regs->tf_cs = _ucode32sel;
+ regs->tf_ss = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ /* XXXKIB leave user %fs and %gs untouched */
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+
+/*
+ * System call to clean up state after a signal
+ * has been taken. Reset the signal mask and
+ * stack state from the context left by sendsig (above).
+ * Return to previous pc and psl as specified by
+ * context left by sendsig. Check carefully to
+ * make sure that the user has not modified the
+ * state to gain improper privileges.
+ */
+
+#ifdef COMPAT_43
+int
+ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap)
+{
+ struct ia32_sigcontext3 sc, *scp;
+ struct trapframe *regs;
+ int eflags, error;
+ ksiginfo_t ksi;
+
+ regs = td->td_frame;
+ error = copyin(uap->sigcntxp, &sc, sizeof(sc));
+ if (error != 0)
+ return (error);
+ scp = &sc;
+ eflags = scp->sc_eflags;
+ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
+ return (EINVAL);
+ }
+ if (!CS_SECURE(scp->sc_cs)) {
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_rip;
+ trapsignal(td, &ksi);
+ return (EINVAL);
+ }
+ regs->tf_ds = scp->sc_ds;
+ regs->tf_es = scp->sc_es;
+ regs->tf_fs = scp->sc_fs;
+ regs->tf_gs = scp->sc_gs;
+
+ regs->tf_rax = scp->sc_eax;
+ regs->tf_rbx = scp->sc_ebx;
+ regs->tf_rcx = scp->sc_ecx;
+ regs->tf_rdx = scp->sc_edx;
+ regs->tf_rsi = scp->sc_esi;
+ regs->tf_rdi = scp->sc_edi;
+ regs->tf_cs = scp->sc_cs;
+ regs->tf_ss = scp->sc_ss;
+ regs->tf_rbp = scp->sc_ebp;
+ regs->tf_rsp = scp->sc_esp;
+ regs->tf_rip = scp->sc_eip;
+ regs->tf_rflags = eflags;
+
+ if (scp->sc_onstack & 1)
+ td->td_sigstk.ss_flags |= SS_ONSTACK;
+ else
+ td->td_sigstk.ss_flags &= ~SS_ONSTACK;
+
+ kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
+ SIGPROCMASK_OLD);
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ return (EJUSTRETURN);
+}
+#endif
+
+#ifdef COMPAT_FREEBSD4
+/*
+ * MPSAFE
+ */
+int
+freebsd4_freebsd32_sigreturn(struct thread *td,
+    struct freebsd4_freebsd32_sigreturn_args /* {
+	const struct freebsd4_freebsd32_ucontext *sigcntxp;
+    } */ *uap)
+{
+ struct ia32_ucontext4 uc;
+ struct trapframe *regs;
+ struct ia32_ucontext4 *ucp;
+ int cs, eflags, error;
+ ksiginfo_t ksi;
+
+ error = copyin(uap->sigcntxp, &uc, sizeof(uc));
+ if (error != 0)
+ return (error);
+ ucp = &uc;
+ regs = td->td_frame;
+ eflags = ucp->uc_mcontext.mc_eflags;
+ /*
+ * Don't allow users to change privileged or reserved flags.
+ */
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF.
+ * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
+ * should sometimes set it there too. tf_eflags is kept in
+ * the signal context during signal handling and there is no
+ * other place to remember it, so the PSL_RF bit may be
+ * corrupted by the signal handler without us knowing.
+ * Corruption of the PSL_RF bit at worst causes one more or
+ * one less debugger trap, so allowing it is fairly harmless.
+ */
+ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
+ uprintf("pid %d (%s): freebsd4_freebsd32_sigreturn eflags = 0x%x\n",
+ td->td_proc->p_pid, td->td_name, eflags);
+ return (EINVAL);
+ }
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+ cs = ucp->uc_mcontext.mc_cs;
+ if (!CS_SECURE(cs)) {
+ uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
+ td->td_proc->p_pid, td->td_name, cs);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_rip;
+ trapsignal(td, &ksi);
+ return (EINVAL);
+ }
+
+ regs->tf_rdi = ucp->uc_mcontext.mc_edi;
+ regs->tf_rsi = ucp->uc_mcontext.mc_esi;
+ regs->tf_rbp = ucp->uc_mcontext.mc_ebp;
+ regs->tf_rbx = ucp->uc_mcontext.mc_ebx;
+ regs->tf_rdx = ucp->uc_mcontext.mc_edx;
+ regs->tf_rcx = ucp->uc_mcontext.mc_ecx;
+ regs->tf_rax = ucp->uc_mcontext.mc_eax;
+ regs->tf_trapno = ucp->uc_mcontext.mc_trapno;
+ regs->tf_err = ucp->uc_mcontext.mc_err;
+ regs->tf_rip = ucp->uc_mcontext.mc_eip;
+ regs->tf_cs = cs;
+ regs->tf_rflags = ucp->uc_mcontext.mc_eflags;
+ regs->tf_rsp = ucp->uc_mcontext.mc_esp;
+ regs->tf_ss = ucp->uc_mcontext.mc_ss;
+ regs->tf_ds = ucp->uc_mcontext.mc_ds;
+ regs->tf_es = ucp->uc_mcontext.mc_es;
+ regs->tf_fs = ucp->uc_mcontext.mc_fs;
+ regs->tf_gs = ucp->uc_mcontext.mc_gs;
+
+ kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ return (EJUSTRETURN);
+}
+#endif /* COMPAT_FREEBSD4 */
+
+/*
+ * MPSAFE
+ */
+int
+freebsd32_sigreturn(struct thread *td,
+    struct freebsd32_sigreturn_args /* {
+	const struct freebsd32_ucontext *sigcntxp;
+    } */ *uap)
+{
+ struct ia32_ucontext uc;
+ struct trapframe *regs;
+ struct ia32_ucontext *ucp;
+ char *xfpustate;
+ size_t xfpustate_len;
+ int cs, eflags, error, ret;
+ ksiginfo_t ksi;
+
+ error = copyin(uap->sigcntxp, &uc, sizeof(uc));
+ if (error != 0)
+ return (error);
+ ucp = &uc;
+ regs = td->td_frame;
+ eflags = ucp->uc_mcontext.mc_eflags;
+ /*
+ * Don't allow users to change privileged or reserved flags.
+ */
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF.
+ * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
+ * should sometimes set it there too. tf_eflags is kept in
+ * the signal context during signal handling and there is no
+ * other place to remember it, so the PSL_RF bit may be
+ * corrupted by the signal handler without us knowing.
+ * Corruption of the PSL_RF bit at worst causes one more or
+ * one less debugger trap, so allowing it is fairly harmless.
+ */
+ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
+ uprintf("pid %d (%s): freebsd32_sigreturn eflags = 0x%x\n",
+ td->td_proc->p_pid, td->td_name, eflags);
+ return (EINVAL);
+ }
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+ cs = ucp->uc_mcontext.mc_cs;
+ if (!CS_SECURE(cs)) {
+ uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
+ td->td_proc->p_pid, td->td_name, cs);
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_rip;
+ trapsignal(td, &ksi);
+ return (EINVAL);
+ }
+
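+	/*
+	 * If the context carries extended FPU state, bound its length
+	 * by what the CPU can hold before copying it in from user space.
+	 */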
+ if ((ucp->uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+ xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+ if (xfpustate_len > cpu_max_ext_state_size -
+ sizeof(struct savefpu)) {
+			uprintf("pid %d (%s): sigreturn xfpustate_len = 0x%zx\n",
+ td->td_proc->p_pid, td->td_name, xfpustate_len);
+ return (EINVAL);
+ }
+ xfpustate = __builtin_alloca(xfpustate_len);
+ error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
+ xfpustate, xfpustate_len);
+ if (error != 0) {
+ uprintf(
+ "pid %d (%s): sigreturn copying xfpustate failed\n",
+ td->td_proc->p_pid, td->td_name);
+ return (error);
+ }
+ } else {
+ xfpustate = NULL;
+ xfpustate_len = 0;
+ }
+ ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
+ xfpustate_len);
+ if (ret != 0) {
+ uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
+ td->td_proc->p_pid, td->td_name, ret);
+ return (ret);
+ }
+
+ regs->tf_rdi = ucp->uc_mcontext.mc_edi;
+ regs->tf_rsi = ucp->uc_mcontext.mc_esi;
+ regs->tf_rbp = ucp->uc_mcontext.mc_ebp;
+ regs->tf_rbx = ucp->uc_mcontext.mc_ebx;
+ regs->tf_rdx = ucp->uc_mcontext.mc_edx;
+ regs->tf_rcx = ucp->uc_mcontext.mc_ecx;
+ regs->tf_rax = ucp->uc_mcontext.mc_eax;
+ regs->tf_trapno = ucp->uc_mcontext.mc_trapno;
+ regs->tf_err = ucp->uc_mcontext.mc_err;
+ regs->tf_rip = ucp->uc_mcontext.mc_eip;
+ regs->tf_cs = cs;
+ regs->tf_rflags = ucp->uc_mcontext.mc_eflags;
+ regs->tf_rsp = ucp->uc_mcontext.mc_esp;
+ regs->tf_ss = ucp->uc_mcontext.mc_ss;
+ regs->tf_ds = ucp->uc_mcontext.mc_ds;
+ regs->tf_es = ucp->uc_mcontext.mc_es;
+ regs->tf_fs = ucp->uc_mcontext.mc_fs;
+ regs->tf_gs = ucp->uc_mcontext.mc_gs;
+ regs->tf_flags = TF_HASSEGS;
+
+ kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ return (EJUSTRETURN);
+}
+
+/*
+ * Clear registers on exec
+ */
+void
+ia32_setregs(struct thread *td, struct image_params *imgp, u_long stack)
+{
+ struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+#ifdef COMPAT_43
+ setup_lcall_gate();
+#endif
+
+ pcb->pcb_fsbase = 0;
+ pcb->pcb_gsbase = 0;
+ pcb->pcb_initial_fpucw = __INITIAL_FPUCW_I386__;
+
+ bzero((char *)regs, sizeof(struct trapframe));
+ regs->tf_rip = imgp->entry_addr;
+ regs->tf_rsp = stack;
+ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
+ regs->tf_ss = _udatasel;
+ regs->tf_cs = _ucode32sel;
+ regs->tf_rbx = imgp->ps_strings;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+
+ fpstate_drop(td);
+
+ /* Return via doreti so that we can change to a different %cs */
+ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
+ clear_pcb_flags(pcb, PCB_GS32BIT);
+ td->td_retval[1] = 0;
+}
diff --git a/sys/amd64/ia32/ia32_sigtramp.S b/sys/amd64/ia32/ia32_sigtramp.S
new file mode 100644
index 0000000..3541988
--- /dev/null
+++ b/sys/amd64/ia32/ia32_sigtramp.S
@@ -0,0 +1,161 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_compat.h"
+
+#include <machine/asmacros.h>
+#include <sys/syscall.h>
+
+#include "ia32_assym.h"
+
+ .text
+ .code32
+/*
+ * Signal trampoline, copied to the top of the user stack.
+ * XXX may need to be MD to match backend sendsig handoff protocol
+ */
+ ALIGN_TEXT
+ .globl ia32_sigcode
+ia32_sigcode:
+ calll *IA32_SIGF_HANDLER(%esp)
+ leal IA32_SIGF_UC(%esp),%eax /* get ucontext */
+ pushl %eax
+ movl $SYS_sigreturn,%eax
+ pushl %eax /* junk to fake return addr. */
+ int $0x80 /* enter kernel with args */
+ /* on stack */
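+	/* sigreturn(2) does not return on success; spin if it somehow does. */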
+1:
+ jmp 1b
+
+#ifdef COMPAT_FREEBSD4
+ ALIGN_TEXT
+freebsd4_ia32_sigcode:
+ calll *IA32_SIGF_HANDLER(%esp)
+	leal	IA32_SIGF_UC4(%esp),%eax	/* get ucontext */
+ pushl %eax
+ movl $344,%eax /* 4.x SYS_sigreturn */
+ pushl %eax /* junk to fake return addr. */
+ int $0x80 /* enter kernel with args */
+ /* on stack */
+1:
+ jmp 1b
+#endif
+
+#ifdef COMPAT_43
+ ALIGN_TEXT
+ia32_osigcode:
+	calll	*IA32_SIGF_HANDLER(%esp)	/* call signal handler */
+ leal IA32_SIGF_SC(%esp),%eax /* get sigcontext */
+ pushl %eax
+ movl $103,%eax /* 3.x SYS_sigreturn */
+ pushl %eax /* junk to fake return addr. */
+ int $0x80 /* enter kernel with args */
+1:
+ jmp 1b
+
+/*
+ * The lcall $7,$0 emulator cannot use the call gate that does an
+ * inter-privilege transition. The reason is that the call gate
+ * does not disable interrupts, and, before the swapgs is
+ * executed, we would have a window where the ring 0 code is
+ * executed with the wrong gsbase.
+ *
+ * Instead, reflect the lcall $7,$0 back to ring 3 trampoline
+ * which sets up the frame for int $0x80.
+ */
+ ALIGN_TEXT
+lcall_tramp:
+ .code64
+ /*
+	 * Here we are in 64-bit mode and need to return to 32-bit mode.
+	 * First, convert the call frame from 64- to 32-bit format.
+ */
+ pushq %rax
+ movl 16(%rsp),%eax
+ movl %eax,20(%rsp) /* ret %cs */
+ movl 8(%rsp),%eax
+ movl %eax,16(%rsp) /* ret %rip -> %eip */
+ popq %rax
+ addq $8,%rsp
+ /* Now return to 32bit */
+ pushq $0x33 /* _ucode32sel UPL */
+ callq 1f
+1:
+ addq $2f-1b,(%rsp)
+ lretq
+2:
+ /* Back in 32bit mode */
+ .code32
+ cmpl $SYS_vfork,%eax
+ je 4f
+ pushl %ebp
+ movl %esp,%ebp
+ pushl 0x24(%ebp) /* arg 6 */
+ pushl 0x20(%ebp)
+ pushl 0x1c(%ebp)
+ pushl 0x18(%ebp)
+ pushl 0x14(%ebp)
+ pushl 0x10(%ebp) /* arg 1 */
+ pushl 0xc(%ebp) /* gap */
+ int $0x80
+ leavel
+3:
+ lretl
+4:
+ /*
+ * vfork handling is special and relies on the libc stub saving
+ * the return ip in %ecx. If vfork failed, then there is no
+	 * child which could corrupt the frame created by the call gate.
+ */
+ int $0x80
+ jb 3b
+ addl $8,%esp
+ jmpl *%ecx
+#endif
+
+ ALIGN_TEXT
+esigcode:
+
+ .data
+ .globl sz_ia32_sigcode
+sz_ia32_sigcode:
+ .long esigcode-ia32_sigcode
+#ifdef COMPAT_FREEBSD4
+ .globl sz_freebsd4_ia32_sigcode
+sz_freebsd4_ia32_sigcode:
+ .long esigcode-freebsd4_ia32_sigcode
+#endif
+#ifdef COMPAT_43
+ .globl sz_ia32_osigcode
+sz_ia32_osigcode:
+ .long esigcode-ia32_osigcode
+ .globl sz_lcall_tramp
+sz_lcall_tramp:
+ .long esigcode-lcall_tramp
+#endif
diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c
new file mode 100644
index 0000000..0cdec6f
--- /dev/null
+++ b/sys/amd64/ia32/ia32_syscall.c
@@ -0,0 +1,255 @@
+/*-
+ * Copyright (C) 1994, David Greenman
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the University of Utah, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * 386 Trap and System call handling
+ */
+
+#include "opt_clock.h"
+#include "opt_compat.h"
+#include "opt_cpu.h"
+#include "opt_isa.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/systm.h>
+#include <sys/pioctl.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/ptrace.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/uio.h>
+#include <sys/vmmeter.h>
+#include <security/audit/audit.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+
+#include <compat/freebsd32/freebsd32_signal.h>
+#include <compat/freebsd32/freebsd32_util.h>
+#include <compat/ia32/ia32_signal.h>
+#include <machine/psl.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <machine/frame.h>
+#include <machine/pcb.h>
+#include <machine/cpufunc.h>
+
+#define IDTVEC(name) __CONCAT(X,name)
+
+extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(rsvd);
+
+void ia32_syscall(struct trapframe *frame); /* Called from asm code */
+
+void
+ia32_set_syscall_retval(struct thread *td, int error)
+{
+
+ cpu_set_syscall_retval(td, error);
+}
+
+int
+ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+{
+ struct proc *p;
+ struct trapframe *frame;
+ caddr_t params;
+ u_int32_t args[8];
+ int error, i;
+
+ p = td->td_proc;
+ frame = td->td_frame;
+
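+	/* 32-bit arguments start just above the return address on the stack. */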
+ params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
+ sa->code = frame->tf_rax;
+
+ /*
+	 * The syscall number may be passed indirectly via syscall(2)
+	 * or __syscall(2); fetch the actual number in that case.
+ */
+ if (sa->code == SYS_syscall) {
+ /*
+ * Code is first argument, followed by actual args.
+ */
+ sa->code = fuword32(params);
+ params += sizeof(int);
+ } else if (sa->code == SYS___syscall) {
+ /*
+ * Like syscall, but code is a quad, so as to maintain
+ * quad alignment for the rest of the arguments.
+ * We use a 32-bit fetch in case params is not
+ * aligned.
+ */
+ sa->code = fuword32(params);
+ params += sizeof(quad_t);
+ }
+ if (p->p_sysent->sv_mask)
+ sa->code &= p->p_sysent->sv_mask;
+ if (sa->code >= p->p_sysent->sv_size)
+ sa->callp = &p->p_sysent->sv_table[0];
+ else
+ sa->callp = &p->p_sysent->sv_table[sa->code];
+ sa->narg = sa->callp->sy_narg;
+
+ if (params != NULL && sa->narg != 0)
+ error = copyin(params, (caddr_t)args,
+ (u_int)(sa->narg * sizeof(int)));
+ else
+ error = 0;
+
+ for (i = 0; i < sa->narg; i++)
+ sa->args[i] = args[i];
+
+ if (error == 0) {
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame->tf_rdx;
+ }
+
+ return (error);
+}
+
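+/*
+ * Pull in the MI syscall layer; syscallenter() and syscallret() used
+ * by ia32_syscall() below are defined there.
+ */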
+#include "../../kern/subr_syscall.c"
+
+void
+ia32_syscall(struct trapframe *frame)
+{
+ struct thread *td;
+ struct syscall_args sa;
+ register_t orig_tf_rflags;
+ int error;
+ ksiginfo_t ksi;
+
+ orig_tf_rflags = frame->tf_rflags;
+ td = curthread;
+ td->td_frame = frame;
+
+ error = syscallenter(td, &sa);
+
+ /*
+ * Traced syscall.
+ */
+ if (orig_tf_rflags & PSL_T) {
+ frame->tf_rflags &= ~PSL_T;
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGTRAP;
+ ksi.ksi_code = TRAP_TRACE;
+ ksi.ksi_addr = (void *)frame->tf_rip;
+ trapsignal(td, &ksi);
+ }
+
+ syscallret(td, error, &sa);
+}
+
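+/*
+ * 32-bit system calls enter through int $0x80; the IDT vector is
+ * installed late in boot (SI_SUB_EXEC) and reset by the matching
+ * SYSUNINIT.
+ */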
+static void
+ia32_syscall_enable(void *dummy)
+{
+
+ setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0);
+}
+
+static void
+ia32_syscall_disable(void *dummy)
+{
+
+ setidt(IDT_SYSCALL, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+}
+
+SYSINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_enable, NULL);
+SYSUNINIT(ia32_syscall, SI_SUB_EXEC, SI_ORDER_ANY, ia32_syscall_disable, NULL);
+
+#ifdef COMPAT_43
+int
+setup_lcall_gate(void)
+{
+ struct i386_ldt_args uap;
+ struct user_segment_descriptor descs[2];
+ struct gate_descriptor *ssd;
+ uint32_t lcall_addr;
+ int error;
+
+ bzero(&uap, sizeof(uap));
+ uap.start = 0;
+ uap.num = 2;
+
+ /*
+	 * This is the easiest way to carve out space for a system
+	 * descriptor in the LDT; the descriptor type is manually
+	 * adjusted to a call gate below.
+ */
+ bzero(&descs[0], sizeof(descs));
+ descs[0].sd_type = SDT_SYSNULL;
+ descs[1].sd_type = SDT_SYSNULL;
+ error = amd64_set_ldt(curthread, &uap, descs);
+ if (error != 0)
+ return (error);
+
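+	/* The trampoline is the tail of the sigcode copied below ps_strings. */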
+ lcall_addr = curproc->p_sysent->sv_psstrings - sz_lcall_tramp;
+ mtx_lock(&dt_lock);
+ ssd = (struct gate_descriptor *)(curproc->p_md.md_ldt->ldt_base);
+ bzero(ssd, sizeof(*ssd));
+ ssd->gd_looffset = lcall_addr;
+ ssd->gd_hioffset = lcall_addr >> 16;
+ ssd->gd_selector = _ucodesel;
+ ssd->gd_type = SDT_SYSCGT;
+ ssd->gd_dpl = SEL_UPL;
+ ssd->gd_p = 1;
+ mtx_unlock(&dt_lock);
+
+ return (0);
+}
+#endif
diff --git a/sys/amd64/include/_align.h b/sys/amd64/include/_align.h
new file mode 100644
index 0000000..28c4669
--- /dev/null
+++ b/sys/amd64/include/_align.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/_align.h>
diff --git a/sys/amd64/include/_bus.h b/sys/amd64/include/_bus.h
new file mode 100644
index 0000000..a8cbf48
--- /dev/null
+++ b/sys/amd64/include/_bus.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2005 M. Warner Losh.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef AMD64_INCLUDE__BUS_H
+#define AMD64_INCLUDE__BUS_H
+
+/*
+ * Bus address and size types
+ */
+typedef uint64_t bus_addr_t;
+typedef uint64_t bus_size_t;
+
+/*
+ * Access methods for bus resources and address space.
+ */
+typedef uint64_t bus_space_tag_t;
+typedef uint64_t bus_space_handle_t;
+
+#endif /* AMD64_INCLUDE__BUS_H */
diff --git a/sys/amd64/include/_inttypes.h b/sys/amd64/include/_inttypes.h
new file mode 100644
index 0000000..40107cd
--- /dev/null
+++ b/sys/amd64/include/_inttypes.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/_inttypes.h>
diff --git a/sys/amd64/include/_limits.h b/sys/amd64/include/_limits.h
new file mode 100644
index 0000000..3c7365b
--- /dev/null
+++ b/sys/amd64/include/_limits.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/_limits.h>
diff --git a/sys/amd64/include/_stdint.h b/sys/amd64/include/_stdint.h
new file mode 100644
index 0000000..db1affc
--- /dev/null
+++ b/sys/amd64/include/_stdint.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/_stdint.h>
diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h
new file mode 100644
index 0000000..2680367
--- /dev/null
+++ b/sys/amd64/include/_types.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/_types.h>
diff --git a/sys/amd64/include/acpica_machdep.h b/sys/amd64/include/acpica_machdep.h
new file mode 100644
index 0000000..9943af7
--- /dev/null
+++ b/sys/amd64/include/acpica_machdep.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 2002 Mitsuru IWASAKI
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/******************************************************************************
+ *
+ * Name: acpica_machdep.h - arch-specific defines, etc.
+ * $Revision$
+ *
+ *****************************************************************************/
+
+#ifndef __ACPICA_MACHDEP_H__
+#define __ACPICA_MACHDEP_H__
+
+#ifdef _KERNEL
+/*
+ * Calling conventions:
+ *
+ * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
+ * ACPI_EXTERNAL_XFACE - External ACPI interfaces
+ * ACPI_INTERNAL_XFACE - Internal ACPI interfaces
+ * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
+ */
+#define ACPI_SYSTEM_XFACE
+#define ACPI_EXTERNAL_XFACE
+#define ACPI_INTERNAL_XFACE
+#define ACPI_INTERNAL_VAR_XFACE
+
+/* Asm macros */
+
+#define ACPI_ASM_MACROS
+#define BREAKPOINT3
+#define ACPI_DISABLE_IRQS() disable_intr()
+#define ACPI_ENABLE_IRQS() enable_intr()
+
+#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+
+/* Section 5.2.9.1: global lock acquire/release functions */
+extern int acpi_acquire_global_lock(uint32_t *lock);
+extern int acpi_release_global_lock(uint32_t *lock);
+#define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) do { \
+ (Acq) = acpi_acquire_global_lock(&((GLptr)->GlobalLock)); \
+} while (0)
+#define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) do { \
+ (Acq) = acpi_release_global_lock(&((GLptr)->GlobalLock)); \
+} while (0)
+
+#endif /* _KERNEL */
+
+#define ACPI_MACHINE_WIDTH 64
+#define COMPILER_DEPENDENT_INT64 long
+#define COMPILER_DEPENDENT_UINT64 unsigned long
+
+void acpi_SetDefaultIntrModel(int model);
+void acpi_cpu_c1(void);
+void *acpi_map_table(vm_paddr_t pa, const char *sig);
+void acpi_unmap_table(void *table);
+vm_paddr_t acpi_find_table(const char *sig);
+
+#endif /* __ACPICA_MACHDEP_H__ */
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
new file mode 100644
index 0000000..ae2f5b9
--- /dev/null
+++ b/sys/amd64/include/apicvar.h
@@ -0,0 +1,232 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_APICVAR_H_
+#define _MACHINE_APICVAR_H_
+
+#include <machine/segments.h>
+
+/*
+ * Local && I/O APIC variable definitions.
+ */
+
+/*
+ * Layout of local APIC interrupt vectors:
+ *
+ * 0xff (255) +-------------+
+ * | | 15 (Spurious / IPIs / Local Interrupts)
+ * 0xf0 (240) +-------------+
+ * | | 14 (I/O Interrupts / Timer)
+ * 0xe0 (224) +-------------+
+ * | | 13 (I/O Interrupts)
+ * 0xd0 (208) +-------------+
+ * | | 12 (I/O Interrupts)
+ * 0xc0 (192) +-------------+
+ * | | 11 (I/O Interrupts)
+ * 0xb0 (176) +-------------+
+ * | | 10 (I/O Interrupts)
+ * 0xa0 (160) +-------------+
+ * | | 9 (I/O Interrupts)
+ * 0x90 (144) +-------------+
+ * | | 8 (I/O Interrupts / System Calls)
+ * 0x80 (128) +-------------+
+ * | | 7 (I/O Interrupts)
+ * 0x70 (112) +-------------+
+ * | | 6 (I/O Interrupts)
+ * 0x60 (96) +-------------+
+ * | | 5 (I/O Interrupts)
+ * 0x50 (80) +-------------+
+ * | | 4 (I/O Interrupts)
+ * 0x40 (64) +-------------+
+ * | | 3 (I/O Interrupts)
+ * 0x30 (48) +-------------+
+ * | | 2 (ATPIC Interrupts)
+ * 0x20 (32) +-------------+
+ * | | 1 (Exceptions, traps, faults, etc.)
+ * 0x10 (16) +-------------+
+ * | | 0 (Exceptions, traps, faults, etc.)
+ * 0x00 (0) +-------------+
+ *
+ * Note: 0x80 needs to be handled specially and not allocated to an
+ * I/O device!
+ */
+
+#define MAX_APIC_ID 0xfe
+#define APIC_ID_ALL 0xff
+
+/* I/O Interrupts are used for external devices such as ISA, PCI, etc. */
+#define APIC_IO_INTS (IDT_IO_INTS + 16)
+#define APIC_NUM_IOINTS 191
+
+/* The timer interrupt is used for clock handling and drives hardclock, etc. */
+#define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS)
+
+/*
+ ********************* !!! WARNING !!! ******************************
+ * Each local APIC has an interrupt receive FIFO that is two entries deep
+ * for each interrupt priority class (upper 4 bits of the interrupt vector).
+ * Once the FIFO is full, the APIC can no longer receive interrupts for that
+ * class, and IPIs sent from other CPUs will be blocked.
+ * To avoid deadlocks there should be no more than two IPI interrupts
+ * pending at the same time.
+ * Currently this is guaranteed by dividing the IPIs into two groups, each
+ * of which has at most one IPI interrupt pending. The first group is
+ * protected by smp_ipi_mtx and waits for completion of the IPI (only one
+ * IPI user at a time). The second group uses a single interrupt and a
+ * bitmap to avoid redundant IPI interrupts.
+ */
+
+/* Interrupts for local APIC LVT entries other than the timer. */
+#define APIC_LOCAL_INTS 240
+#define APIC_ERROR_INT APIC_LOCAL_INTS
+#define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1)
+#define APIC_CMC_INT (APIC_LOCAL_INTS + 2)
+
+#define APIC_IPI_INTS (APIC_LOCAL_INTS + 3)
+#define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */
+#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */
+#define IPI_INVLPG (APIC_IPI_INTS + 2)
+#define IPI_INVLRNG (APIC_IPI_INTS + 3)
+#define IPI_INVLCACHE (APIC_IPI_INTS + 4)
+/* Vector to handle bitmap based IPIs */
+#define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 6)
+
+/* IPIs handled by IPI_BITMAP_VECTOR (XXX ups: is there a better place?) */
+#define IPI_AST 0 /* Generate software trap. */
+#define IPI_PREEMPT 1
+#define IPI_HARDCLOCK 2
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
+#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
+
+#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
+#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */
+#define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */
+
+/*
+ * The spurious interrupt can share the priority class with the IPIs since
+ * it is not a normal interrupt (it does not use the APIC's interrupt FIFO).
+ */
+#define APIC_SPURIOUS_INT 255
+
+#define LVT_LINT0 0
+#define LVT_LINT1 1
+#define LVT_TIMER 2
+#define LVT_ERROR 3
+#define LVT_PMC 4
+#define LVT_THERMAL 5
+#define LVT_CMCI 6
+#define LVT_MAX LVT_CMCI
+
+#ifndef LOCORE
+
+#define APIC_IPI_DEST_SELF -1
+#define APIC_IPI_DEST_ALL -2
+#define APIC_IPI_DEST_OTHERS -3
+
+#define APIC_BUS_UNKNOWN -1
+#define APIC_BUS_ISA 0
+#define APIC_BUS_EISA 1
+#define APIC_BUS_PCI 2
+#define APIC_BUS_MAX APIC_BUS_PCI
+
+/*
+ * An APIC enumerator is a pseudo-bus driver that enumerates APICs,
+ * including CPUs and I/O APICs.
+ */
+struct apic_enumerator {
+ const char *apic_name;
+ int (*apic_probe)(void);
+ int (*apic_probe_cpus)(void);
+ int (*apic_setup_local)(void);
+ int (*apic_setup_io)(void);
+ SLIST_ENTRY(apic_enumerator) apic_next;
+};
+
+inthand_t
+ IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3),
+ IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6),
+ IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint),
+ IDTVEC(spuriousint), IDTVEC(timerint);
+
+extern vm_paddr_t lapic_paddr;
+extern int apic_cpuids[];
+
+u_int apic_alloc_vector(u_int apic_id, u_int irq);
+u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count,
+ u_int align);
+void apic_disable_vector(u_int apic_id, u_int vector);
+void apic_enable_vector(u_int apic_id, u_int vector);
+void apic_free_vector(u_int apic_id, u_int vector, u_int irq);
+u_int apic_idt_to_irq(u_int apic_id, u_int vector);
+void apic_register_enumerator(struct apic_enumerator *enumerator);
+u_int apic_cpuid(u_int apic_id);
+void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase);
+int ioapic_disable_pin(void *cookie, u_int pin);
+int ioapic_get_vector(void *cookie, u_int pin);
+void ioapic_register(void *cookie);
+int ioapic_remap_vector(void *cookie, u_int pin, int vector);
+int ioapic_set_bus(void *cookie, u_int pin, int bus_type);
+int ioapic_set_extint(void *cookie, u_int pin);
+int ioapic_set_nmi(void *cookie, u_int pin);
+int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol);
+int ioapic_set_triggermode(void *cookie, u_int pin,
+ enum intr_trigger trigger);
+int ioapic_set_smi(void *cookie, u_int pin);
+void lapic_create(u_int apic_id, int boot_cpu);
+void lapic_disable(void);
+void lapic_disable_pmc(void);
+void lapic_dump(const char *str);
+void lapic_enable_cmc(void);
+int lapic_enable_pmc(void);
+void lapic_eoi(void);
+int lapic_id(void);
+void lapic_init(vm_paddr_t addr);
+int lapic_intr_pending(u_int vector);
+void lapic_ipi_raw(register_t icrlo, u_int dest);
+void lapic_ipi_vectored(u_int vector, int dest);
+int lapic_ipi_wait(int delay);
+void lapic_handle_cmc(void);
+void lapic_handle_error(void);
+void lapic_handle_intr(int vector, struct trapframe *frame);
+void lapic_handle_timer(struct trapframe *frame);
+void lapic_reenable_pmc(void);
+void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id);
+int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked);
+int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode);
+int lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
+ enum intr_polarity pol);
+int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
+ enum intr_trigger trigger);
+void lapic_set_tpr(u_int vector);
+void lapic_setup(int boot);
+
+#endif /* !LOCORE */
+#endif /* _MACHINE_APICVAR_H_ */
diff --git a/sys/amd64/include/apm_bios.h b/sys/amd64/include/apm_bios.h
new file mode 100644
index 0000000..9cc0eee
--- /dev/null
+++ b/sys/amd64/include/apm_bios.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/apm_bios.h>
diff --git a/sys/amd64/include/asm.h b/sys/amd64/include/asm.h
new file mode 100644
index 0000000..7efd642
--- /dev/null
+++ b/sys/amd64/include/asm.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)DEFS.h 5.1 (Berkeley) 4/23/90
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_ASM_H_
+#define _MACHINE_ASM_H_
+
+#include <sys/cdefs.h>
+
+#ifdef PIC
+#define PIC_PLT(x) x@PLT
+#define PIC_GOT(x) x@GOTPCREL(%rip)
+#else
+#define PIC_PLT(x) x
+#define PIC_GOT(x) x
+#endif
+
+/*
+ * CNAME and HIDENAME manage the relationship between symbol names in C
+ * and the equivalent assembly language names. CNAME is given a name as
+ * it would be used in a C program. It expands to the equivalent assembly
+ * language name. HIDENAME is given an assembly-language name, and expands
+ * to a possibly-modified form that will be invisible to C programs.
+ */
+#define CNAME(csym) csym
+#define HIDENAME(asmsym) .asmsym
+
+#define _START_ENTRY .text; .p2align 4,0x90
+
+#define _ENTRY(x) _START_ENTRY; \
+ .globl CNAME(x); .type CNAME(x),@function; CNAME(x):
+
+#ifdef PROF
+#define ALTENTRY(x) _ENTRY(x); \
+ pushq %rbp; movq %rsp,%rbp; \
+ call PIC_PLT(HIDENAME(mcount)); \
+ popq %rbp; \
+ jmp 9f
+#define ENTRY(x) _ENTRY(x); \
+ pushq %rbp; movq %rsp,%rbp; \
+ call PIC_PLT(HIDENAME(mcount)); \
+ popq %rbp; \
+ 9:
+#else
+#define ALTENTRY(x) _ENTRY(x)
+#define ENTRY(x) _ENTRY(x)
+#endif
+
+#define END(x) .size x, . - x
+
+#define RCSID(x) .text; .asciz x
+
+#undef __FBSDID
+#if !defined(lint) && !defined(STRIP_FBSDID)
+#define __FBSDID(s) .ident s
+#else
+#define __FBSDID(s) /* nothing */
+#endif /* not lint and not STRIP_FBSDID */
+
+#endif /* !_MACHINE_ASM_H_ */
diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h
new file mode 100644
index 0000000..1fb592a
--- /dev/null
+++ b/sys/amd64/include/asmacros.h
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_ASMACROS_H_
+#define _MACHINE_ASMACROS_H_
+
+#include <sys/cdefs.h>
+
+/* XXX too much duplication in various asm*.h's. */
+
+/*
+ * CNAME is used to manage the relationship between symbol names in C
+ * and the equivalent assembly language names. CNAME is given a name as
+ * it would be used in a C program. It expands to the equivalent assembly
+ * language name.
+ */
+#define CNAME(csym) csym
+
+#define ALIGN_DATA .p2align 3 /* 8 byte alignment, zero filled */
+#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */
+#define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+#define GEN_ENTRY(name) ALIGN_TEXT; .globl CNAME(name); \
+ .type CNAME(name),@function; CNAME(name):
+#define NON_GPROF_ENTRY(name) GEN_ENTRY(name)
+#define NON_GPROF_RET .byte 0xc3 /* opcode for `ret' */
+
+#define END(name) .size name, . - name
+
+#ifdef GPROF
+/*
+ * __mcount is like [.]mcount except that it doesn't require its caller to set
+ * up a frame pointer. It must be called before pushing anything onto the
+ * stack. gcc should eventually generate code to call __mcount in most
+ * cases. This would make -pg in combination with -fomit-frame-pointer
+ * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
+ * allow profiling before setting up the frame pointer, but this is
+ * inadequate for good handling of special cases, e.g., -fpic works best
+ * with profiling after the prologue.
+ *
+ * [.]mexitcount is a new function to support non-statistical profiling if an
+ * accurate clock is available. For C sources, calls to it are generated
+ * by the FreeBSD extension `-mprofiler-epilogue' to gcc. It is best to
+ * call [.]mexitcount at the end of a function like the MEXITCOUNT macro does,
+ * but gcc currently generates calls to it at the start of the epilogue to
+ * avoid problems with -fpic.
+ *
+ * [.]mcount and __mcount may clobber the call-used registers and %eflags.
+ * [.]mexitcount may clobber %ecx and %eflags.
+ *
+ * Cross-jumping makes non-statistical profiling timing more complicated.
+ * It is handled in many cases by calling [.]mexitcount before jumping. It
+ * is handled for conditional jumps using CROSSJUMP() and CROSSJUMP_LABEL().
+ * It is handled for some fault-handling jumps by not sharing the exit
+ * routine.
+ *
+ * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
+ * the main entry point. Note that alt entries are counted twice. They
+ * have to be counted as ordinary entries for gprof to get the call times
+ * right for the ordinary entries.
+ *
+ * High local labels are used in macros to avoid clashes with local labels
+ * in functions.
+ *
+ * Ordinary `ret' is used instead of a macro `RET' because there are a lot
+ * of `ret's. 0xc3 is the opcode for `ret' (`#define ret ... ret' can't
+ * be used because this file is sometimes preprocessed in traditional mode).
+ * `ret' clobbers eflags but this doesn't matter.
+ */
+#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
+#define CROSSJUMP(jtrue, label, jfalse) \
+ jfalse 8f; MEXITCOUNT; jmp __CONCAT(to,label); 8:
+#define CROSSJUMPTARGET(label) \
+ ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label
+#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT
+#define FAKE_MCOUNT(caller) pushq caller ; call __mcount ; popq %rcx
+#define MCOUNT call __mcount
+#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT
+#ifdef GUPROF
+#define MEXITCOUNT call .mexitcount
+#define ret MEXITCOUNT ; NON_GPROF_RET
+#else
+#define MEXITCOUNT
+#endif
+
+#else /* !GPROF */
+/*
+ * ALTENTRY() has to align because it is before a corresponding ENTRY().
+ * ENTRY() has to align too because there may be no ALTENTRY() before it.
+ * If there is a previous ALTENTRY() then the alignment code for ENTRY()
+ * is empty.
+ */
+#define ALTENTRY(name) GEN_ENTRY(name)
+#define CROSSJUMP(jtrue, label, jfalse) jtrue label
+#define CROSSJUMPTARGET(label)
+#define ENTRY(name) GEN_ENTRY(name)
+#define FAKE_MCOUNT(caller)
+#define MCOUNT
+#define MCOUNT_LABEL(name)
+#define MEXITCOUNT
+#endif /* GPROF */
+
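+/*
+ * For illustration, the typical use of these macros in a .S source
+ * file; the function name and body here are hypothetical:
+ *
+ *	ENTRY(hypothetical_zero)	# zero %rsi bytes at (%rdi)
+ *		xorl	%eax,%eax
+ *		movq	%rsi,%rcx
+ *		rep stosb
+ *		ret
+ *	END(hypothetical_zero)
+ *
+ * Under GPROF, ENTRY() additionally emits the call to __mcount; in the
+ * !GPROF case it reduces to the alignment and symbol declaration alone.
+ */
+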
+#ifdef LOCORE
+/*
+ * Convenience macro for declaring interrupt entry points.
+ */
+#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
+ .type __CONCAT(X,name),@function; __CONCAT(X,name):
+
+/*
+ * Macros to create and destroy a trap frame.
+ */
+#define PUSH_FRAME \
+ subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+	jz	1f ;		/* Yes, don't swapgs again */ \
+ swapgs ; \
+1: movq %rdi,TF_RDI(%rsp) ; \
+ movq %rsi,TF_RSI(%rsp) ; \
+ movq %rdx,TF_RDX(%rsp) ; \
+ movq %rcx,TF_RCX(%rsp) ; \
+ movq %r8,TF_R8(%rsp) ; \
+ movq %r9,TF_R9(%rsp) ; \
+ movq %rax,TF_RAX(%rsp) ; \
+ movq %rbx,TF_RBX(%rsp) ; \
+ movq %rbp,TF_RBP(%rsp) ; \
+ movq %r10,TF_R10(%rsp) ; \
+ movq %r11,TF_R11(%rsp) ; \
+ movq %r12,TF_R12(%rsp) ; \
+ movq %r13,TF_R13(%rsp) ; \
+ movq %r14,TF_R14(%rsp) ; \
+ movq %r15,TF_R15(%rsp) ; \
+ movw %fs,TF_FS(%rsp) ; \
+ movw %gs,TF_GS(%rsp) ; \
+ movw %es,TF_ES(%rsp) ; \
+ movw %ds,TF_DS(%rsp) ; \
+ movl $TF_HASSEGS,TF_FLAGS(%rsp) ; \
+ cld
+
+#define POP_FRAME \
+ movq TF_RDI(%rsp),%rdi ; \
+ movq TF_RSI(%rsp),%rsi ; \
+ movq TF_RDX(%rsp),%rdx ; \
+ movq TF_RCX(%rsp),%rcx ; \
+ movq TF_R8(%rsp),%r8 ; \
+ movq TF_R9(%rsp),%r9 ; \
+ movq TF_RAX(%rsp),%rax ; \
+ movq TF_RBX(%rsp),%rbx ; \
+ movq TF_RBP(%rsp),%rbp ; \
+ movq TF_R10(%rsp),%r10 ; \
+ movq TF_R11(%rsp),%r11 ; \
+ movq TF_R12(%rsp),%r12 ; \
+ movq TF_R13(%rsp),%r13 ; \
+ movq TF_R14(%rsp),%r14 ; \
+ movq TF_R15(%rsp),%r15 ; \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* keep kernel GS.base */ \
+ cli ; \
+ swapgs ; \
+1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
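+
+/*
+ * For illustration, the usual shape of an interrupt stub built from
+ * IDTVEC() and these frame macros; the vector and handler names here
+ * are hypothetical, and real handlers normally exit through the
+ * common doreti path rather than returning directly:
+ *
+ *	IDTVEC(hypothetical_intr)
+ *		PUSH_FRAME
+ *		FAKE_MCOUNT(TF_RIP(%rsp))
+ *		movq	%rsp,%rdi	# pass the trapframe to C code
+ *		call	hypothetical_handler
+ *		MEXITCOUNT
+ *		POP_FRAME
+ *		iretq
+ */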
+
+/*
+ * Access per-CPU data.
+ */
+#define PCPU(member) %gs:PC_ ## member
+#define PCPU_ADDR(member, reg) \
+ movq %gs:PC_PRVSPACE, reg ; \
+ addq $PC_ ## member, reg
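+
+/*
+ * For example, PCPU(CPUID) expands to %gs:PC_CPUID, so the current
+ * CPU number can be fetched with:
+ *
+ *	movl	PCPU(CPUID),%eax
+ *
+ * and PCPU_ADDR(CPUID, %rax) leaves the address of that field in %rax.
+ */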
+
+#endif /* LOCORE */
+
+#endif /* !_MACHINE_ASMACROS_H_ */
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
new file mode 100644
index 0000000..91c33e6
--- /dev/null
+++ b/sys/amd64/include/atomic.h
@@ -0,0 +1,483 @@
+/*-
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _MACHINE_ATOMIC_H_
+#define _MACHINE_ATOMIC_H_
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+#define mb() __asm __volatile("mfence;" : : : "memory")
+#define wmb() __asm __volatile("sfence;" : : : "memory")
+#define rmb() __asm __volatile("lfence;" : : : "memory")
+
+/*
+ * Various simple operations on memory, each of which is atomic in the
+ * presence of interrupts and multiple processors.
+ *
+ * atomic_set_char(P, V) (*(u_char *)(P) |= (V))
+ * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V))
+ * atomic_add_char(P, V) (*(u_char *)(P) += (V))
+ * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V))
+ *
+ * atomic_set_short(P, V) (*(u_short *)(P) |= (V))
+ * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V))
+ * atomic_add_short(P, V) (*(u_short *)(P) += (V))
+ * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V))
+ *
+ * atomic_set_int(P, V) (*(u_int *)(P) |= (V))
+ * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V))
+ * atomic_add_int(P, V) (*(u_int *)(P) += (V))
+ * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V))
+ * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;)
+ *
+ * atomic_set_long(P, V) (*(u_long *)(P) |= (V))
+ * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V))
+ * atomic_add_long(P, V) (*(u_long *)(P) += (V))
+ * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V))
+ * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;)
+ */
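+
+/*
+ * For illustration, typical calls from C code; the flag word and bit
+ * values here are hypothetical:
+ *
+ *	volatile u_int flags = 0;
+ *
+ *	atomic_set_int(&flags, 0x01);		(set bit 0)
+ *	atomic_clear_int(&flags, 0x01);		(clear bit 0)
+ *	atomic_add_int(&flags, 1);
+ *	atomic_subtract_int(&flags, 1);
+ */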
+
+/*
+ * The above functions are expanded inline in the statically-linked
+ * kernel. Lock prefixes are generated if an SMP kernel is being
+ * built.
+ *
+ * Kernel modules call real functions which are built into the kernel.
+ * This allows kernel modules to be portable between UP and SMP systems.
+ */
+#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
+#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
+void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
+void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
+
+int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
+int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
+u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
+u_long atomic_fetchadd_long(volatile u_long *p, u_long v);
+
+#define ATOMIC_LOAD(TYPE, LOP) \
+u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define ATOMIC_STORE(TYPE) \
+void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
+
+#else /* !KLD_MODULE && __GNUCLIKE_ASM */
+
+/*
+ * For userland, always use lock prefixes so that the binaries will run
+ * on both SMP and !SMP systems.
+ */
+#if defined(SMP) || !defined(_KERNEL)
+#define MPLOCKED "lock ; "
+#else
+#define MPLOCKED
+#endif
+
+/*
+ * The assembly is marked volatile so the compiler cannot discard it as
+ * dead code.  GCC also reorders operations aggressively, so the barrier
+ * variants need a "memory" clobber to keep such reordering from crossing
+ * them.
+ */
+#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
+static __inline void \
+atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+ __asm __volatile(MPLOCKED OP \
+ : "=m" (*p) \
+ : CONS (V), "m" (*p) \
+ : "cc"); \
+} \
+ \
+static __inline void \
+atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+ __asm __volatile(MPLOCKED OP \
+ : "=m" (*p) \
+ : CONS (V), "m" (*p) \
+ : "memory", "cc"); \
+} \
+struct __hack
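+
+/*
+ * For example, ATOMIC_ASM(add, int, "addl %1,%0", "ir", v) expands to
+ * roughly the following on an SMP kernel (plus an atomic_add_barr_int()
+ * variant that also clobbers "memory" to act as a compiler barrier):
+ *
+ *	static __inline void
+ *	atomic_add_int(volatile u_int *p, u_int v)
+ *	{
+ *		__asm __volatile("lock ; addl %1,%0"
+ *		    : "=m" (*p)
+ *		    : "ir" (v), "m" (*p)
+ *		    : "cc");
+ *	}
+ */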
+
+/*
+ * Atomic compare and set, used by the mutex functions.
+ *
+ *	if (*dst == expect) *dst = src;
+ *
+ * The comparison and store are performed as one atomic operation.
+ * Returns 0 on failure, non-zero on success.
+ */
+
+static __inline int
+atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
+{
+ u_char res;
+
+ __asm __volatile(
+ " " MPLOCKED " "
+ " cmpxchgl %2,%1 ; "
+ " sete %0 ; "
+ "1: "
+ "# atomic_cmpset_int"
+ : "=a" (res), /* 0 */
+ "=m" (*dst) /* 1 */
+ : "r" (src), /* 2 */
+ "a" (expect), /* 3 */
+ "m" (*dst) /* 4 */
+ : "memory", "cc");
+
+ return (res);
+}
+
+static __inline int
+atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
+{
+ u_char res;
+
+ __asm __volatile(
+ " " MPLOCKED " "
+ " cmpxchgq %2,%1 ; "
+ " sete %0 ; "
+ "1: "
+ "# atomic_cmpset_long"
+ : "=a" (res), /* 0 */
+ "=m" (*dst) /* 1 */
+ : "r" (src), /* 2 */
+ "a" (expect), /* 3 */
+ "m" (*dst) /* 4 */
+ : "memory", "cc");
+
+ return (res);
+}
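+
+/*
+ * For illustration, a minimal spinlock sketch built on these
+ * primitives; the lock word and layout here are hypothetical:
+ *
+ *	static volatile u_int lock_word;
+ *
+ *	while (atomic_cmpset_acq_int(&lock_word, 0, 1) == 0)
+ *		;			(spin until the 0 -> 1 swap wins)
+ *	... critical section ...
+ *	atomic_store_rel_int(&lock_word, 0);
+ */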
+
+/*
+ * Atomically add the value of v to the integer pointed to by p and return
+ * the previous value of *p.
+ */
+static __inline u_int
+atomic_fetchadd_int(volatile u_int *p, u_int v)
+{
+
+ __asm __volatile(
+ " " MPLOCKED " "
+ " xaddl %0, %1 ; "
+ "# atomic_fetchadd_int"
+ : "+r" (v), /* 0 (result) */
+ "=m" (*p) /* 1 */
+ : "m" (*p) /* 2 */
+ : "cc");
+ return (v);
+}
+
+/*
+ * Atomically add the value of v to the long integer pointed to by p and return
+ * the previous value of *p.
+ */
+static __inline u_long
+atomic_fetchadd_long(volatile u_long *p, u_long v)
+{
+
+ __asm __volatile(
+ " " MPLOCKED " "
+ " xaddq %0, %1 ; "
+ "# atomic_fetchadd_long"
+ : "+r" (v), /* 0 (result) */
+ "=m" (*p) /* 1 */
+ : "m" (*p) /* 2 */
+ : "cc");
+ return (v);
+}
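+
+/*
+ * For illustration, a lock-free sequence-number allocator; the names
+ * here are hypothetical:
+ *
+ *	static volatile u_int next_seq;
+ *
+ *	u_int
+ *	seq_alloc(void)
+ *	{
+ *		return (atomic_fetchadd_int(&next_seq, 1));
+ *	}
+ *
+ * Racing callers each receive a distinct value, since xadd reads and
+ * advances the counter in one locked instruction.
+ */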
+
+/*
+ * We assume that a = b will do atomic loads and stores. Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels. We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence. For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
+ */
+#define ATOMIC_STORE(TYPE) \
+static __inline void \
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+ __compiler_membar(); \
+ *p = v; \
+} \
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define ATOMIC_LOAD(TYPE, LOP) \
+static __inline u_##TYPE \
+atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
+{ \
+ u_##TYPE tmp; \
+ \
+ tmp = *p; \
+ __compiler_membar(); \
+ return (tmp); \
+} \
+struct __hack
+
+#else /* !(_KERNEL && !SMP) */
+
+#define ATOMIC_LOAD(TYPE, LOP) \
+static __inline u_##TYPE \
+atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
+{ \
+ u_##TYPE res; \
+ \
+ __asm __volatile(MPLOCKED LOP \
+ : "=a" (res), /* 0 */ \
+ "=m" (*p) /* 1 */ \
+ : "m" (*p) /* 2 */ \
+ : "memory", "cc"); \
+ \
+ return (res); \
+} \
+struct __hack
+
+#endif /* _KERNEL && !SMP */
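+
+/*
+ * For illustration, the usual acquire/release pairing; "data" and
+ * "ready" are hypothetical:
+ *
+ *	producer:
+ *		data = compute();
+ *		atomic_store_rel_int(&ready, 1);
+ *
+ *	consumer:
+ *		while (atomic_load_acq_int(&ready) == 0)
+ *			;
+ *		consume(data);
+ *
+ * The release store keeps the write of data visible before ready is
+ * set; the acquire load keeps the read of data from moving above the
+ * flag check.
+ */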
+
+#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
+
+ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
+ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v);
+ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v);
+ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v);
+
+ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v);
+ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v);
+ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v);
+ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v);
+
+ATOMIC_ASM(set, int, "orl %1,%0", "ir", v);
+ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v);
+ATOMIC_ASM(add, int, "addl %1,%0", "ir", v);
+ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v);
+
+ATOMIC_ASM(set, long, "orq %1,%0", "ir", v);
+ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v);
+ATOMIC_ASM(add, long, "addq %1,%0", "ir", v);
+ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v);
+
+ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int, "cmpxchgl %0,%1");
+ATOMIC_LOAD(long, "cmpxchgq %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
+
+#undef ATOMIC_ASM
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
+
+#ifndef WANT_FUNCTIONS
+
+/* Read the current value and store a zero in the destination. */
+#ifdef __GNUCLIKE_ASM
+
+static __inline u_int
+atomic_readandclear_int(volatile u_int *addr)
+{
+ u_int res;
+
+ res = 0;
+ __asm __volatile(
+ " xchgl %1,%0 ; "
+ "# atomic_readandclear_int"
+ : "+r" (res), /* 0 */
+ "=m" (*addr) /* 1 */
+ : "m" (*addr));
+
+ return (res);
+}
+
+static __inline u_long
+atomic_readandclear_long(volatile u_long *addr)
+{
+ u_long res;
+
+ res = 0;
+ __asm __volatile(
+ " xchgq %1,%0 ; "
+ "# atomic_readandclear_long"
+ : "+r" (res), /* 0 */
+ "=m" (*addr) /* 1 */
+ : "m" (*addr));
+
+ return (res);
+}
+
+#else /* !__GNUCLIKE_ASM */
+
+u_int atomic_readandclear_int(volatile u_int *addr);
+u_long atomic_readandclear_long(volatile u_long *addr);
+
+#endif /* __GNUCLIKE_ASM */
+
+#define atomic_set_acq_char atomic_set_barr_char
+#define atomic_set_rel_char atomic_set_barr_char
+#define atomic_clear_acq_char atomic_clear_barr_char
+#define atomic_clear_rel_char atomic_clear_barr_char
+#define atomic_add_acq_char atomic_add_barr_char
+#define atomic_add_rel_char atomic_add_barr_char
+#define atomic_subtract_acq_char atomic_subtract_barr_char
+#define atomic_subtract_rel_char atomic_subtract_barr_char
+
+#define atomic_set_acq_short atomic_set_barr_short
+#define atomic_set_rel_short atomic_set_barr_short
+#define atomic_clear_acq_short atomic_clear_barr_short
+#define atomic_clear_rel_short atomic_clear_barr_short
+#define atomic_add_acq_short atomic_add_barr_short
+#define atomic_add_rel_short atomic_add_barr_short
+#define atomic_subtract_acq_short atomic_subtract_barr_short
+#define atomic_subtract_rel_short atomic_subtract_barr_short
+
+#define atomic_set_acq_int atomic_set_barr_int
+#define atomic_set_rel_int atomic_set_barr_int
+#define atomic_clear_acq_int atomic_clear_barr_int
+#define atomic_clear_rel_int atomic_clear_barr_int
+#define atomic_add_acq_int atomic_add_barr_int
+#define atomic_add_rel_int atomic_add_barr_int
+#define atomic_subtract_acq_int atomic_subtract_barr_int
+#define atomic_subtract_rel_int atomic_subtract_barr_int
+#define atomic_cmpset_acq_int atomic_cmpset_int
+#define atomic_cmpset_rel_int atomic_cmpset_int
+
+#define atomic_set_acq_long atomic_set_barr_long
+#define atomic_set_rel_long atomic_set_barr_long
+#define atomic_clear_acq_long atomic_clear_barr_long
+#define atomic_clear_rel_long atomic_clear_barr_long
+#define atomic_add_acq_long atomic_add_barr_long
+#define atomic_add_rel_long atomic_add_barr_long
+#define atomic_subtract_acq_long atomic_subtract_barr_long
+#define atomic_subtract_rel_long atomic_subtract_barr_long
+#define atomic_cmpset_acq_long atomic_cmpset_long
+#define atomic_cmpset_rel_long atomic_cmpset_long
+
+/* Operations on 8-bit bytes. */
+#define atomic_set_8 atomic_set_char
+#define atomic_set_acq_8 atomic_set_acq_char
+#define atomic_set_rel_8 atomic_set_rel_char
+#define atomic_clear_8 atomic_clear_char
+#define atomic_clear_acq_8 atomic_clear_acq_char
+#define atomic_clear_rel_8 atomic_clear_rel_char
+#define atomic_add_8 atomic_add_char
+#define atomic_add_acq_8 atomic_add_acq_char
+#define atomic_add_rel_8 atomic_add_rel_char
+#define atomic_subtract_8 atomic_subtract_char
+#define atomic_subtract_acq_8 atomic_subtract_acq_char
+#define atomic_subtract_rel_8 atomic_subtract_rel_char
+#define atomic_load_acq_8 atomic_load_acq_char
+#define atomic_store_rel_8 atomic_store_rel_char
+
+/* Operations on 16-bit words. */
+#define atomic_set_16 atomic_set_short
+#define atomic_set_acq_16 atomic_set_acq_short
+#define atomic_set_rel_16 atomic_set_rel_short
+#define atomic_clear_16 atomic_clear_short
+#define atomic_clear_acq_16 atomic_clear_acq_short
+#define atomic_clear_rel_16 atomic_clear_rel_short
+#define atomic_add_16 atomic_add_short
+#define atomic_add_acq_16 atomic_add_acq_short
+#define atomic_add_rel_16 atomic_add_rel_short
+#define atomic_subtract_16 atomic_subtract_short
+#define atomic_subtract_acq_16 atomic_subtract_acq_short
+#define atomic_subtract_rel_16 atomic_subtract_rel_short
+#define atomic_load_acq_16 atomic_load_acq_short
+#define atomic_store_rel_16 atomic_store_rel_short
+
+/* Operations on 32-bit double words. */
+#define atomic_set_32 atomic_set_int
+#define atomic_set_acq_32 atomic_set_acq_int
+#define atomic_set_rel_32 atomic_set_rel_int
+#define atomic_clear_32 atomic_clear_int
+#define atomic_clear_acq_32 atomic_clear_acq_int
+#define atomic_clear_rel_32 atomic_clear_rel_int
+#define atomic_add_32 atomic_add_int
+#define atomic_add_acq_32 atomic_add_acq_int
+#define atomic_add_rel_32 atomic_add_rel_int
+#define atomic_subtract_32 atomic_subtract_int
+#define atomic_subtract_acq_32 atomic_subtract_acq_int
+#define atomic_subtract_rel_32 atomic_subtract_rel_int
+#define atomic_load_acq_32 atomic_load_acq_int
+#define atomic_store_rel_32 atomic_store_rel_int
+#define atomic_cmpset_32 atomic_cmpset_int
+#define atomic_cmpset_acq_32 atomic_cmpset_acq_int
+#define atomic_cmpset_rel_32 atomic_cmpset_rel_int
+#define atomic_readandclear_32 atomic_readandclear_int
+#define atomic_fetchadd_32 atomic_fetchadd_int
+
+/* Operations on 64-bit quad words. */
+#define atomic_set_64 atomic_set_long
+#define atomic_set_acq_64 atomic_set_acq_long
+#define atomic_set_rel_64 atomic_set_rel_long
+#define atomic_clear_64 atomic_clear_long
+#define atomic_clear_acq_64 atomic_clear_acq_long
+#define atomic_clear_rel_64 atomic_clear_rel_long
+#define atomic_add_64 atomic_add_long
+#define atomic_add_acq_64 atomic_add_acq_long
+#define atomic_add_rel_64 atomic_add_rel_long
+#define atomic_subtract_64 atomic_subtract_long
+#define atomic_subtract_acq_64 atomic_subtract_acq_long
+#define atomic_subtract_rel_64 atomic_subtract_rel_long
+#define atomic_load_acq_64 atomic_load_acq_long
+#define atomic_store_rel_64 atomic_store_rel_long
+#define atomic_cmpset_64 atomic_cmpset_long
+#define atomic_cmpset_acq_64 atomic_cmpset_acq_long
+#define atomic_cmpset_rel_64 atomic_cmpset_rel_long
+#define atomic_readandclear_64 atomic_readandclear_long
+
+/* Operations on pointers. */
+#define atomic_set_ptr atomic_set_long
+#define atomic_set_acq_ptr atomic_set_acq_long
+#define atomic_set_rel_ptr atomic_set_rel_long
+#define atomic_clear_ptr atomic_clear_long
+#define atomic_clear_acq_ptr atomic_clear_acq_long
+#define atomic_clear_rel_ptr atomic_clear_rel_long
+#define atomic_add_ptr atomic_add_long
+#define atomic_add_acq_ptr atomic_add_acq_long
+#define atomic_add_rel_ptr atomic_add_rel_long
+#define atomic_subtract_ptr atomic_subtract_long
+#define atomic_subtract_acq_ptr atomic_subtract_acq_long
+#define atomic_subtract_rel_ptr atomic_subtract_rel_long
+#define atomic_load_acq_ptr atomic_load_acq_long
+#define atomic_store_rel_ptr atomic_store_rel_long
+#define atomic_cmpset_ptr atomic_cmpset_long
+#define atomic_cmpset_acq_ptr atomic_cmpset_acq_long
+#define atomic_cmpset_rel_ptr atomic_cmpset_rel_long
+#define atomic_readandclear_ptr atomic_readandclear_long
+
+#endif /* !WANT_FUNCTIONS */
+
+#endif /* !_MACHINE_ATOMIC_H_ */
diff --git a/sys/amd64/include/bus.h b/sys/amd64/include/bus.h
new file mode 100644
index 0000000..f1af2cf
--- /dev/null
+++ b/sys/amd64/include/bus.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/bus.h>
diff --git a/sys/amd64/include/bus_dma.h b/sys/amd64/include/bus_dma.h
new file mode 100644
index 0000000..bc8bdf4
--- /dev/null
+++ b/sys/amd64/include/bus_dma.h
@@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2005 Scott Long
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _AMD64_BUS_DMA_H_
+#define _AMD64_BUS_DMA_H_
+
+#include <sys/bus_dma.h>
+
+#endif /* _AMD64_BUS_DMA_H_ */
diff --git a/sys/amd64/include/clock.h b/sys/amd64/include/clock.h
new file mode 100644
index 0000000..d7f7d82
--- /dev/null
+++ b/sys/amd64/include/clock.h
@@ -0,0 +1,43 @@
+/*-
+ * Kernel interface to machine-dependent clock driver.
+ * Garrett Wollman, September 1994.
+ * This file is in the public domain.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_CLOCK_H_
+#define _MACHINE_CLOCK_H_
+
+#ifdef _KERNEL
+/*
+ * amd64 to clock driver interface.
+ * XXX large parts of the driver and its interface are misplaced.
+ */
+extern int clkintr_pending;
+extern u_int i8254_freq;
+extern int i8254_max_count;
+extern uint64_t tsc_freq;
+extern int tsc_is_invariant;
+extern int tsc_perf_stat;
+#ifdef SMP
+extern int smp_tsc;
+#endif
+
+void i8254_init(void);
+
+/*
+ * Driver to clock driver interface.
+ */
+
+void startrtclock(void);
+void init_TSC(void);
+
+#define HAS_TIMER_SPKR 1
+int timer_spkr_acquire(void);
+int timer_spkr_release(void);
+void timer_spkr_setfreq(int freq);
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_CLOCK_H_ */
diff --git a/sys/amd64/include/counter.h b/sys/amd64/include/counter.h
new file mode 100644
index 0000000..3ed5f76
--- /dev/null
+++ b/sys/amd64/include/counter.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __MACHINE_COUNTER_H__
+#define __MACHINE_COUNTER_H__
+
+#include <sys/pcpu.h>
+
+extern struct pcpu __pcpu[1];
+
+#define counter_enter() do {} while (0)
+#define counter_exit() do {} while (0)
+
+#define counter_u64_add_protected(c, i) counter_u64_add(c, i)
+
+static inline void
+counter_u64_add(counter_u64_t c, int64_t inc)
+{
+
+ __asm __volatile("addq\t%1,%%gs:(%0)"
+ :
+ : "r" ((char *)c - (char *)&__pcpu[0]), "r" (inc)
+ : "memory", "cc");
+}
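+
+/*
+ * The pointer arithmetic above converts the counter's address within
+ * CPU 0's pcpu region into an offset, which the %gs-relative addressing
+ * then applies to the current CPU's region.  The add is a single
+ * instruction and cannot be interrupted mid-update, so no lock prefix
+ * is needed.  For illustration (counter name hypothetical):
+ *
+ *	counter_u64_add(pkts_received, 1);
+ */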
+
+#endif /* ! __MACHINE_COUNTER_H__ */
diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h
new file mode 100644
index 0000000..1c2871f
--- /dev/null
+++ b/sys/amd64/include/cpu.h
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_CPU_H_
+#define _MACHINE_CPU_H_
+
+/*
+ * Definitions unique to amd64 cpu support.
+ */
+#include <machine/psl.h>
+#include <machine/frame.h>
+#include <machine/segments.h>
+
+#define cpu_exec(p) /* nothing */
+#define cpu_swapin(p) /* nothing */
+#define cpu_getstack(td) ((td)->td_frame->tf_rsp)
+#define cpu_setstack(td, ap) ((td)->td_frame->tf_rsp = (ap))
+#define cpu_spinwait() ia32_pause()
+
+#define TRAPF_USERMODE(framep) \
+ (ISPL((framep)->tf_cs) == SEL_UPL)
+#define TRAPF_PC(framep) ((framep)->tf_rip)
+
+#ifdef _KERNEL
+extern char btext[];
+extern char etext[];
+
+void cpu_halt(void);
+void cpu_reset(void);
+void fork_trampoline(void);
+void swi_vm(void *);
+
+/*
+ * Return contents of in-cpu fast counter as a sort of "bogo-time"
+ * for random-harvesting purposes.
+ */
+static __inline u_int64_t
+get_cyclecount(void)
+{
+
+ return (rdtsc());
+}
+
+#endif
+
+#endif /* !_MACHINE_CPU_H_ */
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
new file mode 100644
index 0000000..881fcd2
--- /dev/null
+++ b/sys/amd64/include/cpufunc.h
@@ -0,0 +1,791 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Functions to provide access to special amd64 instructions.
+ * This is included in sys/systm.h, and that file should be
+ * used in preference to this one.
+ */
+
+#ifndef _MACHINE_CPUFUNC_H_
+#define _MACHINE_CPUFUNC_H_
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+struct region_descriptor;
+
+#define readb(va) (*(volatile uint8_t *) (va))
+#define readw(va) (*(volatile uint16_t *) (va))
+#define readl(va) (*(volatile uint32_t *) (va))
+#define readq(va) (*(volatile uint64_t *) (va))
+
+#define writeb(va, d) (*(volatile uint8_t *) (va) = (d))
+#define writew(va, d) (*(volatile uint16_t *) (va) = (d))
+#define writel(va, d) (*(volatile uint32_t *) (va) = (d))
+#define writeq(va, d) (*(volatile uint64_t *) (va) = (d))
+
+#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE)
+
+static __inline void
+breakpoint(void)
+{
+ __asm __volatile("int $3");
+}
+
+static __inline u_int
+bsfl(u_int mask)
+{
+ u_int result;
+
+ __asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
+static __inline u_long
+bsfq(u_long mask)
+{
+ u_long result;
+
+ __asm __volatile("bsfq %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
+static __inline u_int
+bsrl(u_int mask)
+{
+ u_int result;
+
+ __asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
+static __inline u_long
+bsrq(u_long mask)
+{
+ u_long result;
+
+ __asm __volatile("bsrq %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
+static __inline void
+clflush(u_long addr)
+{
+
+ __asm __volatile("clflush %0" : : "m" (*(char *)addr));
+}
+
+static __inline void
+clts(void)
+{
+
+ __asm __volatile("clts");
+}
+
+static __inline void
+disable_intr(void)
+{
+ __asm __volatile("cli" : : : "memory");
+}
+
+static __inline void
+do_cpuid(u_int ax, u_int *p)
+{
+ __asm __volatile("cpuid"
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax));
+}
+
+static __inline void
+cpuid_count(u_int ax, u_int cx, u_int *p)
+{
+ __asm __volatile("cpuid"
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax), "c" (cx));
+}
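+
+/*
+ * For illustration, fetching the 12-byte CPU vendor string with
+ * do_cpuid(); the buffer handling here is a sketch:
+ *
+ *	u_int regs[4];
+ *	char vendor[13];
+ *
+ *	do_cpuid(0, regs);
+ *	memcpy(vendor + 0, &regs[1], 4);	(%ebx)
+ *	memcpy(vendor + 4, &regs[3], 4);	(%edx)
+ *	memcpy(vendor + 8, &regs[2], 4);	(%ecx)
+ *	vendor[12] = '\0';
+ */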
+
+static __inline void
+enable_intr(void)
+{
+ __asm __volatile("sti");
+}
+
+#ifdef _KERNEL
+
+#define HAVE_INLINE_FFS
+#define ffs(x) __builtin_ffs(x)
+
+#define HAVE_INLINE_FFSL
+
+static __inline int
+ffsl(long mask)
+{
+ return (mask == 0 ? mask : (int)bsfq((u_long)mask) + 1);
+}
+
+#define HAVE_INLINE_FLS
+
+static __inline int
+fls(int mask)
+{
+ return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
+}
+
+#define HAVE_INLINE_FLSL
+
+static __inline int
+flsl(long mask)
+{
+ return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1);
+}
+
+#endif /* _KERNEL */
+
+static __inline void
+halt(void)
+{
+ __asm __volatile("hlt");
+}
+
+static __inline u_char
+inb(u_int port)
+{
+ u_char data;
+
+ __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port));
+ return (data);
+}
+
+static __inline u_int
+inl(u_int port)
+{
+ u_int data;
+
+ __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port));
+ return (data);
+}
+
+static __inline void
+insb(u_int port, void *addr, size_t count)
+{
+ __asm __volatile("cld; rep; insb"
+ : "+D" (addr), "+c" (count)
+ : "d" (port)
+ : "memory");
+}
+
+static __inline void
+insw(u_int port, void *addr, size_t count)
+{
+ __asm __volatile("cld; rep; insw"
+ : "+D" (addr), "+c" (count)
+ : "d" (port)
+ : "memory");
+}
+
+static __inline void
+insl(u_int port, void *addr, size_t count)
+{
+ __asm __volatile("cld; rep; insl"
+ : "+D" (addr), "+c" (count)
+ : "d" (port)
+ : "memory");
+}
+
+static __inline void
+invd(void)
+{
+ __asm __volatile("invd");
+}
+
+static __inline u_short
+inw(u_int port)
+{
+ u_short data;
+
+ __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port));
+ return (data);
+}
+
+static __inline void
+outb(u_int port, u_char data)
+{
+ __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port));
+}
+
+static __inline void
+outl(u_int port, u_int data)
+{
+ __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port));
+}
+
+static __inline void
+outsb(u_int port, const void *addr, size_t count)
+{
+ __asm __volatile("cld; rep; outsb"
+ : "+S" (addr), "+c" (count)
+ : "d" (port));
+}
+
+static __inline void
+outsw(u_int port, const void *addr, size_t count)
+{
+ __asm __volatile("cld; rep; outsw"
+ : "+S" (addr), "+c" (count)
+ : "d" (port));
+}
+
+static __inline void
+outsl(u_int port, const void *addr, size_t count)
+{
+ __asm __volatile("cld; rep; outsl"
+ : "+S" (addr), "+c" (count)
+ : "d" (port));
+}
+
+static __inline void
+outw(u_int port, u_short data)
+{
+ __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port));
+}
+
+static __inline u_long
+popcntq(u_long mask)
+{
+ u_long result;
+
+ __asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask));
+ return (result);
+}
+
+static __inline void
+lfence(void)
+{
+
+ __asm __volatile("lfence" : : : "memory");
+}
+
+static __inline void
+mfence(void)
+{
+
+ __asm __volatile("mfence" : : : "memory");
+}
+
+static __inline void
+ia32_pause(void)
+{
+ __asm __volatile("pause");
+}
+
+static __inline u_long
+read_rflags(void)
+{
+ u_long rf;
+
+ __asm __volatile("pushfq; popq %0" : "=r" (rf));
+ return (rf);
+}
+
+static __inline uint64_t
+rdmsr(u_int msr)
+{
+ uint32_t low, high;
+
+ __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline uint64_t
+rdpmc(u_int pmc)
+{
+ uint32_t low, high;
+
+ __asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline uint64_t
+rdtsc(void)
+{
+ uint32_t low, high;
+
+ __asm __volatile("rdtsc" : "=a" (low), "=d" (high));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline uint32_t
+rdtsc32(void)
+{
+ uint32_t rv;
+
+ __asm __volatile("rdtsc" : "=a" (rv) : : "edx");
+ return (rv);
+}
+
+static __inline void
+wbinvd(void)
+{
+ __asm __volatile("wbinvd");
+}
+
+static __inline void
+write_rflags(u_long rf)
+{
+ __asm __volatile("pushq %0; popfq" : : "r" (rf));
+}
+
+static __inline void
+wrmsr(u_int msr, uint64_t newval)
+{
+ uint32_t low, high;
+
+ low = newval;
+ high = newval >> 32;
+ __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr));
+}
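+
+/*
+ * For illustration, a read-modify-write of a machine-specific
+ * register; this must run at CPL 0, and msr and bit here are
+ * hypothetical placeholders:
+ *
+ *	uint64_t v;
+ *
+ *	v = rdmsr(msr);
+ *	wrmsr(msr, v | bit);
+ */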
+
+static __inline void
+load_cr0(u_long data)
+{
+
+ __asm __volatile("movq %0,%%cr0" : : "r" (data));
+}
+
+static __inline u_long
+rcr0(void)
+{
+ u_long data;
+
+ __asm __volatile("movq %%cr0,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline u_long
+rcr2(void)
+{
+ u_long data;
+
+ __asm __volatile("movq %%cr2,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_cr3(u_long data)
+{
+
+ __asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory");
+}
+
+static __inline u_long
+rcr3(void)
+{
+ u_long data;
+
+ __asm __volatile("movq %%cr3,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_cr4(u_long data)
+{
+ __asm __volatile("movq %0,%%cr4" : : "r" (data));
+}
+
+static __inline u_long
+rcr4(void)
+{
+ u_long data;
+
+ __asm __volatile("movq %%cr4,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline u_long
+rxcr(u_int reg)
+{
+ u_int low, high;
+
+ __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline void
+load_xcr(u_int reg, u_long val)
+{
+ u_int low, high;
+
+ low = val;
+ high = val >> 32;
+ __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high));
+}
+
+/*
+ * Global TLB flush (except for entries for pages marked PG_G)
+ */
+static __inline void
+invltlb(void)
+{
+
+ load_cr3(rcr3());
+}
+
+/*
+ * TLB flush for an individual page (even if it has PG_G).
+ * Only works on 486+ CPUs (i386 does not have PG_G).
+ */
+static __inline void
+invlpg(u_long addr)
+{
+
+ __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
+}
+
+static __inline u_short
+rfs(void)
+{
+ u_short sel;
+ __asm __volatile("movw %%fs,%0" : "=rm" (sel));
+ return (sel);
+}
+
+static __inline u_short
+rgs(void)
+{
+ u_short sel;
+ __asm __volatile("movw %%gs,%0" : "=rm" (sel));
+ return (sel);
+}
+
+static __inline u_short
+rss(void)
+{
+ u_short sel;
+ __asm __volatile("movw %%ss,%0" : "=rm" (sel));
+ return (sel);
+}
+
+static __inline void
+load_ds(u_short sel)
+{
+ __asm __volatile("movw %0,%%ds" : : "rm" (sel));
+}
+
+static __inline void
+load_es(u_short sel)
+{
+ __asm __volatile("movw %0,%%es" : : "rm" (sel));
+}
+
+static __inline void
+cpu_monitor(const void *addr, u_long extensions, u_int hints)
+{
+
+ __asm __volatile("monitor"
+ : : "a" (addr), "c" (extensions), "d" (hints));
+}
+
+static __inline void
+cpu_mwait(u_long extensions, u_int hints)
+{
+
+ __asm __volatile("mwait" : : "a" (hints), "c" (extensions));
+}
+
+#ifdef _KERNEL
+/* This is defined in <machine/specialreg.h> but is too painful to get to */
+#ifndef MSR_FSBASE
+#define MSR_FSBASE 0xc0000100
+#endif
+static __inline void
+load_fs(u_short sel)
+{
+ /* Preserve the fsbase value across the selector load */
+ __asm __volatile("rdmsr; movw %0,%%fs; wrmsr"
+ : : "rm" (sel), "c" (MSR_FSBASE) : "eax", "edx");
+}
+
+#ifndef MSR_GSBASE
+#define MSR_GSBASE 0xc0000101
+#endif
+static __inline void
+load_gs(u_short sel)
+{
+ /*
+ * Preserve the gsbase value across the selector load.
+ * Note that we have to disable interrupts because the gsbase
+ * being trashed happens to be the kernel gsbase at the time.
+ */
+ __asm __volatile("pushfq; cli; rdmsr; movw %0,%%gs; wrmsr; popfq"
+ : : "rm" (sel), "c" (MSR_GSBASE) : "eax", "edx");
+}
+#else
+/* Usable by userland */
+static __inline void
+load_fs(u_short sel)
+{
+ __asm __volatile("movw %0,%%fs" : : "rm" (sel));
+}
+
+static __inline void
+load_gs(u_short sel)
+{
+ __asm __volatile("movw %0,%%gs" : : "rm" (sel));
+}
+#endif
+
+static __inline void
+lidt(struct region_descriptor *addr)
+{
+ __asm __volatile("lidt (%0)" : : "r" (addr));
+}
+
+static __inline void
+lldt(u_short sel)
+{
+ __asm __volatile("lldt %0" : : "r" (sel));
+}
+
+static __inline void
+ltr(u_short sel)
+{
+ __asm __volatile("ltr %0" : : "r" (sel));
+}
+
+static __inline uint64_t
+rdr0(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr0,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr0(uint64_t dr0)
+{
+ __asm __volatile("movq %0,%%dr0" : : "r" (dr0));
+}
+
+static __inline uint64_t
+rdr1(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr1,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr1(uint64_t dr1)
+{
+ __asm __volatile("movq %0,%%dr1" : : "r" (dr1));
+}
+
+static __inline uint64_t
+rdr2(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr2,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr2(uint64_t dr2)
+{
+ __asm __volatile("movq %0,%%dr2" : : "r" (dr2));
+}
+
+static __inline uint64_t
+rdr3(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr3,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr3(uint64_t dr3)
+{
+ __asm __volatile("movq %0,%%dr3" : : "r" (dr3));
+}
+
+static __inline uint64_t
+rdr4(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr4,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr4(uint64_t dr4)
+{
+ __asm __volatile("movq %0,%%dr4" : : "r" (dr4));
+}
+
+static __inline uint64_t
+rdr5(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr5,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr5(uint64_t dr5)
+{
+ __asm __volatile("movq %0,%%dr5" : : "r" (dr5));
+}
+
+static __inline uint64_t
+rdr6(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr6,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr6(uint64_t dr6)
+{
+ __asm __volatile("movq %0,%%dr6" : : "r" (dr6));
+}
+
+static __inline uint64_t
+rdr7(void)
+{
+ uint64_t data;
+ __asm __volatile("movq %%dr7,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_dr7(uint64_t dr7)
+{
+ __asm __volatile("movq %0,%%dr7" : : "r" (dr7));
+}
+
+static __inline register_t
+intr_disable(void)
+{
+ register_t rflags;
+
+ rflags = read_rflags();
+ disable_intr();
+ return (rflags);
+}
+
+static __inline void
+intr_restore(register_t rflags)
+{
+ write_rflags(rflags);
+}
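+
+/*
+ * For illustration, the usual pattern for a short section that must
+ * run with interrupts disabled:
+ *
+ *	register_t saved;
+ *
+ *	saved = intr_disable();
+ *	... code that must not be interrupted ...
+ *	intr_restore(saved);
+ *
+ * Since intr_restore() reloads the saved %rflags, interrupts are
+ * re-enabled only if they were enabled on entry.
+ */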
+
+#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */
+
+void breakpoint(void);
+u_int bsfl(u_int mask);
+u_int bsrl(u_int mask);
+void clflush(u_long addr);
+void clts(void);
+void cpuid_count(u_int ax, u_int cx, u_int *p);
+void disable_intr(void);
+void do_cpuid(u_int ax, u_int *p);
+void enable_intr(void);
+void halt(void);
+void ia32_pause(void);
+u_char inb(u_int port);
+u_int inl(u_int port);
+void insb(u_int port, void *addr, size_t count);
+void insl(u_int port, void *addr, size_t count);
+void insw(u_int port, void *addr, size_t count);
+register_t intr_disable(void);
+void intr_restore(register_t rf);
+void invd(void);
+void invlpg(u_long addr);
+void invltlb(void);
+u_short inw(u_int port);
+void lidt(struct region_descriptor *addr);
+void lldt(u_short sel);
+void load_cr0(u_long cr0);
+void load_cr3(u_long cr3);
+void load_cr4(u_long cr4);
+void load_dr0(uint64_t dr0);
+void load_dr1(uint64_t dr1);
+void load_dr2(uint64_t dr2);
+void load_dr3(uint64_t dr3);
+void load_dr4(uint64_t dr4);
+void load_dr5(uint64_t dr5);
+void load_dr6(uint64_t dr6);
+void load_dr7(uint64_t dr7);
+void load_fs(u_short sel);
+void load_gs(u_short sel);
+void ltr(u_short sel);
+void outb(u_int port, u_char data);
+void outl(u_int port, u_int data);
+void outsb(u_int port, const void *addr, size_t count);
+void outsl(u_int port, const void *addr, size_t count);
+void outsw(u_int port, const void *addr, size_t count);
+void outw(u_int port, u_short data);
+u_long rcr0(void);
+u_long rcr2(void);
+u_long rcr3(void);
+u_long rcr4(void);
+uint64_t rdmsr(u_int msr);
+uint64_t rdpmc(u_int pmc);
+uint64_t rdr0(void);
+uint64_t rdr1(void);
+uint64_t rdr2(void);
+uint64_t rdr3(void);
+uint64_t rdr4(void);
+uint64_t rdr5(void);
+uint64_t rdr6(void);
+uint64_t rdr7(void);
+uint64_t rdtsc(void);
+u_long read_rflags(void);
+u_short rfs(void);
+u_short rgs(void);
+void wbinvd(void);
+void write_rflags(u_long rf);
+void wrmsr(u_int msr, uint64_t newval);
+
+#endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */
+
+void reset_dbregs(void);
+
+#ifdef _KERNEL
+int rdmsr_safe(u_int msr, uint64_t *val);
+int wrmsr_safe(u_int msr, uint64_t newval);
+#endif
+
+#endif /* !_MACHINE_CPUFUNC_H_ */
diff --git a/sys/amd64/include/cputypes.h b/sys/amd64/include/cputypes.h
new file mode 100644
index 0000000..eeec4e0
--- /dev/null
+++ b/sys/amd64/include/cputypes.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1993 Christopher G. Demetriou
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_CPUTYPES_H_
+#define _MACHINE_CPUTYPES_H_
+
+/*
+ * Classes of processor.
+ */
+#define CPUCLASS_X86 0 /* X86 */
+#define CPUCLASS_K8 1 /* K8 AMD64 class */
+
+/*
+ * Kinds of processor.
+ */
+#define CPU_X86 0 /* Intel */
+#define CPU_CLAWHAMMER 1 /* AMD Clawhammer */
+#define CPU_SLEDGEHAMMER 2 /* AMD Sledgehammer */
+
+/*
+ * Vendors of processor.
+ */
+#define CPU_VENDOR_AMD 0x1022 /* AMD */
+#define CPU_VENDOR_IDT 0x111d /* Centaur/IDT/VIA */
+#define CPU_VENDOR_INTEL 0x8086 /* Intel */
+#define CPU_VENDOR_CENTAUR CPU_VENDOR_IDT
+
+#ifndef LOCORE
+extern int cpu;
+extern int cpu_class;
+#endif
+
+#endif /* !_MACHINE_CPUTYPES_H_ */
diff --git a/sys/amd64/include/db_machdep.h b/sys/amd64/include/db_machdep.h
new file mode 100644
index 0000000..29e385e
--- /dev/null
+++ b/sys/amd64/include/db_machdep.h
@@ -0,0 +1,94 @@
+/*-
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_DB_MACHDEP_H_
+#define _MACHINE_DB_MACHDEP_H_
+
+#include <machine/frame.h>
+#include <machine/trap.h>
+
+typedef vm_offset_t db_addr_t; /* address - unsigned */
+typedef long db_expr_t; /* expression - signed */
+
+#define PC_REGS() ((db_addr_t)kdb_thrctx->pcb_rip)
+
+#define BKPT_INST 0xcc /* breakpoint instruction */
+#define BKPT_SIZE (1) /* size of breakpoint inst */
+#define BKPT_SET(inst) (BKPT_INST)
+
+#define BKPT_SKIP \
+do { \
+ kdb_frame->tf_rip += 1; \
+ kdb_thrctx->pcb_rip += 1; \
+} while(0)
+
+#define FIXUP_PC_AFTER_BREAK \
+do { \
+ kdb_frame->tf_rip -= 1; \
+ kdb_thrctx->pcb_rip -= 1; \
+} while(0)
+
+#define db_clear_single_step kdb_cpu_clear_singlestep
+#define db_set_single_step kdb_cpu_set_singlestep
+
+#define IS_BREAKPOINT_TRAP(type, code) ((type) == T_BPTFLT)
+/*
+ * Watchpoints are not supported. The debug exception type is in %dr6
+ * and not yet in the args to this macro.
+ */
+#define IS_WATCHPOINT_TRAP(type, code) 0
+
+#define I_CALL 0xe8
+#define I_CALLI 0xff
+#define I_RET 0xc3
+#define I_IRET 0xcf
+
+#define inst_trap_return(ins) (((ins)&0xff) == I_IRET)
+#define inst_return(ins) (((ins)&0xff) == I_RET)
+#define inst_call(ins) (((ins)&0xff) == I_CALL || \
+ (((ins)&0xff) == I_CALLI && \
+ ((ins)&0x3800) == 0x1000))
+#define inst_load(ins) 0
+#define inst_store(ins) 0
+
+/*
+ * There are no interesting addresses below _kstack = 0xefbfe000.  There
+ * are small absolute values for GUPROF, but we don't want to see them.
+ * Treat "negative" addresses below _kstack as non-small to allow for
+ * future reductions of _kstack and to avoid sign extension problems.
+ *
+ * There is one interesting symbol above -db_maxoff = 0xffff0000,
+ * namely _APTD = 0xfffff000. Accepting this would mess up the
+ * printing of small negative offsets. The next largest symbol is
+ * _APTmap = 0xffc00000. Accepting this is OK (unless db_maxoff is
+ * set to >= 0x400000 - (max stack offset)).
+ */
+#define DB_SMALL_VALUE_MAX 0x7fffffff
+#define DB_SMALL_VALUE_MIN (-0x400001)
+
+#endif /* !_MACHINE_DB_MACHDEP_H_ */
diff --git a/sys/amd64/include/elf.h b/sys/amd64/include/elf.h
new file mode 100644
index 0000000..f932377
--- /dev/null
+++ b/sys/amd64/include/elf.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/elf.h>
diff --git a/sys/amd64/include/endian.h b/sys/amd64/include/endian.h
new file mode 100644
index 0000000..2ad27a9
--- /dev/null
+++ b/sys/amd64/include/endian.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/endian.h>
diff --git a/sys/amd64/include/exec.h b/sys/amd64/include/exec.h
new file mode 100644
index 0000000..8d07887
--- /dev/null
+++ b/sys/amd64/include/exec.h
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)exec.h 8.1 (Berkeley) 6/11/93
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_EXEC_H_
+#define _MACHINE_EXEC_H_
+
+#define __LDPGSZ 4096
+
+#endif /* !_MACHINE_EXEC_H_ */
diff --git a/sys/amd64/include/float.h b/sys/amd64/include/float.h
new file mode 100644
index 0000000..4759963
--- /dev/null
+++ b/sys/amd64/include/float.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/float.h>
diff --git a/sys/amd64/include/floatingpoint.h b/sys/amd64/include/floatingpoint.h
new file mode 100644
index 0000000..cda9e46
--- /dev/null
+++ b/sys/amd64/include/floatingpoint.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 1993 Andrew Moore, Talke Studio
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#) floatingpoint.h 1.0 (Berkeley) 9/23/93
+ * $FreeBSD$
+ */
+
+#ifndef _FLOATINGPOINT_H_
+#define _FLOATINGPOINT_H_
+
+#include <sys/cdefs.h>
+#include <machine/ieeefp.h>
+
+#endif /* !_FLOATINGPOINT_H_ */
diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h
new file mode 100644
index 0000000..1ce59d0
--- /dev/null
+++ b/sys/amd64/include/fpu.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)npx.h 5.3 (Berkeley) 1/18/91
+ * $FreeBSD$
+ */
+
+/*
+ * Floating Point Data Structures and Constants
+ * W. Jolitz 1/90
+ */
+
+#ifndef _MACHINE_FPU_H_
+#define _MACHINE_FPU_H_
+
+#include <x86/fpu.h>
+
+#ifdef _KERNEL
+
+struct fpu_kern_ctx;
+
+#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0)
+
+#define XSAVE_AREA_ALIGN 64
+
+void fpudna(void);
+void fpudrop(void);
+void fpuexit(struct thread *td);
+int fpuformat(void);
+int fpugetregs(struct thread *td);
+void fpuinit(void);
+void fpurestore(void *addr);
+void fpusave(void *addr);
+int fpusetregs(struct thread *td, struct savefpu *addr,
+ char *xfpustate, size_t xfpustate_size);
+int fpusetxstate(struct thread *td, char *xfpustate,
+ size_t xfpustate_size);
+int fputrap_sse(void);
+int fputrap_x87(void);
+void fpuuserinited(struct thread *td);
+struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
+void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);
+int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx,
+ u_int flags);
+int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx);
+int fpu_kern_thread(u_int flags);
+int is_fpu_kern_thread(u_int flags);
+
+struct savefpu *fpu_save_area_alloc(void);
+void fpu_save_area_free(struct savefpu *fsa);
+void fpu_save_area_reset(struct savefpu *fsa);
+
+/*
+ * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread().
+ */
+#define FPU_KERN_NORMAL 0x0000
+#define FPU_KERN_NOWAIT 0x0001
+
+#endif
+
+#endif /* !_MACHINE_FPU_H_ */
diff --git a/sys/amd64/include/frame.h b/sys/amd64/include/frame.h
new file mode 100644
index 0000000..0953be7
--- /dev/null
+++ b/sys/amd64/include/frame.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/frame.h>
diff --git a/sys/amd64/include/gdb_machdep.h b/sys/amd64/include/gdb_machdep.h
new file mode 100644
index 0000000..d8c25b9
--- /dev/null
+++ b/sys/amd64/include/gdb_machdep.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2004 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GDB_MACHDEP_H_
+#define _MACHINE_GDB_MACHDEP_H_
+
+#define GDB_BUFSZ (GDB_NREGS * 16)
+#define GDB_NREGS 56
+#define GDB_REG_PC 16
+
+static __inline size_t
+gdb_cpu_regsz(int regnum)
+{
+ return ((regnum > 16 && regnum < 24) ? 4 : 8);
+}
+
+static __inline int
+gdb_cpu_query(void)
+{
+ return (0);
+}
+
+void *gdb_cpu_getreg(int, size_t *);
+void gdb_cpu_setreg(int, void *);
+int gdb_cpu_signal(int, int);
+
+#endif /* !_MACHINE_GDB_MACHDEP_H_ */
diff --git a/sys/amd64/include/ieeefp.h b/sys/amd64/include/ieeefp.h
new file mode 100644
index 0000000..a403660
--- /dev/null
+++ b/sys/amd64/include/ieeefp.h
@@ -0,0 +1,308 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1990 Andrew Moore, Talke Studio
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#) ieeefp.h 1.0 (Berkeley) 9/23/93
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_IEEEFP_H_
+#define _MACHINE_IEEEFP_H_
+
+/*
+ * Deprecated historical FPU control interface
+ *
+ * IEEE floating point type, constant and function definitions.
+ * XXX: {FP,SSE}*FLD and {FP,SSE}*OFF are undocumented pollution.
+ */
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+/*
+ * Rounding modes.
+ */
+typedef enum {
+ FP_RN=0, /* round to nearest */
+ FP_RM, /* round down towards minus infinity */
+ FP_RP, /* round up towards plus infinity */
+ FP_RZ /* truncate */
+} fp_rnd_t;
+
+/*
+ * Precision (i.e., rounding precision) modes.
+ */
+typedef enum {
+ FP_PS=0, /* 24 bit (single-precision) */
+ FP_PRS, /* reserved */
+ FP_PD, /* 53 bit (double-precision) */
+ FP_PE /* 64 bit (extended-precision) */
+} fp_prec_t;
+
+#define fp_except_t int
+
+/*
+ * Exception bit masks.
+ */
+#define FP_X_INV 0x01 /* invalid operation */
+#define FP_X_DNML 0x02 /* denormal */
+#define FP_X_DZ 0x04 /* zero divide */
+#define FP_X_OFL 0x08 /* overflow */
+#define FP_X_UFL 0x10 /* underflow */
+#define FP_X_IMP 0x20 /* (im)precision */
+#define FP_X_STK 0x40 /* stack fault */
+
+/*
+ * FPU control word bit-field masks.
+ */
+#define FP_MSKS_FLD 0x3f /* exception masks field */
+#define FP_PRC_FLD 0x300 /* precision control field */
+#define FP_RND_FLD 0xc00 /* rounding control field */
+
+/*
+ * FPU status word bit-field masks.
+ */
+#define FP_STKY_FLD 0x3f /* sticky flags field */
+
+/*
+ * SSE mxcsr register bit-field masks.
+ */
+#define SSE_STKY_FLD 0x3f /* exception flags */
+#define SSE_DAZ_FLD 0x40 /* Denormals are zero */
+#define SSE_MSKS_FLD 0x1f80 /* exception masks field */
+#define SSE_RND_FLD 0x6000 /* rounding control */
+#define SSE_FZ_FLD 0x8000 /* flush to zero on underflow */
+
+/*
+ * FPU control word bit-field offsets (shift counts).
+ */
+#define FP_MSKS_OFF 0 /* exception masks offset */
+#define FP_PRC_OFF 8 /* precision control offset */
+#define FP_RND_OFF 10 /* rounding control offset */
+
+/*
+ * FPU status word bit-field offsets (shift counts).
+ */
+#define FP_STKY_OFF 0 /* sticky flags offset */
+
+/*
+ * SSE mxcsr register bit-field offsets (shift counts).
+ */
+#define SSE_STKY_OFF 0 /* exception flags offset */
+#define SSE_DAZ_OFF 6 /* DAZ exception mask offset */
+#define SSE_MSKS_OFF 7 /* other exception masks offset */
+#define SSE_RND_OFF 13 /* rounding control offset */
+#define SSE_FZ_OFF 15 /* flush to zero offset */
+
+#ifdef __GNUCLIKE_ASM
+
+#define __fldcw(addr) __asm __volatile("fldcw %0" : : "m" (*(addr)))
+#define __fldenv(addr) __asm __volatile("fldenv %0" : : "m" (*(addr)))
+#define __fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
+#define __fnstenv(addr) __asm __volatile("fnstenv %0" : "=m" (*(addr)))
+#define __fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
+#define __ldmxcsr(addr) __asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
+#define __stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr)))
+
+/*
+ * Load the control word. Be careful not to trap if there is a currently
+ * unmasked exception (ones that will become freshly unmasked are not a
+ * problem). This case must be handled by a save/restore of the
+ * environment or even of the full x87 state. Accessing the environment
+ * is very inefficient, so only do it when necessary.
+ */
+static __inline void
+__fnldcw(unsigned short _cw, unsigned short _newcw)
+{
+ struct {
+ unsigned _cw;
+ unsigned _other[6];
+ } _env;
+ unsigned short _sw;
+
+ if ((_cw & FP_MSKS_FLD) != FP_MSKS_FLD) {
+ __fnstsw(&_sw);
+ if (((_sw & ~_cw) & FP_STKY_FLD) != 0) {
+ __fnstenv(&_env);
+ _env._cw = _newcw;
+ __fldenv(&_env);
+ return;
+ }
+ }
+ __fldcw(&_newcw);
+}
+
+/*
+ * General notes about conflicting SSE vs FP status bits.
+ * This code assumes that software will not fiddle with the control
+ * bits of the SSE and x87 units in such a way as to get them out of sync and
+ * still expect this to work. Break this at your peril.
+ * Because I based this on the i386 port, the x87 state is used for
+ * the fpget*() functions, and is shadowed into the SSE state for
+ * the fpset*() functions. For dual source fpget*() functions, I
+ * merge the two together. I think.
+ */
+
+static __inline fp_rnd_t
+__fpgetround(void)
+{
+ unsigned short _cw;
+
+ __fnstcw(&_cw);
+ return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF));
+}
+
+static __inline fp_rnd_t
+__fpsetround(fp_rnd_t _m)
+{
+ fp_rnd_t _p;
+ unsigned _mxcsr;
+ unsigned short _cw, _newcw;
+
+ __fnstcw(&_cw);
+ _p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF);
+ _newcw = _cw & ~FP_RND_FLD;
+ _newcw |= (_m << FP_RND_OFF) & FP_RND_FLD;
+ __fnldcw(_cw, _newcw);
+ __stmxcsr(&_mxcsr);
+ _mxcsr &= ~SSE_RND_FLD;
+ _mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD;
+ __ldmxcsr(&_mxcsr);
+ return (_p);
+}
+
+/*
+ * Get or set the rounding precision for x87 arithmetic operations.
+ * There is no equivalent SSE mode or control.
+ */
+
+static __inline fp_prec_t
+__fpgetprec(void)
+{
+ unsigned short _cw;
+
+ __fnstcw(&_cw);
+ return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF));
+}
+
+static __inline fp_prec_t
+__fpsetprec(fp_prec_t _m)
+{
+ fp_prec_t _p;
+ unsigned short _cw, _newcw;
+
+ __fnstcw(&_cw);
+ _p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
+ _newcw = _cw & ~FP_PRC_FLD;
+ _newcw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
+ __fnldcw(_cw, _newcw);
+ return (_p);
+}
+
+/*
+ * Get or set the exception mask.
+ * Note that the x87 mask bits are inverted by the API -- a mask bit of 1
+ * means disable for x87 and SSE, but for fp*mask() it means enable.
+ */
+
+static __inline fp_except_t
+__fpgetmask(void)
+{
+ unsigned short _cw;
+
+ __fnstcw(&_cw);
+ return ((~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF);
+}
+
+static __inline fp_except_t
+__fpsetmask(fp_except_t _m)
+{
+ fp_except_t _p;
+ unsigned _mxcsr;
+ unsigned short _cw, _newcw;
+
+ __fnstcw(&_cw);
+ _p = (~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF;
+ _newcw = _cw & ~FP_MSKS_FLD;
+ _newcw |= (~_m << FP_MSKS_OFF) & FP_MSKS_FLD;
+ __fnldcw(_cw, _newcw);
+ __stmxcsr(&_mxcsr);
+ /* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
+ _mxcsr &= ~SSE_MSKS_FLD;
+ _mxcsr |= (~_m << SSE_MSKS_OFF) & SSE_MSKS_FLD;
+ __ldmxcsr(&_mxcsr);
+ return (_p);
+}
+
+static __inline fp_except_t
+__fpgetsticky(void)
+{
+ unsigned _ex, _mxcsr;
+ unsigned short _sw;
+
+ __fnstsw(&_sw);
+ _ex = (_sw & FP_STKY_FLD) >> FP_STKY_OFF;
+ __stmxcsr(&_mxcsr);
+ _ex |= (_mxcsr & SSE_STKY_FLD) >> SSE_STKY_OFF;
+ return ((fp_except_t)_ex);
+}
+
+#endif /* __GNUCLIKE_ASM */
+
+#if !defined(__IEEEFP_NOINLINES__) && defined(__GNUCLIKE_ASM)
+
+#define fpgetmask() __fpgetmask()
+#define fpgetprec() __fpgetprec()
+#define fpgetround() __fpgetround()
+#define fpgetsticky() __fpgetsticky()
+#define fpsetmask(m) __fpsetmask(m)
+#define fpsetprec(m) __fpsetprec(m)
+#define fpsetround(m) __fpsetround(m)
+
+#else /* !(!__IEEEFP_NOINLINES__ && __GNUCLIKE_ASM) */
+
+/* Augment the userland declarations. */
+__BEGIN_DECLS
+extern fp_rnd_t fpgetround(void);
+extern fp_rnd_t fpsetround(fp_rnd_t);
+extern fp_except_t fpgetmask(void);
+extern fp_except_t fpsetmask(fp_except_t);
+extern fp_except_t fpgetsticky(void);
+extern fp_except_t fpsetsticky(fp_except_t);
+fp_prec_t fpgetprec(void);
+fp_prec_t fpsetprec(fp_prec_t);
+__END_DECLS
+
+#endif /* !__IEEEFP_NOINLINES__ && __GNUCLIKE_ASM */
+
+#endif /* !_MACHINE_IEEEFP_H_ */
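
The fpget*()/fpset*() pairs above follow a get/set-and-restore discipline; a
small sketch (the trunc_div() wrapper is hypothetical) that switches the
rounding mode around a single operation and then restores the caller's mode:

    #include <ieeefp.h>

    double
    trunc_div(double a, double b)
    {
            fp_rnd_t osave;
            double r;

            osave = fpsetround(FP_RZ);      /* truncate toward zero */
            r = a / b;
            fpsetround(osave);              /* restore caller's mode */
            return (r);
    }

Note that __fpsetround() updates both the x87 control word and the SSE mxcsr,
so the new mode applies to both instruction sets.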
diff --git a/sys/amd64/include/in_cksum.h b/sys/amd64/include/in_cksum.h
new file mode 100644
index 0000000..156035e
--- /dev/null
+++ b/sys/amd64/include/in_cksum.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from tahoe: in_cksum.c 1.2 86/01/05
+ * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
+ * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_IN_CKSUM_H_
+#define _MACHINE_IN_CKSUM_H_ 1
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+#include <sys/cdefs.h>
+
+#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+
+#if defined(IPVERSION) && (IPVERSION == 4)
+/*
+ * It is useful to have an Internet checksum routine which is inlineable
+ * and optimized specifically for the task of computing IP header checksums
+ * in the normal case (where there are no options and the header length is
+ * therefore always exactly five 32-bit words).
+ */
+#ifdef __CC_SUPPORTS___INLINE
+
+static __inline void
+in_cksum_update(struct ip *ip)
+{
+ int __tmpsum;
+ __tmpsum = (int)ntohs(ip->ip_sum) + 256;
+ ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16));
+}
+
+#else
+
+#define in_cksum_update(ip) \
+ do { \
+ int __tmpsum; \
+ __tmpsum = (int)ntohs(ip->ip_sum) + 256; \
+ ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \
+ } while(0)
+
+#endif
+#endif
+
+#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
+u_int in_cksum_hdr(const struct ip *ip);
+#endif
+u_short in_addword(u_short sum, u_short b);
+u_short in_pseudo(u_int sum, u_int b, u_int c);
+u_short in_cksum_skip(struct mbuf *m, int len, int skip);
+#endif
+
+#endif /* _MACHINE_IN_CKSUM_H_ */
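
To see why in_cksum_update() adds 256: ip_ttl occupies the high-order byte of
a 16-bit word in the IP header, so decrementing it (as a router does when
forwarding) lowers the header sum by 0x100, and the ones-complement checksum
must therefore rise by 0x100.  The (__tmpsum >> 16) term folds any carry back
into the low 16 bits: if ip_sum was 0xffab, __tmpsum becomes 0x100ab and the
stored result is 0x00ab + 0x1 = 0x00ac.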
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
new file mode 100644
index 0000000..8671605
--- /dev/null
+++ b/sys/amd64/include/intr_machdep.h
@@ -0,0 +1,174 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __MACHINE_INTR_MACHDEP_H__
+#define __MACHINE_INTR_MACHDEP_H__
+
+#ifdef _KERNEL
+
+/*
+ * The maximum number of I/O interrupts we allow. This number is rather
+ * arbitrary as it is just the maximum IRQ resource value. The interrupt
+ * source for a given IRQ maps that I/O interrupt to a device interrupt
+ * source, whether it be a pin on an interrupt controller or an MSI interrupt.
+ * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device
+ * interrupts allocate IDT vectors on demand. Currently we have 191 IDT
+ * vectors available for device interrupts. On many systems with I/O APICs,
+ * a lot of the IRQs are not used, so this number can be much larger than
+ * 191 and still be safe since only interrupt sources in actual use will
+ * allocate IDT vectors.
+ *
+ * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs.
+ * IRQ values from 256 upward are used by MSI.  We leave 255 unused to avoid
+ * confusion since 255 is used in PCI to indicate an invalid IRQ.
+ */
+#define NUM_MSI_INTS 512
+#define FIRST_MSI_INT 256
+#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS)
+
+/*
+ * Default base address for MSI messages on x86 platforms.
+ */
+#define MSI_INTEL_ADDR_BASE 0xfee00000
+
+/*
+ * - 1 ??? dummy counter.
+ * - 2 counters for each I/O interrupt.
+ * - 1 counter for each CPU for lapic timer.
+ * - 8 counters for each CPU for IPI counters for SMP.
+ */
+#ifdef SMP
+#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU)
+#else
+#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1)
+#endif
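
For scale: with NUM_IO_INTS at 768 as defined above and MAXCPU at its amd64
SMP default of 64 (see param.h later in this patch), INTRCNT_COUNT works out
to 1 + 768 * 2 + (1 + 8) * 64 = 2113 counters.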
+
+#ifndef LOCORE
+
+typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
+
+#define IDTVEC(name) __CONCAT(X,name)
+
+struct intsrc;
+
+/*
+ * Methods that a PIC provides to mask/unmask a given interrupt source,
+ * "turn on" the interrupt on the CPU side by setting up an IDT entry, and
+ * return the vector associated with this source.
+ */
+struct pic {
+ void (*pic_enable_source)(struct intsrc *);
+ void (*pic_disable_source)(struct intsrc *, int);
+ void (*pic_eoi_source)(struct intsrc *);
+ void (*pic_enable_intr)(struct intsrc *);
+ void (*pic_disable_intr)(struct intsrc *);
+ int (*pic_vector)(struct intsrc *);
+ int (*pic_source_pending)(struct intsrc *);
+ void (*pic_suspend)(struct pic *);
+ void (*pic_resume)(struct pic *);
+ int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
+ enum intr_polarity);
+ int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
+ TAILQ_ENTRY(pic) pics;
+};
+
+/* Flags for pic_disable_source() */
+enum {
+ PIC_EOI,
+ PIC_NO_EOI,
+};
+
+/*
+ * An interrupt source. The upper-layer code uses the PIC methods to
+ * control a given source. The lower-layer PIC drivers can store additional
+ * private data in a given interrupt source such as an interrupt pin number
+ * or an I/O APIC pointer.
+ */
+struct intsrc {
+ struct pic *is_pic;
+ struct intr_event *is_event;
+ u_long *is_count;
+ u_long *is_straycount;
+ u_int is_index;
+ u_int is_handlers;
+};
+
+struct trapframe;
+
+/*
+ * The following data structure holds per-cpu data, and is placed just
+ * above the top of the space used for the NMI stack.
+ */
+struct nmi_pcpu {
+ register_t np_pcpu;
+ register_t __padding; /* pad to 16 bytes */
+};
+
+extern struct mtx icu_lock;
+extern int elcr_found;
+
+#ifndef DEV_ATPIC
+void atpic_reset(void);
+#endif
+/* XXX: The elcr_* prototypes probably belong somewhere else. */
+int elcr_probe(void);
+enum intr_trigger elcr_read_trigger(u_int irq);
+void elcr_resume(void);
+void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
+#ifdef SMP
+void intr_add_cpu(u_int cpu);
+#endif
+int intr_add_handler(const char *name, int vector, driver_filter_t filter,
+ driver_intr_t handler, void *arg, enum intr_type flags,
+ void **cookiep);
+#ifdef SMP
+int intr_bind(u_int vector, u_char cpu);
+#endif
+int intr_config_intr(int vector, enum intr_trigger trig,
+ enum intr_polarity pol);
+int intr_describe(u_int vector, void *ih, const char *descr);
+void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
+u_int intr_next_cpu(void);
+struct intsrc *intr_lookup_source(int vector);
+int intr_register_pic(struct pic *pic);
+int intr_register_source(struct intsrc *isrc);
+int intr_remove_handler(void *cookie);
+void intr_resume(void);
+void intr_suspend(void);
+void intrcnt_add(const char *name, u_long **countp);
+void nexus_add_irq(u_long irq);
+int msi_alloc(device_t dev, int count, int maxcount, int *irqs);
+void msi_init(void);
+int msi_map(int irq, uint64_t *addr, uint32_t *data);
+int msi_release(int *irqs, int count);
+int msix_alloc(device_t dev, int *irq);
+int msix_release(int irq);
+
+#endif /* !LOCORE */
+#endif /* _KERNEL */
+#endif /* !__MACHINE_INTR_MACHDEP_H__ */
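
A hedged sketch of the shape of a PIC driver built on struct pic and
intr_register_pic() above; "mypic" and the method bodies are hypothetical,
and methods that are not needed may be left NULL:

    static void
    mypic_enable_source(struct intsrc *isrc)
    {
            /* Unmask the pin or message behind isrc. */
    }

    static int
    mypic_vector(struct intsrc *isrc)
    {
            return (0);     /* IDT vector backing this source. */
    }

    static struct pic mypic = {
            .pic_enable_source = mypic_enable_source,
            .pic_vector = mypic_vector,
            /* ... eoi_source, disable_source, etc. as required ... */
    };

    static void
    mypic_attach(void)
    {
            (void)intr_register_pic(&mypic);
    }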
diff --git a/sys/amd64/include/iodev.h b/sys/amd64/include/iodev.h
new file mode 100644
index 0000000..9f53cac
--- /dev/null
+++ b/sys/amd64/include/iodev.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2004 Mark R V Murray
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _MACHINE_IODEV_H_
+#define _MACHINE_IODEV_H_
+
+#ifdef _KERNEL
+#include <machine/cpufunc.h>
+
+#define iodev_read_1 inb
+#define iodev_read_2 inw
+#define iodev_read_4 inl
+#define iodev_write_1 outb
+#define iodev_write_2 outw
+#define iodev_write_4 outl
+
+int iodev_open(struct thread *td);
+int iodev_close(struct thread *td);
+int iodev_ioctl(u_long cmd, caddr_t data);
+
+#endif /* _KERNEL */
+#endif /* _MACHINE_IODEV_H_ */
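
Since the iodev_* names above are direct aliases for inb()/outb() and
friends, a consumer can poll a legacy port with them; port 0x64 (the i8042
status register) is used here purely as an example:

    static u_char
    kbdc_status(void)
    {
            return (iodev_read_1(0x64));
    }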
diff --git a/sys/amd64/include/kdb.h b/sys/amd64/include/kdb.h
new file mode 100644
index 0000000..56d2018
--- /dev/null
+++ b/sys/amd64/include/kdb.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2004 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_KDB_H_
+#define _MACHINE_KDB_H_
+
+#include <machine/frame.h>
+#include <machine/psl.h>
+
+#define KDB_STOPPEDPCB(pc) &stoppcbs[pc->pc_cpuid]
+
+static __inline void
+kdb_cpu_clear_singlestep(void)
+{
+ kdb_frame->tf_rflags &= ~PSL_T;
+}
+
+static __inline void
+kdb_cpu_set_singlestep(void)
+{
+ kdb_frame->tf_rflags |= PSL_T;
+}
+
+static __inline void
+kdb_cpu_sync_icache(unsigned char *addr, size_t size)
+{
+}
+
+static __inline void
+kdb_cpu_trap(int type, int code)
+{
+}
+
+#endif /* _MACHINE_KDB_H_ */
diff --git a/sys/amd64/include/limits.h b/sys/amd64/include/limits.h
new file mode 100644
index 0000000..35eea1f
--- /dev/null
+++ b/sys/amd64/include/limits.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)limits.h 8.3 (Berkeley) 1/4/94
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_LIMITS_H_
+#define _MACHINE_LIMITS_H_
+
+#include <sys/cdefs.h>
+
+#ifdef __CC_SUPPORTS_WARNING
+#warning "machine/limits.h is deprecated. Include sys/limits.h instead."
+#endif
+
+#include <sys/limits.h>
+
+#endif /* !_MACHINE_LIMITS_H_ */
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
new file mode 100644
index 0000000..5d7cb74
--- /dev/null
+++ b/sys/amd64/include/md_var.h
@@ -0,0 +1,121 @@
+/*-
+ * Copyright (c) 1995 Bruce D. Evans.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MD_VAR_H_
+#define _MACHINE_MD_VAR_H_
+
+/*
+ * Miscellaneous machine-dependent declarations.
+ */
+
+extern long Maxmem;
+extern u_int basemem;
+extern int busdma_swi_pending;
+extern u_int cpu_exthigh;
+extern u_int cpu_feature;
+extern u_int cpu_feature2;
+extern u_int amd_feature;
+extern u_int amd_feature2;
+extern u_int amd_pminfo;
+extern u_int via_feature_rng;
+extern u_int via_feature_xcrypt;
+extern u_int cpu_clflush_line_size;
+extern u_int cpu_stdext_feature;
+extern u_int cpu_fxsr;
+extern u_int cpu_high;
+extern u_int cpu_id;
+extern u_int cpu_max_ext_state_size;
+extern u_int cpu_mxcsr_mask;
+extern u_int cpu_procinfo;
+extern u_int cpu_procinfo2;
+extern char cpu_vendor[];
+extern u_int cpu_vendor_id;
+extern char ctx_switch_xsave[];
+extern char kstack[];
+extern char sigcode[];
+extern int szsigcode;
+extern uint64_t *vm_page_dump;
+extern int vm_page_dump_size;
+extern int workaround_erratum383;
+extern int _udatasel;
+extern int _ucodesel;
+extern int _ucode32sel;
+extern int _ufssel;
+extern int _ugssel;
+extern int use_xsave;
+extern uint64_t xsave_mask;
+
+typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
+struct pcb;
+struct savefpu;
+struct thread;
+struct reg;
+struct fpreg;
+struct dbreg;
+struct dumperinfo;
+
+void *alloc_fpusave(int flags);
+void amd64_syscall(struct thread *td, int traced);
+void busdma_swi(void);
+void cpu_setregs(void);
+void ctx_fpusave(void *);
+void doreti_iret(void) __asm(__STRING(doreti_iret));
+void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
+void ld_ds(void) __asm(__STRING(ld_ds));
+void ld_es(void) __asm(__STRING(ld_es));
+void ld_fs(void) __asm(__STRING(ld_fs));
+void ld_gs(void) __asm(__STRING(ld_gs));
+void ld_fsbase(void) __asm(__STRING(ld_fsbase));
+void ld_gsbase(void) __asm(__STRING(ld_gsbase));
+void ds_load_fault(void) __asm(__STRING(ds_load_fault));
+void es_load_fault(void) __asm(__STRING(es_load_fault));
+void fs_load_fault(void) __asm(__STRING(fs_load_fault));
+void gs_load_fault(void) __asm(__STRING(gs_load_fault));
+void fsbase_load_fault(void) __asm(__STRING(fsbase_load_fault));
+void gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault));
+void dump_add_page(vm_paddr_t);
+void dump_drop_page(vm_paddr_t);
+void initializecpu(void);
+void initializecpucache(void);
+void fillw(int /*u_short*/ pat, void *base, size_t cnt);
+void fpstate_drop(struct thread *td);
+int is_physical_memory(vm_paddr_t addr);
+int isa_nmi(int cd);
+void pagecopy(void *from, void *to);
+void pagezero(void *addr);
+void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist);
+int user_dbreg_trap(void);
+void minidumpsys(struct dumperinfo *);
+struct savefpu *get_pcb_user_save_td(struct thread *td);
+struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
+struct pcb *get_pcb_td(struct thread *td);
+
+#endif /* !_MACHINE_MD_VAR_H_ */
diff --git a/sys/amd64/include/memdev.h b/sys/amd64/include/memdev.h
new file mode 100644
index 0000000..649b557
--- /dev/null
+++ b/sys/amd64/include/memdev.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2004 Mark R V Murray
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MEMDEV_H_
+#define _MACHINE_MEMDEV_H_
+
+#define CDEV_MINOR_MEM 0
+#define CDEV_MINOR_KMEM 1
+
+d_open_t memopen;
+d_read_t memrw;
+d_ioctl_t memioctl;
+d_mmap_t memmmap;
+
+#endif /* _MACHINE_MEMDEV_H_ */
diff --git a/sys/amd64/include/metadata.h b/sys/amd64/include/metadata.h
new file mode 100644
index 0000000..4c7ec9e
--- /dev/null
+++ b/sys/amd64/include/metadata.h
@@ -0,0 +1,35 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm <peter@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_METADATA_H_
+#define _MACHINE_METADATA_H_
+
+#define MODINFOMD_SMAP 0x1001
+#define MODINFOMD_SMAP_XATTR 0x1002
+
+#endif /* !_MACHINE_METADATA_H_ */
diff --git a/sys/amd64/include/minidump.h b/sys/amd64/include/minidump.h
new file mode 100644
index 0000000..2ac529c
--- /dev/null
+++ b/sys/amd64/include/minidump.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2006 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MINIDUMP_H_
+#define _MACHINE_MINIDUMP_H_ 1
+
+#define MINIDUMP_MAGIC "minidump FreeBSD/amd64"
+#define MINIDUMP_VERSION 2
+
+struct minidumphdr {
+ char magic[24];
+ uint32_t version;
+ uint32_t msgbufsize;
+ uint32_t bitmapsize;
+ uint32_t pmapsize;
+ uint64_t kernbase;
+ uint64_t dmapbase;
+ uint64_t dmapend;
+};
+
+#endif /* _MACHINE_MINIDUMP_H_ */
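
A sketch of how a dump reader might validate this header before trusting the
embedded sizes; minidump_check() is hypothetical, and the includes assume a
userland reader:

    #include <errno.h>
    #include <string.h>

    static int
    minidump_check(const struct minidumphdr *hdr)
    {
            if (strncmp(hdr->magic, MINIDUMP_MAGIC, sizeof(hdr->magic)) != 0)
                    return (EINVAL);
            if (hdr->version != MINIDUMP_VERSION)
                    return (EINVAL);
            return (0);
    }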
diff --git a/sys/amd64/include/mp_watchdog.h b/sys/amd64/include/mp_watchdog.h
new file mode 100644
index 0000000..bcec051
--- /dev/null
+++ b/sys/amd64/include/mp_watchdog.h
@@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2004 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MP_WATCHDOG_H_
+#define _MACHINE_MP_WATCHDOG_H_
+
+void ap_watchdog(u_int cpuid);
+
+#endif /* !_MACHINE_MP_WATCHDOG_H_ */
diff --git a/sys/amd64/include/nexusvar.h b/sys/amd64/include/nexusvar.h
new file mode 100644
index 0000000..c39a686
--- /dev/null
+++ b/sys/amd64/include/nexusvar.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright 1998 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_NEXUSVAR_H_
+#define _MACHINE_NEXUSVAR_H_
+
+struct nexus_device {
+ struct resource_list nx_resources;
+};
+
+DECLARE_CLASS(nexus_driver);
+
+extern struct rman irq_rman, drq_rman, port_rman, mem_rman;
+
+void nexus_init_resources(void);
+
+#endif /* !_MACHINE_NEXUSVAR_H_ */
diff --git a/sys/amd64/include/npx.h b/sys/amd64/include/npx.h
new file mode 100644
index 0000000..ec70f1c
--- /dev/null
+++ b/sys/amd64/include/npx.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/fpu.h>
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
new file mode 100644
index 0000000..9ddcf68
--- /dev/null
+++ b/sys/amd64/include/param.h
@@ -0,0 +1,142 @@
+/*-
+ * Copyright (c) 2002 David E. O'Brien. All rights reserved.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and Ralph Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)param.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+
+#ifndef _AMD64_INCLUDE_PARAM_H_
+#define _AMD64_INCLUDE_PARAM_H_
+
+#include <machine/_align.h>
+
+/*
+ * Machine dependent constants for AMD64.
+ */
+
+
+#define __HAVE_ACPI
+#define __PCI_REROUTE_INTERRUPT
+
+#ifndef MACHINE
+#define MACHINE "amd64"
+#endif
+#ifndef MACHINE_ARCH
+#define MACHINE_ARCH "amd64"
+#endif
+#ifndef MACHINE_ARCH32
+#define MACHINE_ARCH32 "i386"
+#endif
+
+#if defined(SMP) || defined(KLD_MODULE)
+#ifndef MAXCPU
+#define MAXCPU 64
+#endif
+#else
+#define MAXCPU 1
+#endif
+
+#define ALIGNBYTES _ALIGNBYTES
+#define ALIGN(p) _ALIGN(p)
+/*
+ * ALIGNED_POINTER is a boolean macro that checks whether an address
+ * is valid to fetch data elements of type t from on this architecture.
+ * This does not reflect the optimal alignment, just the possibility
+ * (within reasonable limits).
+ */
+#define ALIGNED_POINTER(p, t) 1
+
+/*
+ * CACHE_LINE_SIZE is the compile-time maximum cache line size for an
+ * architecture. It should be used with appropriate caution.
+ */
+#define CACHE_LINE_SHIFT 7
+#define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT)
+
+/* Size of the level 1 page table units */
+#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
+#define NPTEPGSHIFT 9 /* LOG2(NPTEPG) */
+#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */
+#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */
+#define PAGE_MASK (PAGE_SIZE-1)
+/* Size of the level 2 page directory units */
+#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NPDEPGSHIFT 9 /* LOG2(NPDEPG) */
+#define PDRSHIFT 21 /* LOG2(NBPDR) */
+#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
+#define PDRMASK (NBPDR-1)
+/* Size of the level 3 page directory pointer table units */
+#define NPDPEPG (PAGE_SIZE/(sizeof (pdp_entry_t)))
+#define NPDPEPGSHIFT 9 /* LOG2(NPDPEPG) */
+#define PDPSHIFT 30 /* LOG2(NBPDP) */
+#define NBPDP (1<<PDPSHIFT) /* bytes/page dir ptr table */
+#define PDPMASK (NBPDP-1)
+/* Size of the level 4 page-map level-4 table units */
+#define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t)))
+#define NPML4EPGSHIFT 9 /* LOG2(NPML4EPG) */
+#define PML4SHIFT 39 /* LOG2(NBPML4) */
+#define NBPML4 (1UL<<PML4SHIFT)/* bytes/page map lev4 table */
+#define PML4MASK (NBPML4-1)
+
+#define MAXPAGESIZES 3 /* maximum number of supported page sizes */
+
+#define IOPAGES 2 /* pages of i/o permission bitmap */
+
+#ifndef KSTACK_PAGES
+#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */
+#endif
+#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */
+
+/*
+ * Mach derived conversion macros
+ */
+#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
+#define trunc_page(x) ((unsigned long)(x) & ~(PAGE_MASK))
+#define trunc_2mpage(x) ((unsigned long)(x) & ~PDRMASK)
+#define round_2mpage(x) ((((unsigned long)(x)) + PDRMASK) & ~PDRMASK)
+#define trunc_1gpage(x) ((unsigned long)(x) & ~PDPMASK)
+
+#define atop(x) ((unsigned long)(x) >> PAGE_SHIFT)
+#define ptoa(x) ((unsigned long)(x) << PAGE_SHIFT)
+
+#define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT)
+#define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT)
+
+#define pgtok(x) ((unsigned long)(x) * (PAGE_SIZE / 1024))
+
+#endif /* !_AMD64_INCLUDE_PARAM_H_ */
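
The shift and mask constants above decompose a canonical virtual address into
four 9-bit table indices plus a 12-bit page offset.  Hypothetical helper
macros, as a sketch of how the pieces relate:

    #define VA_PML4_IDX(va) (((va) >> PML4SHIFT) & (NPML4EPG - 1))
    #define VA_PDP_IDX(va)  (((va) >> PDPSHIFT) & (NPDPEPG - 1))
    #define VA_PD_IDX(va)   (((va) >> PDRSHIFT) & (NPDEPG - 1))
    #define VA_PT_IDX(va)   (((va) >> PAGE_SHIFT) & (NPTEPG - 1))
    #define VA_POFF(va)     ((va) & PAGE_MASK)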
diff --git a/sys/amd64/include/pc/bios.h b/sys/amd64/include/pc/bios.h
new file mode 100644
index 0000000..e7d568e
--- /dev/null
+++ b/sys/amd64/include/pc/bios.h
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 1997 Michael Smith
+ * Copyright (c) 1998 Jonathan Lemon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PC_BIOS_H_
+#define _MACHINE_PC_BIOS_H_
+
+/*
+ * Int 15:E820 'SMAP' structure
+ */
+#define SMAP_SIG 0x534D4150 /* 'SMAP' */
+
+#define SMAP_TYPE_MEMORY 1
+#define SMAP_TYPE_RESERVED 2
+#define SMAP_TYPE_ACPI_RECLAIM 3
+#define SMAP_TYPE_ACPI_NVS 4
+#define SMAP_TYPE_ACPI_ERROR 5
+
+#define SMAP_XATTR_ENABLED 0x00000001
+#define SMAP_XATTR_NON_VOLATILE 0x00000002
+#define SMAP_XATTR_MASK (SMAP_XATTR_ENABLED | SMAP_XATTR_NON_VOLATILE)
+
+struct bios_smap {
+ u_int64_t base;
+ u_int64_t length;
+ u_int32_t type;
+} __packed;
+
+/*
+ * System Management BIOS
+ */
+#define SMBIOS_START 0xf0000
+#define SMBIOS_STEP 0x10
+#define SMBIOS_OFF 0
+#define SMBIOS_LEN 4
+#define SMBIOS_SIG "_SM_"
+
+struct smbios_eps {
+ uint8_t anchor_string[4]; /* '_SM_' */
+ uint8_t checksum;
+ uint8_t length;
+ uint8_t major_version;
+ uint8_t minor_version;
+ uint16_t maximum_structure_size;
+ uint8_t entry_point_revision;
+ uint8_t formatted_area[5];
+ uint8_t intermediate_anchor_string[5]; /* '_DMI_' */
+ uint8_t intermediate_checksum;
+ uint16_t structure_table_length;
+ uint32_t structure_table_address;
+ uint16_t number_structures;
+ uint8_t BCD_revision;
+};
+
+struct smbios_structure_header {
+ uint8_t type;
+ uint8_t length;
+ uint16_t handle;
+};
+
+#ifdef _KERNEL
+#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE)
+#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE)
+
+struct bios_oem_signature {
+ char * anchor; /* search anchor string in BIOS memory */
+ size_t offset; /* offset from anchor (may be negative) */
+ size_t totlen; /* total length of BIOS string to copy */
+} __packed;
+
+struct bios_oem_range {
+ u_int from; /* shouldn't be below 0xe0000 */
+ u_int to; /* shouldn't be above 0xfffff */
+} __packed;
+
+struct bios_oem {
+ struct bios_oem_range range;
+ struct bios_oem_signature signature[];
+} __packed;
+
+int bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen);
+uint32_t bios_sigsearch(uint32_t start, u_char *sig, int siglen, int paralen,
+ int sigofs);
+#endif
+
+#endif /* _MACHINE_PC_BIOS_H_ */
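
A sketch of how the SMBIOS constants combine with the bios_sigsearch()
prototype above to locate the entry point (kernel context; the variable
names are hypothetical):

    uint32_t paddr;
    struct smbios_eps *eps;

    paddr = bios_sigsearch(SMBIOS_START, (u_char *)SMBIOS_SIG, SMBIOS_LEN,
        SMBIOS_STEP, SMBIOS_OFF);
    if (paddr != 0)
            eps = (struct smbios_eps *)BIOS_PADDRTOVADDR(paddr);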
diff --git a/sys/amd64/include/pc/display.h b/sys/amd64/include/pc/display.h
new file mode 100644
index 0000000..cd2d5ff
--- /dev/null
+++ b/sys/amd64/include/pc/display.h
@@ -0,0 +1,45 @@
+/*
+ * IBM PC display definitions
+ *
+ * $FreeBSD$
+ */
+
+/* Color attributes for foreground text */
+
+#define FG_BLACK 0
+#define FG_BLUE 1
+#define FG_GREEN 2
+#define FG_CYAN 3
+#define FG_RED 4
+#define FG_MAGENTA 5
+#define FG_BROWN 6
+#define FG_LIGHTGREY 7
+#define FG_DARKGREY 8
+#define FG_LIGHTBLUE 9
+#define FG_LIGHTGREEN 10
+#define FG_LIGHTCYAN 11
+#define FG_LIGHTRED 12
+#define FG_LIGHTMAGENTA 13
+#define FG_YELLOW 14
+#define FG_WHITE 15
+#define FG_BLINK 0x80
+
+/* Color attributes for text background */
+
+#define BG_BLACK 0x00
+#define BG_BLUE 0x10
+#define BG_GREEN 0x20
+#define BG_CYAN 0x30
+#define BG_RED 0x40
+#define BG_MAGENTA 0x50
+#define BG_BROWN 0x60
+#define BG_LIGHTGREY 0x70
+
+/* Monochrome attributes for foreground text */
+
+#define FG_UNDERLINE 0x01
+#define FG_INTENSE 0x08
+
+/* Monochrome attributes for text background */
+
+#define BG_INTENSE 0x10
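
A text-mode attribute byte is the bitwise OR of one FG_* value (the low
nibble), one BG_* value (bits 4-6) and, on color adapters, optionally
FG_BLINK; for example:

    #define ATTR_WHITE_ON_BLUE      (FG_WHITE | BG_BLUE)                /* 0x1f */
    #define ATTR_BLINKING_RED       (FG_LIGHTRED | BG_BLACK | FG_BLINK) /* 0x8c */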
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
new file mode 100644
index 0000000..22cbbe2
--- /dev/null
+++ b/sys/amd64/include/pcb.h
@@ -0,0 +1,149 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91
+ * $FreeBSD$
+ */
+
+#ifndef _AMD64_PCB_H_
+#define _AMD64_PCB_H_
+
+/*
+ * AMD64 process control block
+ */
+#include <machine/fpu.h>
+#include <machine/segments.h>
+
+struct pcb {
+ register_t pcb_r15;
+ register_t pcb_r14;
+ register_t pcb_r13;
+ register_t pcb_r12;
+ register_t pcb_rbp;
+ register_t pcb_rsp;
+ register_t pcb_rbx;
+ register_t pcb_rip;
+ register_t pcb_fsbase;
+ register_t pcb_gsbase;
+ register_t pcb_kgsbase;
+ register_t pcb_cr0;
+ register_t pcb_cr2;
+ register_t pcb_cr3;
+ register_t pcb_cr4;
+ register_t pcb_dr0;
+ register_t pcb_dr1;
+ register_t pcb_dr2;
+ register_t pcb_dr3;
+ register_t pcb_dr6;
+ register_t pcb_dr7;
+
+ struct region_descriptor pcb_gdt;
+ struct region_descriptor pcb_idt;
+ struct region_descriptor pcb_ldt;
+ uint16_t pcb_tr;
+
+ u_int pcb_flags;
+#define PCB_FULL_IRET 0x01 /* full iret is required */
+#define PCB_DBREGS 0x02 /* process using debug registers */
+#define PCB_KERNFPU 0x04 /* kernel uses fpu */
+#define PCB_FPUINITDONE 0x08 /* fpu state is initialized */
+#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */
+#define PCB_GS32BIT 0x20 /* linux gs switch */
+#define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */
+
+ uint16_t pcb_initial_fpucw;
+
+ /* copyin/out fault recovery */
+ caddr_t pcb_onfault;
+
+ /* 32-bit segment descriptor */
+ struct user_segment_descriptor pcb_gs32sd;
+
+ /* local tss, with i/o bitmap; NULL for common */
+ struct amd64tss *pcb_tssp;
+
+ /* model specific registers */
+ register_t pcb_efer;
+ register_t pcb_star;
+ register_t pcb_lstar;
+ register_t pcb_cstar;
+ register_t pcb_sfmask;
+ register_t pcb_xsmask;
+
+ /* fpu context for suspend/resume */
+ void *pcb_fpususpend;
+
+ struct savefpu *pcb_save;
+
+ uint64_t pcb_pad[3];
+};
+
+#ifdef _KERNEL
+struct trapframe;
+
+/*
+ * The pcb_flags field is only modified by the current thread, or by other
+ * threads when the current thread is stopped.  However, the current thread
+ * may change it from the interrupt context in cpu_switch(), or in the trap
+ * handler.  When we read-modify-write pcb_flags from C sources, the compiler
+ * may generate code that is not atomic with respect to the interrupt
+ * handler.  If a trap or
+ * interrupt happens and any flag is modified from the handler, it can be
+ * clobbered with the cached value later. Therefore, we implement setting
+ * and clearing flags with single-instruction functions, which do not race
+ * with possible modification of the flags from the trap or interrupt context,
+ * because traps and interrupts are executed only on instruction boundary.
+ */
+static __inline void
+set_pcb_flags(struct pcb *pcb, const u_int flags)
+{
+
+ __asm __volatile("orl %1,%0"
+ : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
+ : "cc");
+}
+
+static __inline void
+clear_pcb_flags(struct pcb *pcb, const u_int flags)
+{
+
+ __asm __volatile("andl %1,%0"
+ : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
+ : "cc");
+}
+
+void makectx(struct trapframe *, struct pcb *);
+int savectx(struct pcb *) __returns_twice;
+void resumectx(struct pcb *);
+
+#endif
+
+#endif /* _AMD64_PCB_H_ */
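
As a usage sketch of the helpers above: code on the syscall or trap path that
must return to userspace through the full iret sequence marks the pcb with a
single instruction, which cannot be torn by an interrupt, for example:

    set_pcb_flags(curthread->td_pcb, PCB_FULL_IRET);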
diff --git a/sys/amd64/include/pci_cfgreg.h b/sys/amd64/include/pci_cfgreg.h
new file mode 100644
index 0000000..717d5cc
--- /dev/null
+++ b/sys/amd64/include/pci_cfgreg.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/pci_cfgreg.h>
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
new file mode 100644
index 0000000..bb7d339
--- /dev/null
+++ b/sys/amd64/include/pcpu.h
@@ -0,0 +1,261 @@
+/*-
+ * Copyright (c) Peter Wemm <peter@netplex.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PCPU_H_
+#define _MACHINE_PCPU_H_
+
+#ifndef _SYS_CDEFS_H_
+#error "sys/cdefs.h is a prerequisite for this file"
+#endif
+
+#if defined(XEN) || defined(XENHVM)
+#ifndef NR_VIRQS
+#define NR_VIRQS 24
+#endif
+#ifndef NR_IPIS
+#define NR_IPIS 2
+#endif
+#endif
+
+#ifdef XENHVM
+#define PCPU_XEN_FIELDS \
+ ; \
+ unsigned int pc_last_processed_l1i; \
+ unsigned int pc_last_processed_l2i
+#else
+#define PCPU_XEN_FIELDS
+#endif
+
+/*
+ * The SMP parts are set up in pmap.c and locore.s for the BSP, and
+ * mp_machdep.c sets up the data for the APs to "see" when they awake.
+ * The reason for doing it via a struct is so that an array of pointers
+ * to each CPU's data can be set up for things like "check curproc on
+ * all other processors".
+ */
+#define PCPU_MD_FIELDS \
+ char pc_monitorbuf[128] __aligned(128); /* cache line */ \
+ struct pcpu *pc_prvspace; /* Self-reference */ \
+ struct pmap *pc_curpmap; \
+ struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \
+ struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \
+ register_t pc_rsp0; \
+ register_t pc_scratch_rsp; /* User %rsp in syscall */ \
+ u_int pc_apic_id; \
+ u_int pc_acpi_id; /* ACPI CPU id */ \
+ /* Pointer to the CPU %fs descriptor */ \
+ struct user_segment_descriptor *pc_fs32p; \
+ /* Pointer to the CPU %gs descriptor */ \
+ struct user_segment_descriptor *pc_gs32p; \
+ /* Pointer to the CPU LDT descriptor */ \
+ struct system_segment_descriptor *pc_ldt; \
+ /* Pointer to the CPU TSS descriptor */ \
+ struct system_segment_descriptor *pc_tss; \
+ u_int pc_cmci_mask /* MCx banks for CMCI */ \
+ PCPU_XEN_FIELDS; \
+ char __pad[293] /* pad so the struct size divides \
+ PAGE_SIZE evenly after cache alignment */
+
+#ifdef _KERNEL
+
+#ifdef lint
+
+extern struct pcpu *pcpup;
+
+#define PCPU_GET(member) (pcpup->pc_ ## member)
+#define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val))
+#define PCPU_INC(member) PCPU_ADD(member, 1)
+#define PCPU_PTR(member) (&pcpup->pc_ ## member)
+#define PCPU_SET(member, val) (pcpup->pc_ ## member = (val))
+
+#elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
+
+/*
+ * Evaluates to the byte offset of the per-cpu variable name.
+ */
+#define __pcpu_offset(name) \
+ __offsetof(struct pcpu, name)
+
+/*
+ * Evaluates to the type of the per-cpu variable name.
+ */
+#define __pcpu_type(name) \
+ __typeof(((struct pcpu *)0)->name)
+
+/*
+ * Evaluates to the address of the per-cpu variable name.
+ */
+#define __PCPU_PTR(name) __extension__ ({ \
+ __pcpu_type(name) *__p; \
+ \
+ __asm __volatile("movq %%gs:%1,%0; addq %2,%0" \
+ : "=r" (__p) \
+ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \
+ "i" (__pcpu_offset(name))); \
+ \
+ __p; \
+})
+
+/*
+ * Evaluates to the value of the per-cpu variable name.
+ */
+#define __PCPU_GET(name) __extension__ ({ \
+ __pcpu_type(name) __res; \
+ struct __s { \
+ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \
+ } __s; \
+ \
+ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \
+ sizeof(__res) == 4 || sizeof(__res) == 8) { \
+ __asm __volatile("mov %%gs:%1,%0" \
+ : "=r" (__s) \
+ : "m" (*(struct __s *)(__pcpu_offset(name)))); \
+ *(struct __s *)(void *)&__res = __s; \
+ } else { \
+ __res = *__PCPU_PTR(name); \
+ } \
+ __res; \
+})
+
+/*
+ * Adds the value to the per-cpu counter name. The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define __PCPU_ADD(name, val) do { \
+ __pcpu_type(name) __val; \
+ struct __s { \
+ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \
+ } __s; \
+ \
+ __val = (val); \
+ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \
+ sizeof(__val) == 4 || sizeof(__val) == 8) { \
+ __s = *(struct __s *)(void *)&__val; \
+ __asm __volatile("add %1,%%gs:%0" \
+ : "=m" (*(struct __s *)(__pcpu_offset(name))) \
+ : "r" (__s)); \
+ } else \
+ *__PCPU_PTR(name) += __val; \
+} while (0)
+
+/*
+ * Increments the value of the per-cpu counter name. The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define __PCPU_INC(name) do { \
+ CTASSERT(sizeof(__pcpu_type(name)) == 1 || \
+ sizeof(__pcpu_type(name)) == 2 || \
+ sizeof(__pcpu_type(name)) == 4 || \
+ sizeof(__pcpu_type(name)) == 8); \
+ if (sizeof(__pcpu_type(name)) == 1) { \
+ __asm __volatile("incb %%gs:%0" \
+ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
+ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
+ } else if (sizeof(__pcpu_type(name)) == 2) { \
+ __asm __volatile("incw %%gs:%0" \
+ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
+ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
+ } else if (sizeof(__pcpu_type(name)) == 4) { \
+ __asm __volatile("incl %%gs:%0" \
+ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
+ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
+ } else if (sizeof(__pcpu_type(name)) == 8) { \
+ __asm __volatile("incq %%gs:%0" \
+ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
+ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
+ } \
+} while (0)
+
+/*
+ * Sets the value of the per-cpu variable name to value val.
+ */
+#define __PCPU_SET(name, val) { \
+ __pcpu_type(name) __val; \
+ struct __s { \
+ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \
+ } __s; \
+ \
+ __val = (val); \
+ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \
+ sizeof(__val) == 4 || sizeof(__val) == 8) { \
+ __s = *(struct __s *)(void *)&__val; \
+ __asm __volatile("mov %1,%%gs:%0" \
+ : "=m" (*(struct __s *)(__pcpu_offset(name))) \
+ : "r" (__s)); \
+ } else { \
+ *__PCPU_PTR(name) = __val; \
+ } \
+}
+
+#define PCPU_GET(member) __PCPU_GET(pc_ ## member)
+#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val)
+#define PCPU_INC(member) __PCPU_INC(pc_ ## member)
+#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
+#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
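
As a usage sketch (the pc_ prefix is stripped by the wrapper macros; both
fields below come from PCPU_MD_FIELDS above, and kernel_pmap comes from
<machine/pmap.h>):

    u_int id;

    id = PCPU_GET(apic_id);          /* one %gs-relative mov */
    PCPU_SET(curpmap, kernel_pmap);  /* likewise a single store */

Because each access is a single %gs-relative instruction, no locking or
interrupt disabling is needed to keep it consistent on the local CPU.
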
+
+#define OFFSETOF_CURTHREAD 0
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnull-dereference"
+#endif
+static __inline __pure2 struct thread *
+__curthread(void)
+{
+ struct thread *td;
+
+ __asm("movq %%gs:%1,%0" : "=r" (td)
+ : "m" (*(char *)OFFSETOF_CURTHREAD));
+ return (td);
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+#define curthread (__curthread())
+
+#define OFFSETOF_CURPCB 32
+static __inline __pure2 struct pcb *
+__curpcb(void)
+{
+ struct pcb *pcb;
+
+ __asm("movq %%gs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB));
+ return (pcb);
+}
+#define curpcb (__curpcb())
+
+#define IS_BSP() (PCPU_GET(cpuid) == 0)
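
For example (sketch), fetching the current thread costs one instruction and
composes with the other per-CPU accessors:

    struct thread *td = curthread;  /* movq %gs:OFFSETOF_CURTHREAD,%reg */

    if (IS_BSP())
        printf("%s running on the boot processor\n", td->td_name);
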
+
+#else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */
+
+#error "this file needs to be ported to your compiler"
+
+#endif /* lint, etc. */
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_PCPU_H_ */
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
new file mode 100644
index 0000000..6d76ec3
--- /dev/null
+++ b/sys/amd64/include/pmap.h
@@ -0,0 +1,337 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Derived from hp300 version by Mike Hibler, this version by William
+ * Jolitz uses a recursive map [a pde points to the page directory] to
+ * map the page tables using the pagetables themselves. This is done to
+ * reduce the impact on kernel virtual memory for lots of sparse address
+ * space, and to reduce the cost of memory to each process.
+ *
+ * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
+ * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PMAP_H_
+#define _MACHINE_PMAP_H_
+
+/*
+ * Page-directory and page-table entries follow this format.  A few of
+ * the fields are absent at some paging levels, and a few are honored
+ * only when the matching processor feature is enabled.
+ */
+ /* ---- Intel Nomenclature ---- */
+#define PG_V 0x001 /* P Valid */
+#define PG_RW 0x002 /* R/W Read/Write */
+#define PG_U 0x004 /* U/S User/Supervisor */
+#define PG_NC_PWT 0x008 /* PWT Write through */
+#define PG_NC_PCD 0x010 /* PCD Cache disable */
+#define PG_A 0x020 /* A Accessed */
+#define PG_M 0x040 /* D Dirty */
+#define PG_PS 0x080 /* PS Page size (0=4k,1=2M) */
+#define PG_PTE_PAT 0x080 /* PAT PAT index */
+#define PG_G 0x100 /* G Global */
+#define PG_AVAIL1 0x200 /* / Available for system */
+#define PG_AVAIL2 0x400 /* < programmers use */
+#define PG_AVAIL3 0x800 /* \ */
+#define PG_PDE_PAT 0x1000 /* PAT PAT index */
+#define PG_NX (1ul<<63) /* No-execute */
+
+
+/* Our various interpretations of the above */
+#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */
+#define PG_MANAGED PG_AVAIL2
+#define PG_FRAME (0x000ffffffffff000ul)
+#define PG_PS_FRAME (0x000fffffffe00000ul)
+#define PG_PROT (PG_RW|PG_U) /* all protection bits */
+#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
+
+/* Page level cache control fields used to determine the PAT type */
+#define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
+#define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
+
+/*
+ * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB
+ * (PTE) page mappings have identical settings for the following fields:
+ */
+#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \
+ PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
+
+/*
+ * Page Protection Exception bits
+ */
+
+#define PGEX_P 0x01 /* Protection violation vs. not present */
+#define PGEX_W 0x02 /* during a Write cycle */
+#define PGEX_U 0x04 /* access from User mode (UPL) */
+#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */
+#define PGEX_I 0x10 /* during an instruction fetch */
+
+/*
+ * Pte related macros. This is complicated by having to deal with
+ * the sign extension of the 48th bit.
+ */
+#define KVADDR(l4, l3, l2, l1) ( \
+ ((unsigned long)-1 << 47) | \
+ ((unsigned long)(l4) << PML4SHIFT) | \
+ ((unsigned long)(l3) << PDPSHIFT) | \
+ ((unsigned long)(l2) << PDRSHIFT) | \
+ ((unsigned long)(l1) << PAGE_SHIFT))
+
+#define UVADDR(l4, l3, l2, l1) ( \
+ ((unsigned long)(l4) << PML4SHIFT) | \
+ ((unsigned long)(l3) << PDPSHIFT) | \
+ ((unsigned long)(l2) << PDRSHIFT) | \
+ ((unsigned long)(l1) << PAGE_SHIFT))
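
As a worked example (assuming the usual amd64 constants PML4SHIFT = 39 and
NPML4EPG = 512, so that PML4PML4I below evaluates to 256), the base of the
recursive page-table map expands to:

    /*
     * KVADDR(256, 0, 0, 0):
     *   ((unsigned long)-1  << 47) = 0xffff800000000000  (sign extension)
     * | ((unsigned long)256 << 39) = 0x0000800000000000
     *                              = 0xffff800000000000
     *
     * Without the -1 << 47 term the result would not be canonical, since
     * bit 47 must be replicated through bits 48-63.
     */
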
+
+#define NKPML4E 1 /* number of kernel PML4 slots */
+
+#define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */
+#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
+#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
+
+/*
+ * NDMPML4E is the number of PML4 entries that are used to implement the
+ * direct map. It must be a power of two.
+ */
+#define NDMPML4E 2
+
+/*
+ * The *PDI values control the layout of virtual memory. The starting address
+ * of the direct map, which is controlled by DMPML4I, must be a multiple of
+ * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
+ */
+#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
+
+#define KPML4I (NPML4EPG-1) /* Top 512GB for KVM */
+#define DMPML4I rounddown(KPML4I - NDMPML4E, NDMPML4E) /* Below KVM */
+
+#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
+
+/*
+ * XXX doesn't really belong here I guess...
+ */
+#define ISA_HOLE_START 0xa0000
+#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START)
+
+#ifndef LOCORE
+
+#include <sys/queue.h>
+#include <sys/_cpuset.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+
+#include <vm/_vm_radix.h>
+
+typedef u_int64_t pd_entry_t;
+typedef u_int64_t pt_entry_t;
+typedef u_int64_t pdp_entry_t;
+typedef u_int64_t pml4_entry_t;
+
+#define PML4ESHIFT (3)
+#define PDPESHIFT (3)
+#define PTESHIFT (3)
+#define PDESHIFT (3)
+
+/*
+ * Address of current address space page table maps and directories.
+ */
+#ifdef _KERNEL
+#define addr_PTmap (KVADDR(PML4PML4I, 0, 0, 0))
+#define addr_PDmap (KVADDR(PML4PML4I, PML4PML4I, 0, 0))
+#define addr_PDPmap (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
+#define addr_PML4map (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
+#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
+#define PTmap ((pt_entry_t *)(addr_PTmap))
+#define PDmap ((pd_entry_t *)(addr_PDmap))
+#define PDPmap ((pd_entry_t *)(addr_PDPmap))
+#define PML4map ((pd_entry_t *)(addr_PML4map))
+#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))
+
+extern int nkpt; /* Initial number of kernel page tables */
+extern u_int64_t KPDPphys; /* physical address of kernel level 3 */
+extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
+
+/*
+ * Translate a virtual address to its page table entry and to a
+ * physical address.
+ * Note: these work recursively, thus vtopte() of a pte will give
+ * the corresponding pde that in turn maps it.
+ */
+pt_entry_t *vtopte(vm_offset_t);
+#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
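
A sketch of how vtopte() can be implemented on top of the recursive map
(the real definition lives in pmap.c; the per-level 9-bit index shift
constants are assumed from <machine/param.h>):

    static __inline pt_entry_t *
    vtopte_sketch(vm_offset_t va)
    {
        u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT +
            NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

        /* Shift the VA down one level and index into the PTE window. */
        return (PTmap + ((va >> PAGE_SHIFT) & mask));
    }
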
+
+static __inline pt_entry_t
+pte_load(pt_entry_t *ptep)
+{
+ pt_entry_t r;
+
+ r = *ptep;
+ return (r);
+}
+
+static __inline pt_entry_t
+pte_load_store(pt_entry_t *ptep, pt_entry_t pte)
+{
+ pt_entry_t r;
+
+ __asm __volatile(
+ "xchgq %0,%1"
+ : "=m" (*ptep),
+ "=r" (r)
+ : "1" (pte),
+ "m" (*ptep));
+ return (r);
+}
+
+#define pte_load_clear(pte) atomic_readandclear_long(pte)
+
+static __inline void
+pte_store(pt_entry_t *ptep, pt_entry_t pte)
+{
+
+ *ptep = pte;
+}
+
+#define pte_clear(ptep) pte_store((ptep), (pt_entry_t)0ULL)
+
+#define pde_store(pdep, pde) pte_store((pdep), (pde))
+
+extern pt_entry_t pg_nx;
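
Put together, a typical kernel mapping update looks like the following
(illustrative only):

    pt_entry_t *pte, old;

    pte = vtopte(va);
    pte_store(pte, pa | PG_RW | PG_V);      /* establish the mapping */
    /* ... use the mapping ... */
    old = pte_load_clear(pte);              /* atomically read and clear */
    pmap_invalidate_page(kernel_pmap, va);  /* flush stale TLB entries */

pte_load_store() uses xchgq because an exchange with a memory operand is
implicitly locked, so a concurrent hardware update of the accessed or dirty
bits cannot be lost.
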
+
+#endif /* _KERNEL */
+
+/*
+ * Pmap stuff
+ */
+struct pv_entry;
+struct pv_chunk;
+
+struct md_page {
+ TAILQ_HEAD(,pv_entry) pv_list;
+ int pat_mode;
+};
+
+/*
+ * The kernel virtual address (KVA) of the level 4 page table page is always
+ * within the direct map (DMAP) region.
+ */
+struct pmap {
+ struct mtx pm_mtx;
+ pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
+ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
+ cpuset_t pm_active; /* active on cpus */
+ /* spare u_int here due to padding */
+ struct pmap_statistics pm_stats; /* pmap statistics */
+ struct vm_radix pm_root; /* spare page table pages */
+};
+
+typedef struct pmap *pmap_t;
+
+#ifdef _KERNEL
+extern struct pmap kernel_pmap_store;
+#define kernel_pmap (&kernel_pmap_store)
+
+#define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx)
+#define PMAP_LOCK_ASSERT(pmap, type) \
+ mtx_assert(&(pmap)->pm_mtx, (type))
+#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
+#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \
+ NULL, MTX_DEF | MTX_DUPOK)
+#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx)
+#define PMAP_MTX(pmap) (&(pmap)->pm_mtx)
+#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx)
+#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx)
+#endif
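
The lock protects the pmap's page-table and pv-chunk state; a typical
caller brackets its work as follows (sketch):

    PMAP_LOCK(pmap);
    /* ... walk or modify the pmap's page tables ... */
    PMAP_UNLOCK(pmap);

MTX_DUPOK is passed to mtx_init() because some operations, such as copying
mappings between two address spaces, legitimately hold two pmap locks at
once.
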
+
+/*
+ * For each vm_page_t, there is a list of all currently valid virtual
+ * mappings of that page. An entry is a pv_entry_t, the list is pv_list.
+ */
+typedef struct pv_entry {
+ vm_offset_t pv_va; /* virtual address for mapping */
+ TAILQ_ENTRY(pv_entry) pv_next;
+} *pv_entry_t;
+
+/*
+ * pv_entries are allocated in chunks per-process. This avoids the
+ * need to track per-pmap assignments.
+ */
+#define _NPCM 3
+#define _NPCPV 168
+struct pv_chunk {
+ pmap_t pc_pmap;
+ TAILQ_ENTRY(pv_chunk) pc_list;
+ uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */
+ TAILQ_ENTRY(pv_chunk) pc_lru;
+ struct pv_entry pc_pventry[_NPCPV];
+};
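
With 1 meaning free in pc_map, allocating a pv_entry from a chunk reduces
to a first-set-bit scan (a sketch of the idea; the real allocator is in
pmap.c):

    struct pv_chunk *pc;   /* a chunk with at least one free entry */
    pv_entry_t pv = NULL;
    int field, bit;

    for (field = 0; field < _NPCM; field++) {
        if (pc->pc_map[field] != 0) {
            bit = ffsl(pc->pc_map[field]) - 1;   /* first free slot */
            pc->pc_map[field] &= ~(1ul << bit);  /* mark it allocated */
            pv = &pc->pc_pventry[field * 64 + bit];
            break;
        }
    }

Three 64-bit words cover the 168 entries, with the unused tail bits of the
last word kept clear so they are never handed out.
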
+
+#ifdef _KERNEL
+
+extern caddr_t CADDR1;
+extern pt_entry_t *CMAP1;
+extern vm_paddr_t phys_avail[];
+extern vm_paddr_t dump_avail[];
+extern vm_offset_t virtual_avail;
+extern vm_offset_t virtual_end;
+
+#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
+#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
+
+void pmap_bootstrap(vm_paddr_t *);
+int pmap_change_attr(vm_offset_t, vm_size_t, int);
+void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate);
+void pmap_init_pat(void);
+void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
+void *pmap_kenter_temporary(vm_paddr_t pa, int i);
+vm_paddr_t pmap_kextract(vm_offset_t);
+void pmap_kremove(vm_offset_t);
+void *pmap_mapbios(vm_paddr_t, vm_size_t);
+void *pmap_mapdev(vm_paddr_t, vm_size_t);
+void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
+boolean_t pmap_page_is_mapped(vm_page_t m);
+void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
+void pmap_unmapdev(vm_offset_t, vm_size_t);
+void pmap_invalidate_page(pmap_t, vm_offset_t);
+void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
+void pmap_invalidate_all(pmap_t);
+void pmap_invalidate_cache(void);
+void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
+void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
+
+#endif /* _KERNEL */
+
+#endif /* !LOCORE */
+
+#endif /* !_MACHINE_PMAP_H_ */
diff --git a/sys/amd64/include/pmc_mdep.h b/sys/amd64/include/pmc_mdep.h
new file mode 100644
index 0000000..73c93fe
--- /dev/null
+++ b/sys/amd64/include/pmc_mdep.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 2003-2008 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Machine dependent interfaces */
+
+#ifndef _MACHINE_PMC_MDEP_H
+#define _MACHINE_PMC_MDEP_H 1
+
+#ifdef _KERNEL
+struct pmc_mdep;
+#endif
+
+#include <dev/hwpmc/hwpmc_amd.h>
+#include <dev/hwpmc/hwpmc_core.h>
+#include <dev/hwpmc/hwpmc_piv.h>
+#include <dev/hwpmc/hwpmc_tsc.h>
+#include <dev/hwpmc/hwpmc_uncore.h>
+
+/*
+ * Intel processors implementing V2 and later of the Intel performance
+ * measurement architecture have PMCs of the following classes: TSC,
+ * IAF, IAP, UCF and UCP.
+ */
+#define PMC_MDEP_CLASS_INDEX_TSC 1
+#define PMC_MDEP_CLASS_INDEX_K8 2
+#define PMC_MDEP_CLASS_INDEX_P4 2
+#define PMC_MDEP_CLASS_INDEX_IAP 2
+#define PMC_MDEP_CLASS_INDEX_IAF 3
+#define PMC_MDEP_CLASS_INDEX_UCP 4
+#define PMC_MDEP_CLASS_INDEX_UCF 5
+
+/*
+ * On the amd64 platform we support the following PMCs.
+ *
+ * TSC The timestamp counter
+ * K8 AMD Athlon64 and Opteron PMCs in 64-bit mode.
+ * PIV Intel P4/HTT and P4/EMT64
+ * IAP Intel Core/Core2/Atom CPUs in 64-bit mode.
+ * IAF Intel fixed-function PMCs in Core2 and later CPUs.
+ * UCP Intel Uncore programmable PMCs.
+ * UCF Intel Uncore fixed-function PMCs.
+ */
+
+union pmc_md_op_pmcallocate {
+ struct pmc_md_amd_op_pmcallocate pm_amd;
+ struct pmc_md_iaf_op_pmcallocate pm_iaf;
+ struct pmc_md_iap_op_pmcallocate pm_iap;
+ struct pmc_md_ucf_op_pmcallocate pm_ucf;
+ struct pmc_md_ucp_op_pmcallocate pm_ucp;
+ struct pmc_md_p4_op_pmcallocate pm_p4;
+ uint64_t __pad[4];
+};
+
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ64
+#define PMCLOG_EMITADDR PMCLOG_EMIT64
+
+#ifdef _KERNEL
+
+union pmc_md_pmc {
+ struct pmc_md_amd_pmc pm_amd;
+ struct pmc_md_iaf_pmc pm_iaf;
+ struct pmc_md_iap_pmc pm_iap;
+ struct pmc_md_ucf_pmc pm_ucf;
+ struct pmc_md_ucp_pmc pm_ucp;
+ struct pmc_md_p4_pmc pm_p4;
+};
+
+#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_rip)
+#define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_rbp)
+#define PMC_TRAPFRAME_TO_USER_SP(TF) ((TF)->tf_rsp)
+#define PMC_TRAPFRAME_TO_KERNEL_SP(TF) ((TF)->tf_rsp)
+
+#define PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I) \
+ (((I) & 0xffffffff) == 0xe5894855) /* pushq %rbp; movq %rsp,%rbp */
+#define PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I) \
+ (((I) & 0x00ffffff) == 0x00e58948) /* movq %rsp,%rbp */
+#define PMC_AT_FUNCTION_EPILOGUE_RET(I) \
+ (((I) & 0xFF) == 0xC3) /* ret */
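
These matchers compare raw instruction bytes at a sampled PC.  The standard
prologue encodes as 0x55 (pushq %rbp) followed by 0x48 0x89 0xe5
(movq %rsp,%rbp); read as a little-endian 32-bit load from the function
entry, those bytes appear as 0xe5894855, which is exactly what the first
macro tests (sketch, with pc a sampled address):

    uint32_t insn = *(uint32_t *)pc;

    if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(insn)) {
        /* %rbp not yet established; the unwinder must use %rsp. */
    }
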
+
+#define PMC_IN_TRAP_HANDLER(PC) \
+ ((PC) >= (uintptr_t) start_exceptions && \
+ (PC) < (uintptr_t) end_exceptions)
+
+#define PMC_IN_KERNEL_STACK(S,START,END) \
+ ((S) >= (START) && (S) < (END))
+#define PMC_IN_KERNEL(va) (((va) >= DMAP_MIN_ADDRESS && \
+ (va) < DMAP_MAX_ADDRESS) || ((va) >= VM_MIN_KERNEL_ADDRESS && \
+ (va) < VM_MAX_KERNEL_ADDRESS))
+
+#define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS)
+
+/* Build a fake kernel trapframe from the current instruction pointer. */
+#define PMC_FAKE_TRAPFRAME(TF) \
+ do { \
+ (TF)->tf_cs = 0; (TF)->tf_rflags = 0; \
+ __asm __volatile("movq %%rbp,%0" : "=r" ((TF)->tf_rbp)); \
+ __asm __volatile("movq %%rsp,%0" : "=r" ((TF)->tf_rsp)); \
+ __asm __volatile("call 1f \n\t1: pop %0" : "=r"((TF)->tf_rip)); \
+ } while (0)
+
+/*
+ * Prototypes
+ */
+
+void start_exceptions(void), end_exceptions(void);
+
+struct pmc_mdep *pmc_amd_initialize(void);
+void pmc_amd_finalize(struct pmc_mdep *_md);
+struct pmc_mdep *pmc_intel_initialize(void);
+void pmc_intel_finalize(struct pmc_mdep *_md);
+
+#endif /* _KERNEL */
+#endif /* _MACHINE_PMC_MDEP_H */
diff --git a/sys/amd64/include/ppireg.h b/sys/amd64/include/ppireg.h
new file mode 100644
index 0000000..5774757
--- /dev/null
+++ b/sys/amd64/include/ppireg.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (C) 2005 TAKAHASHI Yoshihiro. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PPIREG_H_
+#define _MACHINE_PPIREG_H_
+
+#ifdef _KERNEL
+
+#define IO_PPI 0x61 /* Programmable Peripheral Interface */
+
+/*
+ * PPI speaker control values
+ */
+
+#define PIT_ENABLETMR2 0x01 /* Enable timer/counter 2 */
+#define PIT_SPKRDATA 0x02 /* Direct to speaker */
+
+#define PIT_SPKR (PIT_ENABLETMR2 | PIT_SPKRDATA)
+
+#define ppi_spkr_on() outb(IO_PPI, inb(IO_PPI) | PIT_SPKR)
+#define ppi_spkr_off() outb(IO_PPI, inb(IO_PPI) & ~PIT_SPKR)
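
A minimal beep sketch using these macros (DELAY() and the timer 2 output
rate are assumed to be set up elsewhere; illustrative only):

    ppi_spkr_on();      /* gate timer 2's output onto the speaker */
    DELAY(100000);      /* let the tone play for ~100 ms */
    ppi_spkr_off();     /* silence it again */
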
+
+#endif /* _KERNEL */
+
+#endif /* _MACHINE_PPIREG_H_ */
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
new file mode 100644
index 0000000..14585fb
--- /dev/null
+++ b/sys/amd64/include/proc.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)proc.h 7.1 (Berkeley) 5/15/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PROC_H_
+#define _MACHINE_PROC_H_
+
+#include <machine/segments.h>
+
+struct proc_ldt {
+ caddr_t ldt_base;
+ int ldt_refcnt;
+};
+
+/*
+ * Machine-dependent part of the proc structure for AMD64.
+ */
+struct mdthread {
+ int md_spinlock_count; /* (k) */
+ register_t md_saved_flags; /* (k) */
+ register_t md_spurflt_addr; /* (k) Spurious page fault address. */
+};
+
+struct mdproc {
+ struct proc_ldt *md_ldt; /* (t) per-process ldt */
+ struct system_segment_descriptor md_ldt_sd;
+};
+
+#define KINFO_PROC_SIZE 1088
+#define KINFO_PROC32_SIZE 768
+
+#ifdef _KERNEL
+
+/* Get the current kernel thread stack usage. */
+#define GET_STACK_USAGE(total, used) do { \
+ struct thread *td = curthread; \
+ (total) = td->td_kstack_pages * PAGE_SIZE; \
+ (used) = (char *)td->td_kstack + \
+ td->td_kstack_pages * PAGE_SIZE - \
+ (char *)&td; \
+} while (0)
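
For example (sketch), a subsystem can check how deep it already is before
recursing further:

    int total, used;

    GET_STACK_USAGE(total, used);
    if (used > total / 2)
        printf("kernel stack over half used: %d of %d bytes\n",
            used, total);

The macro measures usage down to the address of its own local variable, so
the result reflects the stack depth at the point of the call.
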
+
+void set_user_ldt(struct mdproc *);
+struct proc_ldt *user_ldt_alloc(struct proc *, int);
+void user_ldt_free(struct thread *);
+void user_ldt_deref(struct proc_ldt *);
+struct sysarch_args;
+int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space);
+int amd64_set_ldt_data(struct thread *td, int start, int num,
+ struct user_segment_descriptor *descs);
+
+extern struct mtx dt_lock;
+extern int max_ldt_segment;
+
+struct syscall_args {
+ u_int code;
+ struct sysent *callp;
+ register_t args[8];
+ int narg;
+};
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_PROC_H_ */
diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h
new file mode 100644
index 0000000..8fea371
--- /dev/null
+++ b/sys/amd64/include/profile.h
@@ -0,0 +1,201 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)profile.h 8.1 (Berkeley) 6/11/93
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PROFILE_H_
+#define _MACHINE_PROFILE_H_
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * Config generates something to tell the compiler to align functions on 16
+ * byte boundaries. A strict alignment is good for keeping the tables small.
+ */
+#define FUNCTION_ALIGNMENT 16
+
+/*
+ * The kernel uses assembler stubs instead of unportable inlines.
+ * This is mainly to save a little time when profiling is not enabled,
+ * which is the usual case for the kernel.
+ */
+#define _MCOUNT_DECL void mcount
+#define MCOUNT
+
+#ifdef GUPROF
+#define MCOUNT_DECL(s)
+#define MCOUNT_ENTER(s)
+#define MCOUNT_EXIT(s)
+#ifdef __GNUCLIKE_ASM
+#define MCOUNT_OVERHEAD(label) \
+ __asm __volatile("pushq %0; call __mcount; popq %%rcx" \
+ : \
+ : "i" (label) \
+ : "ax", "dx", "cx", "di", "si", "r8", "r9", "memory")
+#define MEXITCOUNT_OVERHEAD() \
+ __asm __volatile("call .mexitcount; 1:" \
+ : : \
+ : "ax", "dx", "cx", "di", "si", "r8", "r9", "memory")
+#define MEXITCOUNT_OVERHEAD_GETLABEL(labelp) \
+ __asm __volatile("movq $1b,%0" : "=rm" (labelp))
+#elif defined(lint)
+#define MCOUNT_OVERHEAD(label)
+#define MEXITCOUNT_OVERHEAD()
+#define MEXITCOUNT_OVERHEAD_GETLABEL()
+#else
+#error this file needs to be ported to your compiler
+#endif /* !__GNUCLIKE_ASM */
+#else /* !GUPROF */
+#define MCOUNT_DECL(s) register_t s;
+#ifdef SMP
+extern int mcount_lock;
+#define MCOUNT_ENTER(s) { s = intr_disable(); \
+ while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \
+ /* nothing */ ; }
+#define MCOUNT_EXIT(s) { atomic_store_rel_int(&mcount_lock, 0); \
+ intr_restore(s); }
+#else
+#define MCOUNT_ENTER(s) { s = intr_disable(); }
+#define MCOUNT_EXIT(s) (intr_restore(s))
+#endif
+#endif /* GUPROF */
+
+void bintr(void);
+void btrap(void);
+void eintr(void);
+void user(void);
+
+#define MCOUNT_FROMPC_USER(pc) \
+ ((pc < (uintfptr_t)VM_MAXUSER_ADDRESS) ? (uintfptr_t)user : pc)
+
+#define MCOUNT_FROMPC_INTR(pc) \
+ ((pc >= (uintfptr_t)btrap && pc < (uintfptr_t)eintr) ? \
+ ((pc >= (uintfptr_t)bintr) ? (uintfptr_t)bintr : \
+ (uintfptr_t)btrap) : ~0UL)
+
+#else /* !_KERNEL */
+
+#define FUNCTION_ALIGNMENT 4
+
+#define _MCOUNT_DECL \
+static void _mcount(uintfptr_t frompc, uintfptr_t selfpc) __used; \
+static void _mcount
+
+#ifdef __GNUCLIKE_ASM
+#define MCOUNT __asm(" \n\
+ .text \n\
+ .p2align 4,0x90 \n\
+ .globl .mcount \n\
+ .type .mcount,@function \n\
+.mcount: \n\
+ pushq %rdi \n\
+ pushq %rsi \n\
+ pushq %rdx \n\
+ pushq %rcx \n\
+ pushq %r8 \n\
+ pushq %r9 \n\
+ pushq %rax \n\
+ movq 8(%rbp),%rdi \n\
+ movq 7*8(%rsp),%rsi \n\
+ call _mcount \n\
+ popq %rax \n\
+ popq %r9 \n\
+ popq %r8 \n\
+ popq %rcx \n\
+ popq %rdx \n\
+ popq %rsi \n\
+ popq %rdi \n\
+ ret \n\
+ .size .mcount, . - .mcount");
+#if 0
+/*
+ * We could use this, except it doesn't preserve the registers that were
+ * being passed with arguments to the function that we were inserted
+ * into. I've left it here as documentation of what the code above is
+ * supposed to do.
+ */
+#define MCOUNT \
+void \
+mcount() \
+{ \
+ uintfptr_t selfpc, frompc; \
+ /* \
+ * Find the return address for mcount, \
+ * and the return address for mcount's caller. \
+ * \
+ * selfpc = pc pushed by call to mcount \
+ */ \
+ __asm("movq 8(%%rbp),%0" : "=r" (selfpc)); \
+ /* \
+ * frompc = pc pushed by call to mcount's caller. \
+ * The caller's stack frame has already been built, so %rbp is \
+ * the caller's frame pointer. The caller's raddr is in the \
+ * caller's frame following the caller's caller's frame pointer.\
+ */ \
+ __asm("movq (%%rbp),%0" : "=r" (frompc)); \
+ frompc = ((uintfptr_t *)frompc)[1]; \
+ _mcount(frompc, selfpc); \
+}
+#endif
+#else /* !__GNUCLIKE_ASM */
+#define MCOUNT
+#endif /* __GNUCLIKE_ASM */
+
+typedef u_long uintfptr_t;
+
+#endif /* _KERNEL */
+
+/*
+ * An unsigned integral type that can hold a non-negative difference
+ * between function pointers.
+ */
+typedef u_long fptrdiff_t;
+
+#ifdef _KERNEL
+
+void mcount(uintfptr_t frompc, uintfptr_t selfpc);
+
+#else /* !_KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+#ifdef __GNUCLIKE_ASM
+void mcount(void) __asm(".mcount");
+#endif
+__END_DECLS
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_PROFILE_H_ */
diff --git a/sys/amd64/include/psl.h b/sys/amd64/include/psl.h
new file mode 100644
index 0000000..4d945a1
--- /dev/null
+++ b/sys/amd64/include/psl.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/psl.h>
diff --git a/sys/amd64/include/ptrace.h b/sys/amd64/include/ptrace.h
new file mode 100644
index 0000000..bf86754
--- /dev/null
+++ b/sys/amd64/include/ptrace.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/ptrace.h>
diff --git a/sys/amd64/include/reg.h b/sys/amd64/include/reg.h
new file mode 100644
index 0000000..f6fb2bc
--- /dev/null
+++ b/sys/amd64/include/reg.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/reg.h>
diff --git a/sys/amd64/include/reloc.h b/sys/amd64/include/reloc.h
new file mode 100644
index 0000000..1883193
--- /dev/null
+++ b/sys/amd64/include/reloc.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)reloc.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _I386_MACHINE_RELOC_H_
+#define _I386_MACHINE_RELOC_H_
+
+/* Relocation format. */
+struct relocation_info {
+ int r_address; /* offset in text or data segment */
+ unsigned int r_symbolnum : 24, /* ordinal number of add symbol */
+ r_pcrel : 1, /* 1 if value should be pc-relative */
+ r_length : 2, /* log base 2 of value's width */
+ r_extern : 1, /* 1 if need to add symbol to value */
+ r_baserel : 1, /* linkage table relative */
+ r_jmptable : 1, /* relocate to jump table */
+ r_relative : 1, /* load address relative */
+ r_copy : 1; /* run time copy */
+};
+
+#endif
diff --git a/sys/amd64/include/resource.h b/sys/amd64/include/resource.h
new file mode 100644
index 0000000..edde5eb
--- /dev/null
+++ b/sys/amd64/include/resource.h
@@ -0,0 +1,44 @@
+/* $FreeBSD$ */
+/*-
+ * Copyright 1998 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission. M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _MACHINE_RESOURCE_H_
+#define _MACHINE_RESOURCE_H_ 1
+
+/*
+ * Definitions of resource types for Intel Architecture machines
+ * with support for legacy ISA devices and drivers.
+ */
+
+#define SYS_RES_IRQ 1 /* interrupt lines */
+#define SYS_RES_DRQ 2 /* isa dma lines */
+#define SYS_RES_MEMORY 3 /* i/o memory */
+#define SYS_RES_IOPORT 4 /* i/o ports */
+
+#endif /* !_MACHINE_RESOURCE_H_ */
diff --git a/sys/amd64/include/runq.h b/sys/amd64/include/runq.h
new file mode 100644
index 0000000..855e315
--- /dev/null
+++ b/sys/amd64/include/runq.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_RUNQ_H_
+#define _MACHINE_RUNQ_H_
+
+#define RQB_LEN (1) /* Number of priority status words. */
+#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY). */
+#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
+
+#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
+#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
+
+#define RQB_FFS(word) (bsfq(word))
+
+/*
+ * Type of run queue status word.
+ */
+typedef u_int64_t rqb_word_t;
+
+#endif
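
A worked example of the status-word arithmetic (with the single 64-bit word
covering all 64 queue indices):

    /*
     * For priority index 37:
     *   RQB_WORD(37) = 37 >> 6          = 0         (word 0 of 1)
     *   RQB_BIT(37)  = 1ul << (37 & 63) = 1ul << 37
     *
     * RQB_FFS() then locates the lowest set bit -- i.e. the best
     * (lowest-numbered) non-empty priority -- with a single bsfq.
     */
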
diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h
new file mode 100644
index 0000000..d9f4280
--- /dev/null
+++ b/sys/amd64/include/segments.h
@@ -0,0 +1,106 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)segments.h 7.1 (Berkeley) 5/9/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_SEGMENTS_H_
+#define _MACHINE_SEGMENTS_H_
+
+/*
+ * AMD64 Segmentation Data Structures and definitions
+ */
+
+#include <x86/segments.h>
+
+/*
+ * System segment descriptors (128 bit wide)
+ */
+struct system_segment_descriptor {
+ u_int64_t sd_lolimit:16; /* segment extent (lsb) */
+ u_int64_t sd_lobase:24; /* segment base address (lsb) */
+ u_int64_t sd_type:5; /* segment type */
+ u_int64_t sd_dpl:2; /* segment descriptor priority level */
+ u_int64_t sd_p:1; /* segment descriptor present */
+ u_int64_t sd_hilimit:4; /* segment extent (msb) */
+ u_int64_t sd_xx0:3; /* unused */
+ u_int64_t sd_gran:1; /* limit granularity (byte/page units)*/
+ u_int64_t sd_hibase:40 __packed;/* segment base address (msb) */
+ u_int64_t sd_xx1:8;
+ u_int64_t sd_mbz:5; /* MUST be zero */
+ u_int64_t sd_xx2:19;
+} __packed;
+
+/*
+ * Software definitions are in this convenient format, which is
+ * translated into the less convenient hardware segment descriptor
+ * format when needed by the processor.
+ */
+
+struct soft_segment_descriptor {
+ unsigned long ssd_base; /* segment base address */
+ unsigned long ssd_limit; /* segment extent */
+ unsigned long ssd_type:5; /* segment type */
+ unsigned long ssd_dpl:2; /* segment descriptor priority level */
+ unsigned long ssd_p:1; /* segment descriptor present */
+ unsigned long ssd_long:1; /* long mode (for %cs) */
+ unsigned long ssd_def32:1; /* default 32 vs 16 bit size */
+ unsigned long ssd_gran:1; /* limit granularity (byte/page units)*/
+} __packed;
+
+/*
+ * Region descriptors, used to load the gdt/idt tables before segments exist.
+ */
+struct region_descriptor {
+ unsigned long rd_limit:16; /* segment extent */
+ unsigned long rd_base:64 __packed; /* base address */
+} __packed;
+
+#ifdef _KERNEL
+extern struct user_segment_descriptor gdt[];
+extern struct soft_segment_descriptor gdt_segs[];
+extern struct gate_descriptor *idt;
+extern struct region_descriptor r_gdt, r_idt;
+
+void lgdt(struct region_descriptor *rdp);
+void sdtossd(struct user_segment_descriptor *sdp,
+ struct soft_segment_descriptor *ssdp);
+void ssdtosd(struct soft_segment_descriptor *ssdp,
+ struct user_segment_descriptor *sdp);
+void ssdtosyssd(struct soft_segment_descriptor *ssdp,
+ struct system_segment_descriptor *sdp);
+void update_gdt_gsbase(struct thread *td, uint32_t base);
+void update_gdt_fsbase(struct thread *td, uint32_t base);
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_SEGMENTS_H_ */
diff --git a/sys/amd64/include/setjmp.h b/sys/amd64/include/setjmp.h
new file mode 100644
index 0000000..c4101a7
--- /dev/null
+++ b/sys/amd64/include/setjmp.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/setjmp.h>
diff --git a/sys/amd64/include/sf_buf.h b/sys/amd64/include/sf_buf.h
new file mode 100644
index 0000000..b5245e6
--- /dev/null
+++ b/sys/amd64/include/sf_buf.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_SF_BUF_H_
+#define _MACHINE_SF_BUF_H_
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_page.h>
+
+/*
+ * On this machine, the only purpose for which sf_buf is used is to implement
+ * an opaque pointer required by the machine-independent parts of the kernel.
+ * That pointer references the vm_page that is "mapped" by the sf_buf. The
+ * actual mapping is provided by the direct virtual-to-physical mapping.
+ */
+struct sf_buf;
+
+static __inline vm_offset_t
+sf_buf_kva(struct sf_buf *sf)
+{
+
+ return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
+}
+
+static __inline vm_page_t
+sf_buf_page(struct sf_buf *sf)
+{
+
+ return ((vm_page_t)sf);
+}
+
+#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/amd64/include/sigframe.h b/sys/amd64/include/sigframe.h
new file mode 100644
index 0000000..d5cdb56
--- /dev/null
+++ b/sys/amd64/include/sigframe.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/sigframe.h>
diff --git a/sys/amd64/include/signal.h b/sys/amd64/include/signal.h
new file mode 100644
index 0000000..db9fe6a
--- /dev/null
+++ b/sys/amd64/include/signal.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/signal.h>
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
new file mode 100644
index 0000000..16d87ea
--- /dev/null
+++ b/sys/amd64/include/smp.h
@@ -0,0 +1,82 @@
+/*-
+ * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _MACHINE_SMP_H_
+#define _MACHINE_SMP_H_
+
+#ifdef _KERNEL
+
+#ifdef SMP
+
+#ifndef LOCORE
+
+#include <sys/bus.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/pcb.h>
+
+/* global symbols in mpboot.S */
+extern char mptramp_start[];
+extern char mptramp_end[];
+extern u_int32_t mptramp_pagetables;
+
+/* global data in mp_machdep.c */
+extern int mp_naps;
+extern int boot_cpu_id;
+extern struct pcb stoppcbs[];
+extern int cpu_apic_ids[];
+#ifdef COUNT_IPIS
+extern u_long *ipi_invltlb_counts[MAXCPU];
+extern u_long *ipi_invlrng_counts[MAXCPU];
+extern u_long *ipi_invlpg_counts[MAXCPU];
+extern u_long *ipi_invlcache_counts[MAXCPU];
+extern u_long *ipi_rendezvous_counts[MAXCPU];
+#endif
+
+/* IPI handlers */
+inthand_t
+ IDTVEC(invltlb), /* TLB shootdowns - global */
+ IDTVEC(invlpg), /* TLB shootdowns - 1 page */
+ IDTVEC(invlrng), /* TLB shootdowns - page range */
+ IDTVEC(invlcache), /* Write back and invalidate cache */
+ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
+ IDTVEC(cpustop), /* CPU stops & waits to be restarted */
+ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
+ IDTVEC(rendezvous); /* handle CPU rendezvous */
+
+/* functions in mp_machdep.c */
+void cpu_add(u_int apic_id, char boot_cpu);
+void cpustop_handler(void);
+void cpususpend_handler(void);
+void init_secondary(void);
+void ipi_startup(int apic_id, int vector);
+void ipi_all_but_self(u_int ipi);
+void ipi_bitmap_handler(struct trapframe frame);
+void ipi_cpu(int cpu, u_int ipi);
+int ipi_nmi_handler(void);
+void ipi_selected(cpuset_t cpus, u_int ipi);
+u_int mp_bootaddress(u_int);
+void smp_cache_flush(void);
+void smp_invlpg(vm_offset_t addr);
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
+void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
+void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
+ vm_offset_t endva);
+void smp_invltlb(void);
+void smp_masked_invltlb(cpuset_t mask);
+
+#endif /* !LOCORE */
+#endif /* SMP */
+
+#endif /* _KERNEL */
+#endif /* _MACHINE_SMP_H_ */
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
new file mode 100644
index 0000000..aace4bf
--- /dev/null
+++ b/sys/amd64/include/specialreg.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/specialreg.h>
diff --git a/sys/amd64/include/stack.h b/sys/amd64/include/stack.h
new file mode 100644
index 0000000..24e2547
--- /dev/null
+++ b/sys/amd64/include/stack.h
@@ -0,0 +1,44 @@
+/*-
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_STACK_H_
+#define _MACHINE_STACK_H_
+
+/*
+ * Stack trace.
+ */
+#define INKERNEL(va) (((va) >= DMAP_MIN_ADDRESS && (va) < DMAP_MAX_ADDRESS) \
+ || ((va) >= VM_MIN_KERNEL_ADDRESS && (va) < VM_MAX_KERNEL_ADDRESS))
+
+struct amd64_frame {
+ struct amd64_frame *f_frame;
+ long f_retaddr;
+ long f_arg0;
+};
+
+#endif /* !_MACHINE_STACK_H_ */
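
A stack-walk sketch using the definitions above (illustrative only; the
real walker lives in stack_machdep.c, and record_pc() is a hypothetical
consumer):

    struct amd64_frame *frame;

    frame = (struct amd64_frame *)rbp;      /* a captured %rbp value */
    while (INKERNEL((vm_offset_t)frame)) {
        record_pc(frame->f_retaddr);        /* saved return address */
        frame = frame->f_frame;             /* follow the saved %rbp */
    }

The loop terminates when the chain leaves kernel address space, which is
how trap frames and user frames are kept out of kernel stack traces.
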
diff --git a/sys/amd64/include/stdarg.h b/sys/amd64/include/stdarg.h
new file mode 100644
index 0000000..1f80090
--- /dev/null
+++ b/sys/amd64/include/stdarg.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/stdarg.h>
diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h
new file mode 100644
index 0000000..cd380d4
--- /dev/null
+++ b/sys/amd64/include/sysarch.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/sysarch.h>
diff --git a/sys/amd64/include/timerreg.h b/sys/amd64/include/timerreg.h
new file mode 100644
index 0000000..cf5f281
--- /dev/null
+++ b/sys/amd64/include/timerreg.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (C) 2005 TAKAHASHI Yoshihiro. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The outputs of the three timers are connected as follows:
+ *
+ * timer 0 -> irq 0
+ * timer 1 -> dma chan 0 (for dram refresh)
+ * timer 2 -> speaker (via keyboard controller)
+ *
+ * Timer 0 is used to call hardclock.
+ * Timer 2 is used to generate console beeps.
+ */
+
+#ifndef _MACHINE_TIMERREG_H_
+#define _MACHINE_TIMERREG_H_
+
+#ifdef _KERNEL
+
+#include <dev/ic/i8253reg.h>
+
+#define IO_TIMER1 0x40 /* 8253 Timer #1 */
+#define TIMER_CNTR0 (IO_TIMER1 + TIMER_REG_CNTR0)
+#define TIMER_CNTR1 (IO_TIMER1 + TIMER_REG_CNTR1)
+#define TIMER_CNTR2 (IO_TIMER1 + TIMER_REG_CNTR2)
+#define TIMER_MODE (IO_TIMER1 + TIMER_REG_MODE)
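+
+/*
+ * Example (editor's sketch, not part of the original header): a console
+ * beep programs timer 2 in square-wave mode and loads a divisor; the
+ * mode bits come from i8253reg.h and the ~1 kHz divisor is illustrative.
+ *
+ *	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
+ *	outb(TIMER_CNTR2, 1193 & 0xff);		(low byte of divisor)
+ *	outb(TIMER_CNTR2, 1193 >> 8);		(high byte of divisor)
+ */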
+
+#endif /* _KERNEL */
+
+#endif /* _MACHINE_TIMERREG_H_ */
diff --git a/sys/amd64/include/trap.h b/sys/amd64/include/trap.h
new file mode 100644
index 0000000..4d95077
--- /dev/null
+++ b/sys/amd64/include/trap.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/trap.h>
diff --git a/sys/amd64/include/tss.h b/sys/amd64/include/tss.h
new file mode 100644
index 0000000..fbbe3af
--- /dev/null
+++ b/sys/amd64/include/tss.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)tss.h 5.4 (Berkeley) 1/18/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_TSS_H_
+#define _MACHINE_TSS_H_ 1
+
+/*
+ * amd64 Context Data Type
+ *
+ * The alignment is pretty messed up here due to reuse of the original 32 bit
+ * fields. It might be worth trying to set the tss on a +4 byte offset to
+ * make the 64 bit fields aligned in practice.
+ */
+struct amd64tss {
+ u_int32_t tss_rsvd0;
+ u_int64_t tss_rsp0 __packed; /* kernel stack pointer ring 0 */
+ u_int64_t tss_rsp1 __packed; /* kernel stack pointer ring 1 */
+ u_int64_t tss_rsp2 __packed; /* kernel stack pointer ring 2 */
+ u_int32_t tss_rsvd1;
+ u_int32_t tss_rsvd2;
+ u_int64_t tss_ist1 __packed; /* Interrupt stack table 1 */
+ u_int64_t tss_ist2 __packed; /* Interrupt stack table 2 */
+ u_int64_t tss_ist3 __packed; /* Interrupt stack table 3 */
+ u_int64_t tss_ist4 __packed; /* Interrupt stack table 4 */
+ u_int64_t tss_ist5 __packed; /* Interrupt stack table 5 */
+ u_int64_t tss_ist6 __packed; /* Interrupt stack table 6 */
+ u_int64_t tss_ist7 __packed; /* Interrupt stack table 7 */
+ u_int32_t tss_rsvd3;
+ u_int32_t tss_rsvd4;
+ u_int16_t tss_rsvd5;
+ u_int16_t tss_iobase; /* io bitmap offset */
+};
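+
+/*
+ * Example (editor's sketch, not part of the original header): on context
+ * switch the kernel points tss_rsp0 at the top of the incoming thread's
+ * kernel stack so that ring transitions land on it; new_kstack_top is
+ * hypothetical.
+ *
+ *	common_tss[curcpu].tss_rsp0 = (u_int64_t)new_kstack_top;
+ */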
+
+#ifdef _KERNEL
+extern struct amd64tss common_tss[];
+#endif
+
+#endif /* _MACHINE_TSS_H_ */
diff --git a/sys/amd64/include/ucontext.h b/sys/amd64/include/ucontext.h
new file mode 100644
index 0000000..aea80e3
--- /dev/null
+++ b/sys/amd64/include/ucontext.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/ucontext.h>
diff --git a/sys/amd64/include/varargs.h b/sys/amd64/include/varargs.h
new file mode 100644
index 0000000..93faac6
--- /dev/null
+++ b/sys/amd64/include/varargs.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2002 David E. O'Brien. All rights reserved.
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)varargs.h 8.2 (Berkeley) 3/22/94
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_VARARGS_H_
+#define _MACHINE_VARARGS_H_
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+#ifdef __GNUCLIKE_BUILTIN_VARARGS
+
+#include <sys/_types.h>
+
+#ifndef _VA_LIST_DECLARED
+#define _VA_LIST_DECLARED
+typedef __va_list va_list;
+#endif
+
+typedef int __builtin_va_alist_t __attribute__((__mode__(__word__)));
+
+#define va_alist __builtin_va_alist
+#define va_dcl __builtin_va_alist_t __builtin_va_alist; ...
+#define va_start(ap) __builtin_varargs_start(ap)
+#define va_arg(ap, type) __builtin_va_arg((ap), type)
+#define va_end(ap) __builtin_va_end(ap)
+
+#else /* !__GNUCLIKE_BUILTIN_VARARGS */
+
+typedef char *va_list;
+
+#define __va_size(type) \
+ (((sizeof(type) + sizeof(int) - 1) / sizeof(int)) * sizeof(int))
+
+#if defined(__GNUCLIKE_BUILTIN_VAALIST)
+#define va_alist __builtin_va_alist
+#endif
+#define va_dcl int va_alist; ...
+
+#define va_start(ap) \
+ ((ap) = (va_list)&va_alist)
+
+#define va_arg(ap, type) \
+ (*(type *)((ap) += __va_size(type), (ap) - __va_size(type)))
+
+#define va_end(ap)
+
+#endif /* __GNUCLIKE_BUILTIN_VARARGS */
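+
+/*
+ * Example (editor's sketch, not part of the original header): an
+ * old-style varargs function is defined with va_alist/va_dcl instead of
+ * a '...' prototype; the hypothetical sum() below adds arguments until
+ * it sees a zero.
+ *
+ *	int
+ *	sum(va_alist)
+ *	va_dcl
+ *	{
+ *		va_list ap;
+ *		int n, total = 0;
+ *
+ *		va_start(ap);
+ *		while ((n = va_arg(ap, int)) != 0)
+ *			total += n;
+ *		va_end(ap);
+ *		return (total);
+ *	}
+ */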
+
+#endif /* !_MACHINE_VARARGS_H_ */
diff --git a/sys/amd64/include/vdso.h b/sys/amd64/include/vdso.h
new file mode 100644
index 0000000..b81c455
--- /dev/null
+++ b/sys/amd64/include/vdso.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/vdso.h>
diff --git a/sys/amd64/include/vm.h b/sys/amd64/include/vm.h
new file mode 100644
index 0000000..6573e37
--- /dev/null
+++ b/sys/amd64/include/vm.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_VM_H_
+#define _MACHINE_VM_H_
+
+#include <machine/specialreg.h>
+
+/* Memory attributes. */
+#define VM_MEMATTR_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHEABLE)
+#define VM_MEMATTR_WRITE_COMBINING ((vm_memattr_t)PAT_WRITE_COMBINING)
+#define VM_MEMATTR_WRITE_THROUGH ((vm_memattr_t)PAT_WRITE_THROUGH)
+#define VM_MEMATTR_WRITE_PROTECTED ((vm_memattr_t)PAT_WRITE_PROTECTED)
+#define VM_MEMATTR_WRITE_BACK ((vm_memattr_t)PAT_WRITE_BACK)
+#define VM_MEMATTR_WEAK_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHED)
+
+#define VM_MEMATTR_DEFAULT VM_MEMATTR_WRITE_BACK
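+
+/*
+ * Example (editor's sketch, not part of the original header): drivers
+ * commonly request write-combining for frame-buffer-style mappings,
+ * e.g. via pmap_change_attr(); the va and size here are illustrative.
+ *
+ *	pmap_change_attr(va, size, VM_MEMATTR_WRITE_COMBINING);
+ */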
+
+#endif /* !_MACHINE_VM_H_ */
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
new file mode 100644
index 0000000..9a3063e
--- /dev/null
+++ b/sys/amd64/include/vmm.h
@@ -0,0 +1,291 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_memory_segment;
+struct seg_desc;
+struct vm_exit;
+struct vm_run;
+struct vlapic;
+
+enum x2apic_state;
+
+typedef int (*vmm_init_func_t)(void);
+typedef int (*vmm_cleanup_func_t)(void);
+typedef void * (*vmi_init_func_t)(struct vm *vm); /* instance specific apis */
+typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip);
+typedef void (*vmi_cleanup_func_t)(void *vmi);
+typedef int (*vmi_mmap_set_func_t)(void *vmi, vm_paddr_t gpa,
+ vm_paddr_t hpa, size_t length,
+ vm_memattr_t attr, int prot,
+ boolean_t superpages_ok);
+typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *vmi, vm_paddr_t gpa);
+typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
+ uint64_t *retval);
+typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
+ uint64_t val);
+typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num,
+ struct seg_desc *desc);
+typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num,
+ struct seg_desc *desc);
+typedef int (*vmi_inject_event_t)(void *vmi, int vcpu,
+ int type, int vector,
+ uint32_t code, int code_valid);
+typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+
+struct vmm_ops {
+ vmm_init_func_t init; /* module wide initialization */
+ vmm_cleanup_func_t cleanup;
+
+ vmi_init_func_t vminit; /* vm-specific initialization */
+ vmi_run_func_t vmrun;
+ vmi_cleanup_func_t vmcleanup;
+ vmi_mmap_set_func_t vmmmap_set;
+ vmi_mmap_get_func_t vmmmap_get;
+ vmi_get_register_t vmgetreg;
+ vmi_set_register_t vmsetreg;
+ vmi_get_desc_t vmgetdesc;
+ vmi_set_desc_t vmsetdesc;
+ vmi_inject_event_t vminject;
+ vmi_get_cap_t vmgetcap;
+ vmi_set_cap_t vmsetcap;
+};
+
+extern struct vmm_ops vmm_ops_intel;
+extern struct vmm_ops vmm_ops_amd;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+const char *vm_name(struct vm *vm);
+int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
+vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size);
+int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
+ struct vm_memory_segment *seg);
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
+ struct seg_desc *ret_desc);
+int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
+ struct seg_desc *desc);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_inject_event(struct vm *vm, int vcpu, int type,
+ int vector, uint32_t error_code, int error_code_valid);
+int vm_inject_nmi(struct vm *vm, int vcpu);
+int vm_nmi_pending(struct vm *vm, int vcpuid);
+void vm_nmi_clear(struct vm *vm, int vcpuid);
+uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
+struct vlapic *vm_lapic(struct vm *vm, int cpu);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
+int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
+void vm_activate_cpu(struct vm *vm, int vcpu);
+cpuset_t vm_active_cpus(struct vm *vm);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+
+/*
+ * Return 1 if device indicated by bus/slot/func is supposed to be a
+ * pci passthrough device.
+ *
+ * Return 0 otherwise.
+ */
+int vmm_is_pptdev(int bus, int slot, int func);
+
+void *vm_iommu_domain(struct vm *vm);
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_RUNNING,
+ VCPU_CANNOT_RUN,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu);
+
+static __inline int
+vcpu_is_running(struct vm *vm, int vcpu)
+{
+ return (vcpu_get_state(vm, vcpu) == VCPU_RUNNING);
+}
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+
+#endif /* _KERNEL */
+
+#include <machine/vmm_instruction_emul.h>
+
+#define VM_MAXCPU 8 /* maximum virtual cpus */
+
+/*
+ * Identifiers for events that can be injected into the VM
+ */
+enum vm_event_type {
+ VM_EVENT_NONE,
+ VM_HW_INTR,
+ VM_NMI,
+ VM_HW_EXCEPTION,
+ VM_SW_INTR,
+ VM_PRIV_SW_EXCEPTION,
+ VM_SW_EXCEPTION,
+ VM_EVENT_MAX
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RDX,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15,
+ VM_REG_GUEST_CR0,
+ VM_REG_GUEST_CR3,
+ VM_REG_GUEST_CR4,
+ VM_REG_GUEST_DR7,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_RIP,
+ VM_REG_GUEST_RFLAGS,
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_CS,
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ VM_REG_GUEST_LDTR,
+ VM_REG_GUEST_TR,
+ VM_REG_GUEST_IDTR,
+ VM_REG_GUEST_GDTR,
+ VM_REG_GUEST_EFER,
+ VM_REG_LAST
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum x2apic_state {
+ X2APIC_ENABLED,
+ X2APIC_AVAILABLE,
+ X2APIC_DISABLED,
+ X2APIC_STATE_LAST
+};
+
+/*
+ * The 'access' field has the format specified in Table 21-2 of the Intel
+ * Architecture Manual vol 3b.
+ *
+ * XXX The contents of the 'access' field are architecturally defined except
+ * bit 16 - Segment Unusable.
+ */
+struct seg_desc {
+ uint64_t base;
+ uint32_t limit;
+ uint32_t access;
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_INOUT,
+ VM_EXITCODE_VMX,
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_RDMSR,
+ VM_EXITCODE_WRMSR,
+ VM_EXITCODE_HLT,
+ VM_EXITCODE_MTRAP,
+ VM_EXITCODE_PAUSE,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SPINUP_AP,
+ VM_EXITCODE_MAX
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length; /* 0 means unknown */
+ uint64_t rip;
+ union {
+ struct {
+ uint16_t bytes:3; /* 1 or 2 or 4 */
+ uint16_t in:1; /* out is 0, in is 1 */
+ uint16_t string:1;
+ uint16_t rep:1;
+ uint16_t port;
+ uint32_t eax; /* valid for out */
+ } inout;
+ struct {
+ uint64_t gpa;
+ struct vie vie;
+ } paging;
+ /*
+ * VMX specific payload. Used when there is no "better"
+ * exitcode to represent the VM-exit.
+ */
+ struct {
+ int error; /* vmx inst error */
+ uint32_t exit_reason;
+ uint64_t exit_qualification;
+ } vmx;
+ struct {
+ uint32_t code; /* ecx value */
+ uint64_t wval;
+ } msr;
+ struct {
+ int vcpu;
+ uint64_t rip;
+ } spinup_ap;
+ } u;
+};
+
+#endif /* _VMM_H_ */
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
new file mode 100644
index 0000000..0729927
--- /dev/null
+++ b/sys/amd64/include/vmm_dev.h
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memory_segment {
+ vm_paddr_t gpa; /* in */
+ size_t len; /* in */
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_seg_desc { /* data or code segment */
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ struct seg_desc desc;
+};
+
+struct vm_run {
+ int cpuid;
+ uint64_t rip; /* start running here */
+ struct vm_exit vm_exit;
+};
+
+struct vm_event {
+ int cpuid;
+ enum vm_event_type type;
+ int vector;
+ uint32_t error_code;
+ int error_code_valid;
+};
+
+struct vm_lapic_irq {
+ int cpuid;
+ int vector;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+struct vm_pptdev {
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_pptdev_mmio {
+ int bus;
+ int slot;
+ int func;
+ vm_paddr_t gpa;
+ vm_paddr_t hpa;
+ size_t len;
+};
+
+struct vm_pptdev_msi {
+ int vcpu;
+ int bus;
+ int slot;
+ int func;
+ int numvec; /* 0 means disabled */
+ int vector;
+ int destcpu;
+};
+
+struct vm_pptdev_msix {
+ int vcpu;
+ int bus;
+ int slot;
+ int func;
+ int idx;
+ uint32_t msg;
+ uint32_t vector_control;
+ uint64_t addr;
+};
+
+struct vm_nmi {
+ int cpuid;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_x2apic {
+ int cpuid;
+ enum x2apic_state state;
+};
+
+enum {
+ IOCNUM_RUN,
+ IOCNUM_MAP_MEMORY,
+ IOCNUM_GET_MEMORY_SEG,
+ IOCNUM_SET_REGISTER,
+ IOCNUM_GET_REGISTER,
+ IOCNUM_SET_SEGMENT_DESCRIPTOR,
+ IOCNUM_GET_SEGMENT_DESCRIPTOR,
+ IOCNUM_INJECT_EVENT,
+ IOCNUM_LAPIC_IRQ,
+ IOCNUM_SET_CAPABILITY,
+ IOCNUM_GET_CAPABILITY,
+ IOCNUM_BIND_PPTDEV,
+ IOCNUM_UNBIND_PPTDEV,
+ IOCNUM_MAP_PPTDEV_MMIO,
+ IOCNUM_PPTDEV_MSI,
+ IOCNUM_PPTDEV_MSIX,
+ IOCNUM_INJECT_NMI,
+ IOCNUM_VM_STATS,
+ IOCNUM_VM_STAT_DESC,
+ IOCNUM_SET_X2APIC_STATE,
+ IOCNUM_GET_X2APIC_STATE,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_MAP_MEMORY \
+ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
+#define VM_GET_MEMORY_SEG \
+ _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_SEGMENT_DESCRIPTOR \
+ _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
+#define VM_GET_SEGMENT_DESCRIPTOR \
+ _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
+#define VM_INJECT_EVENT \
+ _IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
+#define VM_LAPIC_IRQ \
+ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_BIND_PPTDEV \
+ _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
+#define VM_UNBIND_PPTDEV \
+ _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
+#define VM_MAP_PPTDEV_MMIO \
+ _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define VM_PPTDEV_MSI \
+ _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
+#define VM_PPTDEV_MSIX \
+ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
+#define VM_INJECT_NMI \
+ _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_SET_X2APIC_STATE \
+ _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
+#define VM_GET_X2APIC_STATE \
+ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
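+
+/*
+ * Example (editor's sketch, not part of the original header): userland
+ * drives a virtual machine by opening its /dev/vmm node and issuing
+ * these ioctls; the path and helper below are hypothetical.
+ *
+ *	struct vm_run vmrun = { .cpuid = 0, .rip = guest_rip };
+ *	int fd = open("/dev/vmm/guest0", O_RDWR);
+ *
+ *	if (fd >= 0 && ioctl(fd, VM_RUN, &vmrun) == 0)
+ *		handle_exit(&vmrun.vm_exit);
+ */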
+#endif /* _VMM_DEV_H_ */
diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h
new file mode 100644
index 0000000..a812a73
--- /dev/null
+++ b/sys/amd64/include/vmm_instruction_emul.h
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding.  Their contents are exposed only
+ * because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+ uint8_t op_byte; /* actual opcode byte */
+ uint8_t op_type; /* type of operation (e.g. MOV) */
+ uint16_t op_flags;
+};
+
+#define VIE_INST_SIZE 15
+struct vie {
+ uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
+ uint8_t num_valid; /* size of the instruction */
+ uint8_t num_processed;
+
+ uint8_t rex_w:1, /* REX prefix */
+ rex_r:1,
+ rex_x:1,
+ rex_b:1,
+ rex_present:1;
+
+ uint8_t mod:2, /* ModRM byte */
+ reg:4,
+ rm:4;
+
+ uint8_t ss:2, /* SIB byte */
+ index:4,
+ base:4;
+
+ uint8_t disp_bytes;
+ uint8_t imm_bytes;
+
+ uint8_t scale;
+ int base_register; /* VM_REG_GUEST_xyz */
+ int index_register; /* VM_REG_GUEST_xyz */
+
+ int64_t displacement; /* optional addr displacement */
+ int64_t immediate; /* optional immediate operand */
+
+ uint8_t decoded; /* set to 1 if successfully decoded */
+
+ struct vie_op op; /* opcode description */
+};
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t mrr, mem_region_write_t mrw,
+ void *mrarg);
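+
+/*
+ * Example (editor's sketch, not part of the original header): a consumer
+ * supplies callbacks matching the typedefs above and hands the decoded
+ * instruction to the emulator; mmio_read() and mmio_write() are
+ * hypothetical.
+ *
+ *	static int
+ *	mmio_read(void *vm, int cpuid, uint64_t gpa, uint64_t *rval,
+ *	    int rsize, void *arg)
+ *	{
+ *		*rval = 0;	(device-specific read goes here)
+ *		return (0);
+ *	}
+ *
+ *	error = vmm_emulate_instruction(vm, cpuid, gpa, vie,
+ *	    mmio_read, mmio_write, NULL);
+ */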
+
+#ifdef _KERNEL
+/*
+ * APIs to fetch and decode the instruction from nested page fault handler.
+ */
+int vmm_fetch_instruction(struct vm *vm, int cpuid,
+ uint64_t rip, int inst_length, uint64_t cr3,
+ struct vie *vie);
+
+/*
+ * Decode the instruction fetched into 'vie' so it can be emulated.
+ *
+ * 'gla' is the guest linear address provided by the hardware assist
+ * that caused the nested page table fault. It is used to verify that
+ * the software instruction decoding is in agreement with the hardware.
+ *
+ * Some hardware assists do not provide the 'gla' to the hypervisor.
+ * To skip the 'gla' verification for this or any other reason, pass
+ * in VIE_INVALID_GLA instead.
+ */
+#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
+int vmm_decode_instruction(struct vm *vm, int cpuid,
+ uint64_t gla, struct vie *vie);
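+
+/*
+ * Example (editor's sketch, not part of the original header): the nested
+ * page fault handler fetches and then decodes in sequence, passing
+ * VIE_INVALID_GLA when the hardware assist supplies no guest linear
+ * address.
+ *
+ *	if (vmm_fetch_instruction(vm, cpuid, rip, inst_length, cr3,
+ *	    &vie) == 0)
+ *		error = vmm_decode_instruction(vm, cpuid,
+ *		    VIE_INVALID_GLA, &vie);
+ */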
+#endif /* _KERNEL */
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
new file mode 100644
index 0000000..e06fa39
--- /dev/null
+++ b/sys/amd64/include/vmparam.h
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91
+ * $FreeBSD$
+ */
+
+
+#ifndef _MACHINE_VMPARAM_H_
+#define _MACHINE_VMPARAM_H_ 1
+
+/*
+ * Machine dependent constants for AMD64.
+ */
+
+/*
+ * Virtual memory related constants, all in bytes
+ */
+#define MAXTSIZ (128UL*1024*1024) /* max text size */
+#ifndef DFLDSIZ
+#define DFLDSIZ (32768UL*1024*1024) /* initial data size limit */
+#endif
+#ifndef MAXDSIZ
+#define MAXDSIZ (32768UL*1024*1024) /* max data size */
+#endif
+#ifndef DFLSSIZ
+#define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */
+#endif
+#ifndef MAXSSIZ
+#define MAXSSIZ (512UL*1024*1024) /* max stack size */
+#endif
+#ifndef SGROWSIZ
+#define SGROWSIZ (128UL*1024) /* amount to grow stack */
+#endif
+
+/*
+ * We provide a machine specific single page allocator through the use
+ * of the direct mapped segment. This uses 2MB pages for reduced
+ * TLB pressure.
+ */
+#define UMA_MD_SMALL_ALLOC
+
+/*
+ * The physical address space is densely populated.
+ */
+#define VM_PHYSSEG_DENSE
+
+/*
+ * The number of PHYSSEG entries must be one greater than the number
+ * of phys_avail entries because the phys_avail entry that spans the
+ * largest physical address that is accessible by ISA DMA is split
+ * into two PHYSSEG entries.
+ */
+#define VM_PHYSSEG_MAX 31
+
+/*
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * from which physical pages are allocated, VM_FREEPOOL_DIRECT is the
+ * pool from which physical pages for page tables and small UMA objects
+ * are allocated, and VM_FREEPOOL_CACHE is the pool backing cached pages.
+ */
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
+#define VM_FREEPOOL_DEFAULT 0
+#define VM_FREEPOOL_DIRECT 1
+
+/*
+ * Create two free page lists: VM_FREELIST_DEFAULT is for physical
+ * pages that are above the largest physical address that is
+ * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages
+ * that are below that address.
+ */
+#define VM_NFREELIST 2
+#define VM_FREELIST_DEFAULT 0
+#define VM_FREELIST_ISADMA 1
+
+/*
+ * An allocation size of 16MB is supported in order to optimize the
+ * use of the direct map by UMA. Specifically, a cache line contains
+ * at most 8 PDEs, collectively mapping 16MB of physical memory. By
+ * reducing the number of distinct 16MB "pages" that are used by UMA,
+ * the physical memory allocator reduces the likelihood of both 2MB
+ * page TLB misses and cache misses caused by 2MB page TLB misses.
+ */
+#define VM_NFREEORDER 13
+
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
+/*
+ * Enable superpage reservations: 1 level.
+ */
+#ifndef VM_NRESERVLEVEL
+#define VM_NRESERVLEVEL 1
+#endif
+
+/*
+ * Level 0 reservations consist of 512 pages.
+ */
+#ifndef VM_LEVEL_0_ORDER
+#define VM_LEVEL_0_ORDER 9
+#endif
+
+#ifdef SMP
+#define PA_LOCK_COUNT 256
+#endif
+
+/*
+ * Virtual addresses of things. Derived from the page directory and
+ * page table indexes from pmap.h for precision.
+ *
+ * 0x0000000000000000 - 0x00007fffffffffff user map
+ * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole)
+ * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot)
+ * 0xffff804020101000 - 0xfffffdffffffffff unused
+ * 0xfffffe0000000000 - 0xfffffeffffffffff 1TB direct map
+ * 0xffffff0000000000 - 0xffffff7fffffffff unused
+ * 0xffffff8000000000 - 0xffffffffffffffff 512GB kernel map
+ *
+ * Within the kernel map:
+ *
+ * 0xffffffff80000000 KERNBASE
+ */
+
+#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NPDEPG-1, NPTEPG-1)
+#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-512, 0, 0)
+
+#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
+#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
+
+#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0)
+
+#define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
+#define UPT_MIN_ADDRESS KVADDR(PML4PML4I, 0, 0, 0)
+
+#define VM_MAXUSER_ADDRESS UVADDR(NUPML4E, 0, 0, 0)
+
+#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
+#define USRSTACK SHAREDPAGE
+
+#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
+#define VM_MIN_ADDRESS (0)
+
+#define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS)
+#define DMAP_TO_PHYS(x) ((x) & ~DMAP_MIN_ADDRESS)
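+
+/*
+ * Example (editor's sketch, not part of the original header): because the
+ * direct map is a fixed offset, converting a physical address to a usable
+ * kernel virtual address is a single OR, and the conversion round-trips.
+ *
+ *	vm_paddr_t pa = 0x1000;
+ *	void *va = (void *)PHYS_TO_DMAP(pa);
+ *	KASSERT(DMAP_TO_PHYS((vm_offset_t)va) == pa, ("round trip"));
+ */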
+
+/* virtual sizes (bytes) for various kernel submaps */
+#ifndef VM_KMEM_SIZE
+#define VM_KMEM_SIZE (12 * 1024 * 1024)
+#endif
+
+/*
+ * How many physical pages per KVA page allocated.
+ * min(max(max(VM_KMEM_SIZE, Physical memory/VM_KMEM_SIZE_SCALE),
+ * VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX)
+ * is the total KVA space allocated for kmem_map.
+ */
+#ifndef VM_KMEM_SIZE_SCALE
+#define VM_KMEM_SIZE_SCALE (1)
+#endif
+
+/*
+ * Ceiling on amount of kmem_map kva space.
+ */
+#ifndef VM_KMEM_SIZE_MAX
+#define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \
+ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5)
+#endif
+
+/* initial pagein size of beginning of executable file */
+#ifndef VM_INITIAL_PAGEIN
+#define VM_INITIAL_PAGEIN 16
+#endif
+
+#define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */
+
+#endif /* _MACHINE_VMPARAM_H_ */
diff --git a/sys/amd64/include/xen/hypercall.h b/sys/amd64/include/xen/hypercall.h
new file mode 100644
index 0000000..50fa376
--- /dev/null
+++ b/sys/amd64/include/xen/hypercall.h
@@ -0,0 +1,415 @@
+/******************************************************************************
+ * hypercall.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * 64-bit updates:
+ * Benjamin Liu <benjamin.liu@intel.com>
+ * Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __MACHINE_XEN_HYPERCALL_H__
+#define __MACHINE_XEN_HYPERCALL_H__
+
+#include <sys/systm.h>
+
+#ifndef __XEN_HYPERVISOR_H__
+# error "please don't include this file directly"
+#endif
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+#define ENOXENSYS 38
+#define CONFIG_XEN_COMPAT 0x030002
+#define __must_check
+
+#ifdef XEN
+#define HYPERCALL_STR(name) \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
+#else
+#define HYPERCALL_STR(name) \
+ "mov $("STR(__HYPERVISOR_##name)" * 32),%%eax; "\
+ "add hypercall_stubs(%%rip),%%rax; " \
+ "call *%%rax"
+#endif
+
+#define _hypercall0(type, name) \
+({ \
+ type __res; \
+ __asm__ volatile ( \
+ HYPERCALL_STR(name) \
+ : "=a" (__res) \
+ : \
+ : "memory" ); \
+ __res; \
+})
+
+#define _hypercall1(type, name, a1) \
+({ \
+ type __res; \
+ long __ign1; \
+ __asm__ volatile ( \
+ HYPERCALL_STR(name) \
+ : "=a" (__res), "=D" (__ign1) \
+ : "1" ((long)(a1)) \
+ : "memory" ); \
+ __res; \
+})
+
+#define _hypercall2(type, name, a1, a2) \
+({ \
+ type __res; \
+ long __ign1, __ign2; \
+ __asm__ volatile ( \
+ HYPERCALL_STR(name) \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \
+ : "1" ((long)(a1)), "2" ((long)(a2)) \
+ : "memory" ); \
+ __res; \
+})
+
+#define _hypercall3(type, name, a1, a2, a3) \
+({ \
+ type __res; \
+ long __ign1, __ign2, __ign3; \
+ __asm__ volatile ( \
+ HYPERCALL_STR(name) \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)) \
+ : "memory" ); \
+ __res; \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4) \
+({ \
+ type __res; \
+ long __ign1, __ign2, __ign3; \
+ register long __arg4 __asm__("r10") = (long)(a4); \
+ __asm__ volatile ( \
+ HYPERCALL_STR(name) \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3), "+r" (__arg4) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)) \
+ : "memory" ); \
+ __res; \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
+({ \
+ type __res; \
+ long __ign1, __ign2, __ign3; \
+ register long __arg4 __asm__("r10") = (long)(a4); \
+ register long __arg5 __asm__("r8") = (long)(a5); \
+ __asm__ volatile ( \
+ HYPERCALL_STR(name) \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)) \
+ : "memory" ); \
+ __res; \
+})
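+
+/*
+ * Example (editor's sketch, not part of the original header): each
+ * HYPERVISOR_*() wrapper below expands one of the _hypercallN() macros,
+ * which place the arguments in %rdi, %rsi, %rdx, %r10 and %r8 per the
+ * 64-bit Xen hypercall ABI and return the result in %rax.  A hypothetical
+ * two-argument wrapper would look like:
+ *
+ *	static inline int __must_check
+ *	HYPERVISOR_example_op(int cmd, void *arg)
+ *	{
+ *		return _hypercall2(int, example_op, cmd, arg);
+ *	}
+ */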
+
+static inline int __must_check
+HYPERVISOR_set_trap_table(
+ const trap_info_t *table)
+{
+ return _hypercall1(int, set_trap_table, table);
+}
+
+static inline int __must_check
+HYPERVISOR_mmu_update(
+ mmu_update_t *req, unsigned int count, unsigned int *success_count,
+ domid_t domid)
+{
+ return _hypercall4(int, mmu_update, req, count, success_count, domid);
+}
+
+static inline int __must_check
+HYPERVISOR_mmuext_op(
+ struct mmuext_op *op, unsigned int count, unsigned int *success_count,
+ domid_t domid)
+{
+ return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+}
+
+static inline int __must_check
+HYPERVISOR_set_gdt(
+ unsigned long *frame_list, unsigned int entries)
+{
+ return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
+static inline int __must_check
+HYPERVISOR_stack_switch(
+ unsigned long ss, unsigned long esp)
+{
+ return _hypercall2(int, stack_switch, ss, esp);
+}
+
+static inline int __must_check
+HYPERVISOR_set_callbacks(
+ unsigned long event_address, unsigned long failsafe_address,
+ unsigned long syscall_address)
+{
+ return _hypercall3(int, set_callbacks,
+ event_address, failsafe_address, syscall_address);
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+ int set)
+{
+ return _hypercall1(int, fpu_taskswitch, set);
+}
+
+static inline int __must_check
+HYPERVISOR_sched_op_compat(
+ int cmd, unsigned long arg)
+{
+ return _hypercall2(int, sched_op_compat, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_sched_op(
+ int cmd, void *arg)
+{
+ return _hypercall2(int, sched_op, cmd, arg);
+}
+
+static inline long __must_check
+HYPERVISOR_set_timer_op(
+ uint64_t timeout)
+{
+ return _hypercall1(long, set_timer_op, timeout);
+}
+
+static inline int __must_check
+HYPERVISOR_platform_op(
+ struct xen_platform_op *platform_op)
+{
+ platform_op->interface_version = XENPF_INTERFACE_VERSION;
+ return _hypercall1(int, platform_op, platform_op);
+}
+
+static inline int __must_check
+HYPERVISOR_set_debugreg(
+ unsigned int reg, unsigned long value)
+{
+ return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static inline unsigned long __must_check
+HYPERVISOR_get_debugreg(
+ unsigned int reg)
+{
+ return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
+static inline int __must_check
+HYPERVISOR_update_descriptor(
+ unsigned long ma, unsigned long word)
+{
+ return _hypercall2(int, update_descriptor, ma, word);
+}
+
+static inline int __must_check
+HYPERVISOR_memory_op(
+ unsigned int cmd, void *arg)
+{
+ return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_multicall(
+ multicall_entry_t *call_list, unsigned int nr_calls)
+{
+ return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int __must_check
+HYPERVISOR_update_va_mapping(
+ unsigned long va, uint64_t new_val, unsigned long flags)
+{
+ return _hypercall3(int, update_va_mapping, va, new_val, flags);
+}
+
+static inline int __must_check
+HYPERVISOR_event_channel_op(
+ int cmd, void *arg)
+{
+ int rc = _hypercall2(int, event_channel_op, cmd, arg);
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+ if (unlikely(rc == -ENOXENSYS)) {
+ struct evtchn_op op;
+ op.cmd = cmd;
+ memcpy(&op.u, arg, sizeof(op.u));
+ rc = _hypercall1(int, event_channel_op_compat, &op);
+ memcpy(arg, &op.u, sizeof(op.u));
+ }
+#endif
+
+ return rc;
+}
+
+static inline int __must_check
+HYPERVISOR_xen_version(
+ int cmd, void *arg)
+{
+ return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_console_io(
+ int cmd, unsigned int count, char *str)
+{
+ return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int __must_check
+HYPERVISOR_physdev_op(
+ int cmd, void *arg)
+{
+ int rc = _hypercall2(int, physdev_op, cmd, arg);
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+ if (unlikely(rc == -ENOXENSYS)) {
+ struct physdev_op op;
+ op.cmd = cmd;
+ memcpy(&op.u, arg, sizeof(op.u));
+ rc = _hypercall1(int, physdev_op_compat, &op);
+ memcpy(arg, &op.u, sizeof(op.u));
+ }
+#endif
+
+ return rc;
+}
+
+static inline int __must_check
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+static inline int __must_check
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid)
+{
+ return _hypercall4(int, update_va_mapping_otherdomain, va,
+ new_val, flags, domid);
+}
+
+static inline int __must_check
+HYPERVISOR_vm_assist(
+ unsigned int cmd, unsigned int type)
+{
+ return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int __must_check
+HYPERVISOR_vcpu_op(
+ int cmd, unsigned int vcpuid, void *extra_args)
+{
+ return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
+}
+
+static inline int __must_check
+HYPERVISOR_set_segment_base(
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline int __must_check
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ struct sched_shutdown sched_shutdown = {
+ .reason = SHUTDOWN_suspend
+ };
+
+ int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
+ &sched_shutdown, srec);
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+ if (rc == -ENOXENSYS)
+ rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
+ SHUTDOWN_suspend, srec);
+#endif
+
+ return rc;
+}
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op, void *arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
+}
+#endif
+
+#ifndef CONFIG_XEN
+static inline unsigned long __must_check
+HYPERVISOR_hvm_op(
+ int op, void *arg)
+{
+ return _hypercall2(unsigned long, hvm_op, op, arg);
+}
+#endif
+
+static inline int __must_check
+HYPERVISOR_callback_op(
+ int cmd, const void *arg)
+{
+ return _hypercall2(int, callback_op, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_xenoprof_op(
+ int op, void *arg)
+{
+ return _hypercall2(int, xenoprof_op, op, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_kexec_op(
+ unsigned long op, void *args)
+{
+ return _hypercall2(int, kexec_op, op, args);
+}
+
+#undef __must_check
+
+#endif /* __MACHINE_XEN_HYPERCALL_H__ */
diff --git a/sys/amd64/include/xen/synch_bitops.h b/sys/amd64/include/xen/synch_bitops.h
new file mode 100644
index 0000000..746687a
--- /dev/null
+++ b/sys/amd64/include/xen/synch_bitops.h
@@ -0,0 +1,129 @@
+#ifndef __XEN_SYNCH_BITOPS_H__
+#define __XEN_SYNCH_BITOPS_H__
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Heavily modified to provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+
+#define ADDR (*(volatile long *) addr)
+
+static __inline__ void synch_set_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__ (
+ "lock btsl %1,%0"
+ : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_clear_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__ (
+ "lock btrl %1,%0"
+ : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__ (
+ "lock btcl %1,%0"
+ : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+ __asm__ __volatile__ (
+ "lock btsl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+ __asm__ __volatile__ (
+ "lock btrl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__ (
+ "lock btcl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+ return oldbit;
+}
+
+struct __synch_xchg_dummy { unsigned long a[100]; };
+#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x))
+
+#define synch_cmpxchg(ptr, old, new) \
+((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
+ (unsigned long)(old), \
+ (unsigned long)(new), \
+ sizeof(*(ptr))))
+
+static inline unsigned long __synch_cmpxchg(volatile void *ptr,
+ unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__("lock; cmpxchgl %k1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+ __asm__ __volatile__("lock; cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
+static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) &
+ (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+ __asm__ __volatile__ (
+ "btl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
+ return oldbit;
+}
+
+#define synch_test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ synch_const_test_bit((nr),(addr)) : \
+ synch_var_test_bit((nr),(addr)))
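+
+/*
+ * Example (editor's sketch, not part of the original header): a spinlock
+ * shared with another guest can be built from the atomic test-and-set
+ * and clear pair; shared_flags is hypothetical.
+ *
+ *	while (synch_test_and_set_bit(0, shared_flags))
+ *		;	(spin until bit 0 was previously clear)
+ *	(... critical section ...)
+ *	synch_clear_bit(0, shared_flags);
+ */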
+
+#endif /* __XEN_SYNCH_BITOPS_H__ */
diff --git a/sys/amd64/include/xen/xen-os.h b/sys/amd64/include/xen/xen-os.h
new file mode 100644
index 0000000..163e7f2
--- /dev/null
+++ b/sys/amd64/include/xen/xen-os.h
@@ -0,0 +1,296 @@
+/******************************************************************************
+ * os.h
+ *
+ * A random collection of macros and definitions.
+ */
+
+#ifndef _XEN_OS_H_
+#define _XEN_OS_H_
+
+#ifdef PAE
+#define CONFIG_X86_PAE
+#endif
+
+#if !defined(__XEN_INTERFACE_VERSION__)
+/*
+ * Can update to a more recent version when we implement
+ * the hypercall page
+ */
+#define __XEN_INTERFACE_VERSION__ 0x00030204
+#endif
+
+#include <xen/interface/xen.h>
+
+/* Force a proper event-channel callback from Xen. */
+void force_evtchn_callback(void);
+
+extern int gdtset;
+
+extern shared_info_t *HYPERVISOR_shared_info;
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__ ( "rep;nop" : : : "memory" );
+}
+#define cpu_relax() rep_nop()
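+
+/*
+ * Example (editor's sketch, not part of the original header): cpu_relax()
+ * belongs in the body of a busy-wait loop, e.g. while waiting for another
+ * CPU to finish loading the GDT:
+ *
+ *	while (gdtset == 0)
+ *		cpu_relax();
+ */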
+
+/*
+ * Crude memory allocator for memory allocations early in boot.
+ */
+void *bootmem_alloc(unsigned int size);
+void bootmem_free(void *ptr, unsigned int size);
+
+
+/* Everything below this point is not included by assembler (.S) files. */
+#ifndef __ASSEMBLY__
+
+void printk(const char *fmt, ...);
+
+/* some function prototypes */
+void trap_init(void);
+
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
+#ifndef XENHVM
+
+/*
+ * STI/CLI equivalents. These basically set and clear the virtual
+ * event_enable flag in the shared_info structure. Note that when
+ * the enable bit is set, there may be pending events to be handled.
+ * We may therefore call into do_hypervisor_callback() directly.
+ */
+
+#define __cli() \
+do { \
+ vcpu_info_t *_vcpu; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ barrier(); \
+} while (0)
+
+#define __sti() \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
+ _vcpu->evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+} while (0)
+
+#define __restore_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ } \
+} while (0)
+
+/*
+ * Add critical_{enter, exit}?
+ *
+ */
+#define __save_and_cli(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ barrier(); \
+} while (0)
+
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+#define save_and_cli(x) __save_and_cli(x)
+
+#define local_irq_save(x) __save_and_cli(x)
+#define local_irq_restore(x) __restore_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+
+#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
+#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); }
+#define spin_lock_irqsave mtx_lock_irqsave
+#define spin_unlock_irqrestore mtx_unlock_irqrestore
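+
+/*
+ * Example (editor's sketch, not part of the original header): the usual
+ * pairing saves the current upcall mask, masks events for the critical
+ * section, and restores the saved state; sc_lock and flags are
+ * hypothetical.
+ *
+ *	unsigned long flags;
+ *
+ *	mtx_lock_irqsave(&sc_lock, flags);
+ *	(... events masked on this vcpu ...)
+ *	mtx_unlock_irqrestore(&sc_lock, flags);
+ */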
+
+#endif /* !XENHVM */
+
+#ifndef mb
+#define mb() __asm__ __volatile__("mfence":::"memory")
+#endif
+#ifndef rmb
+#define rmb() __asm__ __volatile__("lfence":::"memory");
+#endif
+#ifndef wmb
+#define wmb() barrier()
+#endif
+#ifdef SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while(0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+
+/* This is a barrier for the compiler only, NOT the processor! */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define LOCK_PREFIX ""
+#define LOCK ""
+#define ADDR (*(volatile long *) addr)
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+
+
+#define xen_xchg(ptr,v) \
+ ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+static __inline unsigned long __xchg(unsigned long x, volatile void * ptr,
+ int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("xchgb %b0,%1"
+ :"=q" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 2:
+ __asm__ __volatile__("xchgw %w0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 4:
+ __asm__ __volatile__("xchgl %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ }
+ return x;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __inline int constant_test_bit(int nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline int variable_test_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit)
+ :"m" (ADDR),"Ir" (nr));
+ return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
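+
+/*
+ * Usage sketch (field names hypothetical): a constant bit number takes
+ * the constant-folded path, a computed one the btl-based path, through
+ * the same interface:
+ *
+ *	if (test_bit(0, &s->evtchn_pending[0]))
+ *		handle_event(0);
+ *	if (test_bit(port & 31, &s->evtchn_pending[port >> 5]))
+ *		handle_event(port);
+ */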
+
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btsl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1. On x86 the full 32-bit range of an
+ * atomic_t is usable.
+ */
+static __inline__ void atomic_inc(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "incl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+
+/* Note: "=A" does not form the %edx:%eax pair on amd64; read both halves. */
+#define rdtscll(val) do {						\
+	unsigned int __rdtsc_lo, __rdtsc_hi;				\
+	__asm__ __volatile__("rdtsc" : "=a" (__rdtsc_lo), "=d" (__rdtsc_hi)); \
+	(val) = ((unsigned long)__rdtsc_hi << 32) | __rdtsc_lo;	\
+} while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _OS_H_ */
diff --git a/sys/amd64/include/xen/xenfunc.h b/sys/amd64/include/xen/xenfunc.h
new file mode 100644
index 0000000..d03d4f6
--- /dev/null
+++ b/sys/amd64/include/xen/xenfunc.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2004, 2005 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _XEN_XENFUNC_H_
+#define _XEN_XENFUNC_H_
+
+#ifdef XENHVM
+#include <machine/xen/xenvar.h>
+#else
+#include <machine/xen/xenpmap.h>
+#include <machine/segments.h>
+#endif
+
+#define BKPT __asm__("int3")
+#define XPQ_CALL_DEPTH 5
+#define XPQ_CALL_COUNT 2
+#define PG_PRIV PG_AVAIL3
+typedef struct {
+ unsigned long pt_ref;
+ unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH];
+} pteinfo_t;
+
+extern pteinfo_t *pteinfo_list;
+#ifdef XENDEBUG_LOW
+#define __PRINTK(x) printk x
+#else
+#define __PRINTK(x)
+#endif
+
+char *xen_setbootenv(char *cmd_line);
+
+int xen_boothowto(char *envp);
+
+void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
+
+#ifdef INVARIANTS
+#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__)
+#else
+#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
+#endif
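+
+/*
+ * Call-site sketch: callers always use the two-argument form; the
+ * __FILE__/__LINE__ provenance is appended only under INVARIANTS.
+ *
+ *	xen_machphys_update(mfn, pfn);
+ */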
+
+#ifndef XENHVM
+void xen_update_descriptor(union descriptor *, union descriptor *);
+#endif
+
+extern struct mtx balloon_lock;
+#if 0
+#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags)
+#define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags)
+#else
+#define balloon_lock(__flags) __flags = 1
+#define balloon_unlock(__flags) __flags = 0
+#endif
+
+#endif /* _XEN_XENFUNC_H_ */
diff --git a/sys/amd64/include/xen/xenpmap.h b/sys/amd64/include/xen/xenpmap.h
new file mode 100644
index 0000000..d768dad
--- /dev/null
+++ b/sys/amd64/include/xen/xenpmap.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004,2005 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENPMAP_H_
+#define _XEN_XENPMAP_H_
+
+#include <machine/xen/features.h>
+
+void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
+void xen_pt_switch(vm_paddr_t);
+void xen_set_ldt(vm_paddr_t, unsigned long);
+void xen_pgdpt_pin(vm_paddr_t);
+void xen_pgd_pin(vm_paddr_t);
+void xen_pgd_unpin(vm_paddr_t);
+void xen_pt_pin(vm_paddr_t);
+void xen_pt_unpin(vm_paddr_t);
+void xen_flush_queue(void);
+void xen_check_queue(void);
+#if 0
+void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
+#endif
+
+#ifdef INVARIANTS
+#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
+#else
+#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
+#endif
+
+#ifdef PMAP_DEBUG
+#define PMAP_REF pmap_ref
+#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
+#define PMAP_MARK_PRIV pmap_mark_privileged
+#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
+#else
+#define PMAP_MARK_PRIV(a)
+#define PMAP_MARK_UNPRIV(a)
+#define PMAP_REF(a, b)
+#define PMAP_DEC_REF_PAGE(a)
+#endif
+
+#define ALWAYS_SYNC 0
+
+#ifdef PT_DEBUG
+#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
+#else
+#define PT_LOG()
+#endif
+
+#define INVALID_P2M_ENTRY (~0UL)
+
+#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
+
+#define SH_PD_SET_VA 1
+#define SH_PD_SET_VA_MA 2
+#define SH_PD_SET_VA_CLEAR 3
+
+struct pmap;
+void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
+#ifdef notyet
+static vm_paddr_t
+vptetomachpte(vm_paddr_t *pte)
+{
+ vm_offset_t offset, ppte;
+ vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
+ int pgindex;
+
+ ppte = (vm_offset_t)pte;
+ pgoffset = (ppte & PAGE_MASK);
+ offset = ppte - (vm_offset_t)PTmap;
+ pgindex = ppte >> PDRSHIFT;
+
+ pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
+ retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
+ return (retval);
+}
+#endif
+#define PT_GET(_ptp) \
+ (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
+
+#ifdef WRITABLE_PAGETABLES
+
+#define PT_SET_VA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), xpmap_ptom(_npte)); \
+ PT_LOG(); \
+ *(_ptp) = xpmap_ptom((_npte)); \
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), (_npte)); \
+ PT_LOG(); \
+ *(_ptp) = (_npte); \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(_ptp), 0); \
+ PT_LOG(); \
+ *(_ptp) = 0; \
+} while (/*CONSTCOND*/0)
+
+#define PD_SET_VA(_pmap, _ptepindex, _npte, sync) do {		\
+	PMAP_REF((_ptepindex), xpmap_ptom(_npte));		\
+	pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA);	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptepindex, _npte, sync) do {	\
+	PMAP_REF((_ptepindex), (_npte));			\
+	pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA);	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do {		\
+	PMAP_REF((pt_entry_t *)(_ptepindex), 0);		\
+	pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR);	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+
+#else /* !WRITABLE_PAGETABLES */
+
+#define PT_SET_VA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), xpmap_ptom(_npte)); \
+ xen_queue_pt_update(vtomach(_ptp), \
+ xpmap_ptom(_npte)); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), (_npte)); \
+ xen_queue_pt_update(vtomach(_ptp), _npte); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(_ptp), 0); \
+ xen_queue_pt_update(vtomach(_ptp), 0); \
+ if (sync || ALWAYS_SYNC) \
+ xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+
+#define PD_SET_VA(_pmap, _ptepindex, _npte, sync) do {		\
+	PMAP_REF((_ptepindex), xpmap_ptom(_npte));		\
+	pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA);	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptepindex, _npte, sync) do {	\
+	PMAP_REF((_ptepindex), (_npte));			\
+	pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA);	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do {		\
+	PMAP_REF((pt_entry_t *)(_ptepindex), 0);		\
+	pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR);	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+
+#endif
+
+#define PT_SET_MA(_va, _ma) \
+do { \
+ PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
+ (_ma), \
+ UVMF_INVLPG| UVMF_ALL) < 0); \
+} while (/*CONSTCOND*/0)
+
+#define PT_UPDATES_FLUSH() do { \
+ xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
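+
+/*
+ * Batching sketch (caller names hypothetical): under !WRITABLE_PAGETABLES,
+ * a run of PTE writes can be queued with sync == FALSE and paid for with
+ * a single flush instead of one hypercall per entry:
+ *
+ *	for (i = 0; i < npages; i++)
+ *		PT_SET_VA(&ptep[i], npte[i], FALSE);
+ *	PT_UPDATES_FLUSH();
+ *
+ * The writes are queued and the final call issues the one flush.
+ */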
+
+static __inline vm_paddr_t
+xpmap_mtop(vm_paddr_t mpa)
+{
+ vm_paddr_t tmp = (mpa & PG_FRAME);
+
+	return (machtophys(tmp) | (mpa & ~PG_FRAME));
+}
+
+static __inline vm_paddr_t
+xpmap_ptom(vm_paddr_t ppa)
+{
+ vm_paddr_t tmp = (ppa & PG_FRAME);
+
+	return (phystomach(tmp) | (ppa & ~PG_FRAME));
+}
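+
+/*
+ * Only the frame bits are translated; the low flag bits pass through
+ * unchanged, so a complete PTE converts directly (sketch):
+ *
+ *	pt_entry_t ma_pte = xpmap_ptom(pa | PG_V | PG_RW);
+ */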
+
+static __inline void
+set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+#ifdef notyet
+ PANIC_IF(max_mapnr && pfn >= max_mapnr);
+#endif
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+#ifdef notyet
+ PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
+#endif
+ return;
+ }
+ xen_phys_machine[pfn] = mfn;
+}
+
+
+#endif /* _XEN_XENPMAP_H_ */
diff --git a/sys/amd64/include/xen/xenvar.h b/sys/amd64/include/xen/xenvar.h
new file mode 100644
index 0000000..d9dbc5d
--- /dev/null
+++ b/sys/amd64/include/xen/xenvar.h
@@ -0,0 +1,120 @@
+/*-
+ * Copyright (c) 2008 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef XENVAR_H_
+#define XENVAR_H_
+#define XBOOTUP 0x1
+#define XPMAP 0x2
+extern int xendebug_flags;
+#ifndef NOXENDEBUG
+#define XENPRINTF printk
+#else
+#define XENPRINTF printf
+#endif
+#include <xen/features.h>
+
+#if 0
+#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_DEBUG(argflags, _f, _a...) \
+if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
+#else
+#define TRACE_ENTER
+#define TRACE_EXIT
+#define TRACE_DEBUG(argflags, _f, _a...)
+#endif
+
+#ifdef XENHVM
+
+static inline vm_paddr_t
+phystomach(vm_paddr_t pa)
+{
+
+ return (pa);
+}
+
+static inline vm_paddr_t
+machtophys(vm_paddr_t ma)
+{
+
+ return (ma);
+}
+
+#define vtomach(va) pmap_kextract((vm_offset_t) (va))
+#define PFNTOMFN(pa) (pa)
+#define MFNTOPFN(ma) (ma)
+
+#define set_phys_to_machine(pfn, mfn) ((void)0)
+#define phys_to_machine_mapping_valid(pfn) (TRUE)
+#define PT_UPDATES_FLUSH() ((void)0)
+
+#else
+
+extern xen_pfn_t *xen_phys_machine;
+extern xen_pfn_t *xen_machine_phys;
+
+/*
+ * Xen starts physical pages after the 4MB ISA hole; FreeBSD does not.
+ */
+#undef ADD_ISA_HOLE /* XXX */
+
+#ifdef ADD_ISA_HOLE
+#define ISA_INDEX_OFFSET 1024
+#define ISA_PDR_OFFSET 1
+#else
+#define ISA_INDEX_OFFSET 0
+#define ISA_PDR_OFFSET 0
+#endif
+
+
+#define PFNTOMFN(i) (xen_phys_machine[(i)])
+#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
+
+#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
+#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
+
+#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
+#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
+
+#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+
+#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
+#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
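+
+/*
+ * Note (sketch): these translate whole frames, so a sub-page offset is
+ * dropped and must be carried separately:
+ *
+ *	ma = phystomach(pa & ~PAGE_MASK) | (pa & PAGE_MASK);
+ */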
+
+#endif
+
+void xpq_init(void);
+
+int xen_create_contiguous_region(vm_page_t pages, int npages);
+
+void xen_destroy_contiguous_region(void * addr, int npages);
+
+#endif
diff --git a/sys/amd64/linux32/Makefile b/sys/amd64/linux32/Makefile
new file mode 100644
index 0000000..4826981
--- /dev/null
+++ b/sys/amd64/linux32/Makefile
@@ -0,0 +1,17 @@
+# Makefile for syscall tables
+#
+# $FreeBSD$
+
+all:
+ @echo "make sysent only"
+
+sysent: linux32_sysent.c linux32_syscall.h linux32_proto.h linux32_syscalls.c linux32_systrace_args.c
+
+linux32_sysent.c linux32_syscall.h linux32_proto.h linux32_syscalls.c linux32_systrace_args.c: ../../kern/makesyscalls.sh \
+ syscalls.master syscalls.conf
+ -mv -f linux32_sysent.c linux32_sysent.c.bak
+ -mv -f linux32_syscall.h linux32_syscall.h.bak
+ -mv -f linux32_proto.h linux32_proto.h.bak
+ -mv -f linux32_syscalls.c linux32_syscalls.c.bak
+ -mv -f linux32_systrace_args.c linux32_systrace_args.c.bak
+ sh ../../kern/makesyscalls.sh syscalls.master syscalls.conf
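+
+# Usage sketch: after editing syscalls.master, regenerate the tables with
+#	cd sys/amd64/linux32 && make sysent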
diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h
new file mode 100644
index 0000000..7b52a64
--- /dev/null
+++ b/sys/amd64/linux32/linux.h
@@ -0,0 +1,788 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2001 Doug Rabson
+ * Copyright (c) 1994-1996 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _AMD64_LINUX_H_
+#define _AMD64_LINUX_H_
+
+#include <amd64/linux32/linux32_syscall.h>
+
+/*
+ * debugging support
+ */
+extern u_char linux_debug_map[];
+#define ldebug(name) isclr(linux_debug_map, LINUX_SYS_linux_ ## name)
+#define ARGS(nm, fmt) "linux(%ld): "#nm"("fmt")\n", (long)td->td_proc->p_pid
+#define LMSG(fmt) "linux(%ld): "fmt"\n", (long)td->td_proc->p_pid
+#define LINUX_DTRACE linuxulator32
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_LINUX);
+#endif
+
+#define LINUX32_MAXUSER ((1ul << 32) - PAGE_SIZE)
+#define LINUX32_SHAREDPAGE (LINUX32_MAXUSER - PAGE_SIZE)
+#define LINUX32_USRSTACK LINUX32_SHAREDPAGE
+
+/* XXX 16 = sizeof(linux32_ps_strings) */
+#define LINUX32_PS_STRINGS (LINUX32_USRSTACK - 16)
+#define LINUX32_MAXDSIZ (512 * 1024 * 1024) /* 512MB */
+#define LINUX32_MAXSSIZ (64 * 1024 * 1024) /* 64MB */
+#define LINUX32_MAXVMEM 0 /* Unlimited */
+
+#define PTRIN(v) (void *)(uintptr_t)(v)
+#define PTROUT(v) (l_uintptr_t)(uintptr_t)(v)
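+
+/*
+ * Conversion sketch (iov32/iov hypothetical): 32-bit user pointers travel
+ * as l_uintptr_t and are widened back before kernel use:
+ *
+ *	iov.iov_base = PTRIN(iov32.iov_base);
+ *	iov32.iov_base = PTROUT(iov.iov_base);
+ */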
+
+/*
+ * Provide a separate set of types for the 32-bit Linux ABI.
+ */
+typedef int l_int;
+typedef int32_t l_long;
+typedef int64_t l_longlong;
+typedef short l_short;
+typedef unsigned int l_uint;
+typedef uint32_t l_ulong;
+typedef uint64_t l_ulonglong;
+typedef unsigned short l_ushort;
+
+typedef l_ulong l_uintptr_t;
+typedef l_long l_clock_t;
+typedef l_int l_daddr_t;
+typedef l_ushort l_dev_t;
+typedef l_uint l_gid_t;
+typedef l_ushort l_gid16_t;
+typedef l_ulong l_ino_t;
+typedef l_int l_key_t;
+typedef l_longlong l_loff_t;
+typedef l_ushort l_mode_t;
+typedef l_long l_off_t;
+typedef l_int l_pid_t;
+typedef l_uint l_size_t;
+typedef l_long l_suseconds_t;
+typedef l_long l_time_t;
+typedef l_uint l_uid_t;
+typedef l_ushort l_uid16_t;
+typedef l_int l_timer_t;
+typedef l_int l_mqd_t;
+
+typedef struct {
+ l_int val[2];
+} __packed l_fsid_t;
+
+typedef struct {
+ l_time_t tv_sec;
+ l_suseconds_t tv_usec;
+} l_timeval;
+
+#define l_fd_set fd_set
+
+/*
+ * Miscellaneous
+ */
+#define LINUX_AT_COUNT 16 /* Count of used aux entry types.
+ * Keep this synchronized with
+ * elf_linux_fixup() code.
+ */
+struct l___sysctl_args {
+ l_uintptr_t name;
+ l_int nlen;
+ l_uintptr_t oldval;
+ l_uintptr_t oldlenp;
+ l_uintptr_t newval;
+ l_size_t newlen;
+ l_ulong __spare[4];
+} __packed;
+
+/* Resource limits */
+#define LINUX_RLIMIT_CPU 0
+#define LINUX_RLIMIT_FSIZE 1
+#define LINUX_RLIMIT_DATA 2
+#define LINUX_RLIMIT_STACK 3
+#define LINUX_RLIMIT_CORE 4
+#define LINUX_RLIMIT_RSS 5
+#define LINUX_RLIMIT_NPROC 6
+#define LINUX_RLIMIT_NOFILE 7
+#define LINUX_RLIMIT_MEMLOCK 8
+#define LINUX_RLIMIT_AS 9 /* Address space limit */
+
+#define LINUX_RLIM_NLIMITS 10
+
+struct l_rlimit {
+ l_ulong rlim_cur;
+ l_ulong rlim_max;
+} __packed;
+
+struct l_rusage {
+ l_timeval ru_utime;
+ l_timeval ru_stime;
+ l_long ru_maxrss;
+ l_long ru_ixrss;
+ l_long ru_idrss;
+ l_long ru_isrss;
+ l_long ru_minflt;
+ l_long ru_majflt;
+ l_long ru_nswap;
+ l_long ru_inblock;
+ l_long ru_oublock;
+ l_long ru_msgsnd;
+ l_long ru_msgrcv;
+ l_long ru_nsignals;
+ l_long ru_nvcsw;
+ l_long ru_nivcsw;
+} __packed;
+
+/* mmap options */
+#define LINUX_MAP_SHARED 0x0001
+#define LINUX_MAP_PRIVATE 0x0002
+#define LINUX_MAP_FIXED 0x0010
+#define LINUX_MAP_ANON 0x0020
+#define LINUX_MAP_GROWSDOWN 0x0100
+
+struct l_mmap_argv {
+ l_uintptr_t addr;
+ l_size_t len;
+ l_int prot;
+ l_int flags;
+ l_int fd;
+ l_ulong pgoff;
+};
+
+/*
+ * stat family of syscalls
+ */
+struct l_timespec {
+ l_time_t tv_sec;
+ l_long tv_nsec;
+} __packed;
+
+struct l_newstat {
+ l_ushort st_dev;
+ l_ushort __pad1;
+ l_ulong st_ino;
+ l_ushort st_mode;
+ l_ushort st_nlink;
+ l_ushort st_uid;
+ l_ushort st_gid;
+ l_ushort st_rdev;
+ l_ushort __pad2;
+ l_ulong st_size;
+ l_ulong st_blksize;
+ l_ulong st_blocks;
+ struct l_timespec st_atim;
+ struct l_timespec st_mtim;
+ struct l_timespec st_ctim;
+ l_ulong __unused4;
+ l_ulong __unused5;
+} __packed;
+
+struct l_stat {
+ l_ushort st_dev;
+ l_ulong st_ino;
+ l_ushort st_mode;
+ l_ushort st_nlink;
+ l_ushort st_uid;
+ l_ushort st_gid;
+ l_ushort st_rdev;
+ l_long st_size;
+ struct l_timespec st_atim;
+ struct l_timespec st_mtim;
+ struct l_timespec st_ctim;
+ l_long st_blksize;
+ l_long st_blocks;
+ l_ulong st_flags;
+ l_ulong st_gen;
+};
+
+struct l_stat64 {
+ l_ushort st_dev;
+ u_char __pad0[10];
+ l_ulong __st_ino;
+ l_uint st_mode;
+ l_uint st_nlink;
+ l_ulong st_uid;
+ l_ulong st_gid;
+ l_ushort st_rdev;
+ u_char __pad3[10];
+ l_longlong st_size;
+ l_ulong st_blksize;
+ l_ulong st_blocks;
+ l_ulong __pad4;
+ struct l_timespec st_atim;
+ struct l_timespec st_mtim;
+ struct l_timespec st_ctim;
+ l_ulonglong st_ino;
+} __packed;
+
+struct l_statfs64 {
+ l_int f_type;
+ l_int f_bsize;
+ uint64_t f_blocks;
+ uint64_t f_bfree;
+ uint64_t f_bavail;
+ uint64_t f_files;
+ uint64_t f_ffree;
+ l_fsid_t f_fsid;
+ l_int f_namelen;
+ l_int f_spare[6];
+} __packed;
+
+/*
+ * Signalling
+ */
+#define LINUX_SIGHUP 1
+#define LINUX_SIGINT 2
+#define LINUX_SIGQUIT 3
+#define LINUX_SIGILL 4
+#define LINUX_SIGTRAP 5
+#define LINUX_SIGABRT 6
+#define LINUX_SIGIOT LINUX_SIGABRT
+#define LINUX_SIGBUS 7
+#define LINUX_SIGFPE 8
+#define LINUX_SIGKILL 9
+#define LINUX_SIGUSR1 10
+#define LINUX_SIGSEGV 11
+#define LINUX_SIGUSR2 12
+#define LINUX_SIGPIPE 13
+#define LINUX_SIGALRM 14
+#define LINUX_SIGTERM 15
+#define LINUX_SIGSTKFLT 16
+#define LINUX_SIGCHLD 17
+#define LINUX_SIGCONT 18
+#define LINUX_SIGSTOP 19
+#define LINUX_SIGTSTP 20
+#define LINUX_SIGTTIN 21
+#define LINUX_SIGTTOU 22
+#define LINUX_SIGURG 23
+#define LINUX_SIGXCPU 24
+#define LINUX_SIGXFSZ 25
+#define LINUX_SIGVTALRM 26
+#define LINUX_SIGPROF 27
+#define LINUX_SIGWINCH 28
+#define LINUX_SIGIO 29
+#define LINUX_SIGPOLL LINUX_SIGIO
+#define LINUX_SIGPWR 30
+#define LINUX_SIGSYS 31
+#define LINUX_SIGRTMIN 32
+
+#define LINUX_SIGTBLSZ 31
+#define LINUX_NSIG_WORDS 2
+#define LINUX_NBPW 32
+#define LINUX_NSIG (LINUX_NBPW * LINUX_NSIG_WORDS)
+
+/* sigaction flags */
+#define LINUX_SA_NOCLDSTOP 0x00000001
+#define LINUX_SA_NOCLDWAIT 0x00000002
+#define LINUX_SA_SIGINFO 0x00000004
+#define LINUX_SA_RESTORER 0x04000000
+#define LINUX_SA_ONSTACK 0x08000000
+#define LINUX_SA_RESTART 0x10000000
+#define LINUX_SA_INTERRUPT 0x20000000
+#define LINUX_SA_NOMASK 0x40000000
+#define LINUX_SA_ONESHOT 0x80000000
+
+/* sigprocmask actions */
+#define LINUX_SIG_BLOCK 0
+#define LINUX_SIG_UNBLOCK 1
+#define LINUX_SIG_SETMASK 2
+
+/* sigset_t macros */
+#define LINUX_SIGEMPTYSET(set) (set).__bits[0] = (set).__bits[1] = 0
+#define LINUX_SIGISMEMBER(set, sig) SIGISMEMBER(set, sig)
+#define LINUX_SIGADDSET(set, sig) SIGADDSET(set, sig)
+
+/* sigaltstack */
+#define LINUX_MINSIGSTKSZ 2048
+#define LINUX_SS_ONSTACK 1
+#define LINUX_SS_DISABLE 2
+
+int linux_to_bsd_sigaltstack(int lsa);
+int bsd_to_linux_sigaltstack(int bsa);
+
+typedef l_uintptr_t l_handler_t;
+typedef l_ulong l_osigset_t;
+
+typedef struct {
+ l_uint __bits[LINUX_NSIG_WORDS];
+} __packed l_sigset_t;
+
+typedef struct {
+ l_handler_t lsa_handler;
+ l_osigset_t lsa_mask;
+ l_ulong lsa_flags;
+ l_uintptr_t lsa_restorer;
+} __packed l_osigaction_t;
+
+typedef struct {
+ l_handler_t lsa_handler;
+ l_ulong lsa_flags;
+ l_uintptr_t lsa_restorer;
+ l_sigset_t lsa_mask;
+} __packed l_sigaction_t;
+
+typedef struct {
+ l_uintptr_t ss_sp;
+ l_int ss_flags;
+ l_size_t ss_size;
+} __packed l_stack_t;
+
+/* The Linux sigcontext, pretty much a standard 386 trapframe. */
+struct l_sigcontext {
+ l_uint sc_gs;
+ l_uint sc_fs;
+ l_uint sc_es;
+ l_uint sc_ds;
+ l_uint sc_edi;
+ l_uint sc_esi;
+ l_uint sc_ebp;
+ l_uint sc_esp;
+ l_uint sc_ebx;
+ l_uint sc_edx;
+ l_uint sc_ecx;
+ l_uint sc_eax;
+ l_uint sc_trapno;
+ l_uint sc_err;
+ l_uint sc_eip;
+ l_uint sc_cs;
+ l_uint sc_eflags;
+ l_uint sc_esp_at_signal;
+ l_uint sc_ss;
+ l_uint sc_387;
+ l_uint sc_mask;
+ l_uint sc_cr2;
+} __packed;
+
+struct l_ucontext {
+ l_ulong uc_flags;
+ l_uintptr_t uc_link;
+ l_stack_t uc_stack;
+ struct l_sigcontext uc_mcontext;
+ l_sigset_t uc_sigmask;
+} __packed;
+
+#define LINUX_SI_MAX_SIZE 128
+#define LINUX_SI_PAD_SIZE ((LINUX_SI_MAX_SIZE/sizeof(l_int)) - 3)
+
+typedef union l_sigval {
+ l_int sival_int;
+ l_uintptr_t sival_ptr;
+} l_sigval_t;
+
+typedef struct l_siginfo {
+ l_int lsi_signo;
+ l_int lsi_errno;
+ l_int lsi_code;
+ union {
+ l_int _pad[LINUX_SI_PAD_SIZE];
+
+ struct {
+ l_pid_t _pid;
+ l_uid_t _uid;
+ } __packed _kill;
+
+ struct {
+ l_timer_t _tid;
+ l_int _overrun;
+ char _pad[sizeof(l_uid_t) - sizeof(l_int)];
+ l_sigval_t _sigval;
+ l_int _sys_private;
+ } __packed _timer;
+
+ struct {
+ l_pid_t _pid; /* sender's pid */
+ l_uid_t _uid; /* sender's uid */
+ l_sigval_t _sigval;
+ } __packed _rt;
+
+ struct {
+ l_pid_t _pid; /* which child */
+ l_uid_t _uid; /* sender's uid */
+ l_int _status; /* exit code */
+ l_clock_t _utime;
+ l_clock_t _stime;
+ } __packed _sigchld;
+
+ struct {
+ l_uintptr_t _addr; /* Faulting insn/memory ref. */
+ } __packed _sigfault;
+
+ struct {
+ l_long _band; /* POLL_IN,POLL_OUT,POLL_MSG */
+ l_int _fd;
+ } __packed _sigpoll;
+ } _sifields;
+} __packed l_siginfo_t;
+
+#define lsi_pid _sifields._kill._pid
+#define lsi_uid _sifields._kill._uid
+#define lsi_tid _sifields._timer._tid
+#define lsi_overrun _sifields._timer._overrun
+#define lsi_sys_private _sifields._timer._sys_private
+#define lsi_status _sifields._sigchld._status
+#define lsi_utime _sifields._sigchld._utime
+#define lsi_stime _sifields._sigchld._stime
+#define lsi_value _sifields._rt._sigval
+#define lsi_int _sifields._rt._sigval.sival_int
+#define lsi_ptr _sifields._rt._sigval.sival_ptr
+#define lsi_addr _sifields._sigfault._addr
+#define lsi_band _sifields._sigpoll._band
+#define lsi_fd _sifields._sigpoll._fd
+
+struct l_fpreg {
+ u_int16_t significand[4];
+ u_int16_t exponent;
+} __packed;
+
+struct l_fpxreg {
+ u_int16_t significand[4];
+ u_int16_t exponent;
+ u_int16_t padding[3];
+} __packed;
+
+struct l_xmmreg {
+ u_int32_t element[4];
+} __packed;
+
+struct l_fpstate {
+ /* Regular FPU environment */
+ u_int32_t cw;
+ u_int32_t sw;
+ u_int32_t tag;
+ u_int32_t ipoff;
+ u_int32_t cssel;
+ u_int32_t dataoff;
+ u_int32_t datasel;
+ struct l_fpreg _st[8];
+ u_int16_t status;
+ u_int16_t magic; /* 0xffff = regular FPU data */
+
+ /* FXSR FPU environment */
+ u_int32_t _fxsr_env[6]; /* env is ignored. */
+ u_int32_t mxcsr;
+ u_int32_t reserved;
+ struct l_fpxreg _fxsr_st[8]; /* reg data is ignored. */
+ struct l_xmmreg _xmm[8];
+ u_int32_t padding[56];
+} __packed;
+
+/*
+ * We make the stack look like Linux expects it when calling a signal
+ * handler, but use the BSD way of calling the handler and sigreturn().
+ * This means that we need to pass the pointer to the handler too.
+ * It is appended to the frame to not interfere with the rest of it.
+ */
+struct l_sigframe {
+ l_int sf_sig;
+ struct l_sigcontext sf_sc;
+ struct l_fpstate sf_fpstate;
+ l_uint sf_extramask[LINUX_NSIG_WORDS-1];
+ l_handler_t sf_handler;
+} __packed;
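+
+/*
+ * Frame layout on the 32-bit user stack at sigcode entry (sketch):
+ *
+ *	sf_sig		signal number; becomes the handler's argument
+ *	sf_sc		sigcontext consumed by linux_sigreturn()
+ *	sf_fpstate	FPU state
+ *	sf_extramask	upper words of the signal mask
+ *	sf_handler	handler address, called through the trampoline
+ */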
+
+struct l_rt_sigframe {
+ l_int sf_sig;
+ l_uintptr_t sf_siginfo;
+ l_uintptr_t sf_ucontext;
+ l_siginfo_t sf_si;
+ struct l_ucontext sf_sc;
+ l_handler_t sf_handler;
+} __packed;
+
+extern struct sysentvec elf_linux_sysvec;
+
+/*
+ * open/fcntl flags
+ */
+#define LINUX_O_RDONLY 00000000
+#define LINUX_O_WRONLY 00000001
+#define LINUX_O_RDWR 00000002
+#define LINUX_O_ACCMODE 00000003
+#define LINUX_O_CREAT 00000100
+#define LINUX_O_EXCL 00000200
+#define LINUX_O_NOCTTY 00000400
+#define LINUX_O_TRUNC 00001000
+#define LINUX_O_APPEND 00002000
+#define LINUX_O_NONBLOCK 00004000
+#define LINUX_O_NDELAY LINUX_O_NONBLOCK
+#define LINUX_O_SYNC 00010000
+#define LINUX_FASYNC 00020000
+#define LINUX_O_DIRECT 00040000 /* Direct disk access hint */
+#define LINUX_O_LARGEFILE 00100000
+#define LINUX_O_DIRECTORY 00200000 /* Must be a directory */
+#define LINUX_O_NOFOLLOW 00400000 /* Do not follow links */
+#define LINUX_O_NOATIME 01000000
+#define LINUX_O_CLOEXEC 02000000
+
+#define LINUX_F_DUPFD 0
+#define LINUX_F_GETFD 1
+#define LINUX_F_SETFD 2
+#define LINUX_F_GETFL 3
+#define LINUX_F_SETFL 4
+#define LINUX_F_GETLK 5
+#define LINUX_F_SETLK 6
+#define LINUX_F_SETLKW 7
+#define LINUX_F_SETOWN 8
+#define LINUX_F_GETOWN 9
+
+#define LINUX_F_GETLK64 12
+#define LINUX_F_SETLK64 13
+#define LINUX_F_SETLKW64 14
+
+#define LINUX_F_RDLCK 0
+#define LINUX_F_WRLCK 1
+#define LINUX_F_UNLCK 2
+
+union l_semun {
+ l_int val;
+ l_uintptr_t buf;
+ l_uintptr_t array;
+ l_uintptr_t __buf;
+ l_uintptr_t __pad;
+} __packed;
+
+/*
+ * Socket defines
+ */
+#define LINUX_SOL_SOCKET 1
+#define LINUX_SOL_IP 0
+#define LINUX_SOL_IPX 256
+#define LINUX_SOL_AX25 257
+#define LINUX_SOL_TCP 6
+#define LINUX_SOL_UDP 17
+
+#define LINUX_SO_DEBUG 1
+#define LINUX_SO_REUSEADDR 2
+#define LINUX_SO_TYPE 3
+#define LINUX_SO_ERROR 4
+#define LINUX_SO_DONTROUTE 5
+#define LINUX_SO_BROADCAST 6
+#define LINUX_SO_SNDBUF 7
+#define LINUX_SO_RCVBUF 8
+#define LINUX_SO_KEEPALIVE 9
+#define LINUX_SO_OOBINLINE 10
+#define LINUX_SO_NO_CHECK 11
+#define LINUX_SO_PRIORITY 12
+#define LINUX_SO_LINGER 13
+#define LINUX_SO_PEERCRED 17
+#define LINUX_SO_RCVLOWAT 18
+#define LINUX_SO_SNDLOWAT 19
+#define LINUX_SO_RCVTIMEO 20
+#define LINUX_SO_SNDTIMEO 21
+#define LINUX_SO_TIMESTAMP 29
+#define LINUX_SO_ACCEPTCONN 30
+
+struct l_sockaddr {
+ l_ushort sa_family;
+ char sa_data[14];
+} __packed;
+
+struct l_msghdr {
+ l_uintptr_t msg_name;
+ l_int msg_namelen;
+ l_uintptr_t msg_iov;
+ l_size_t msg_iovlen;
+ l_uintptr_t msg_control;
+ l_size_t msg_controllen;
+ l_uint msg_flags;
+};
+
+struct l_cmsghdr {
+ l_size_t cmsg_len;
+ l_int cmsg_level;
+ l_int cmsg_type;
+};
+
+struct l_ifmap {
+ l_ulong mem_start;
+ l_ulong mem_end;
+ l_ushort base_addr;
+ u_char irq;
+ u_char dma;
+ u_char port;
+} __packed;
+
+#define LINUX_IFHWADDRLEN 6
+#define LINUX_IFNAMSIZ 16
+
+struct l_ifreq {
+ union {
+ char ifrn_name[LINUX_IFNAMSIZ];
+ } ifr_ifrn;
+
+ union {
+ struct l_sockaddr ifru_addr;
+ struct l_sockaddr ifru_dstaddr;
+ struct l_sockaddr ifru_broadaddr;
+ struct l_sockaddr ifru_netmask;
+ struct l_sockaddr ifru_hwaddr;
+ l_short ifru_flags[1];
+ l_int ifru_metric;
+ l_int ifru_mtu;
+ struct l_ifmap ifru_map;
+ char ifru_slave[LINUX_IFNAMSIZ];
+ l_uintptr_t ifru_data;
+ } ifr_ifru;
+} __packed;
+
+#define ifr_name ifr_ifrn.ifrn_name /* Interface name */
+#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */
+
+struct l_ifconf {
+ int ifc_len;
+ union {
+ l_uintptr_t ifcu_buf;
+ l_uintptr_t ifcu_req;
+ } ifc_ifcu;
+} __packed;
+
+#define ifc_buf ifc_ifcu.ifcu_buf
+#define ifc_req ifc_ifcu.ifcu_req
+
+/*
+ * poll()
+ */
+#define LINUX_POLLIN 0x0001
+#define LINUX_POLLPRI 0x0002
+#define LINUX_POLLOUT 0x0004
+#define LINUX_POLLERR 0x0008
+#define LINUX_POLLHUP 0x0010
+#define LINUX_POLLNVAL 0x0020
+#define LINUX_POLLRDNORM 0x0040
+#define LINUX_POLLRDBAND 0x0080
+#define LINUX_POLLWRNORM 0x0100
+#define LINUX_POLLWRBAND 0x0200
+#define LINUX_POLLMSG 0x0400
+
+struct l_pollfd {
+ l_int fd;
+ l_short events;
+ l_short revents;
+} __packed;
+
+struct l_user_desc {
+ l_uint entry_number;
+ l_uint base_addr;
+ l_uint limit;
+ l_uint seg_32bit:1;
+ l_uint contents:2;
+ l_uint read_exec_only:1;
+ l_uint limit_in_pages:1;
+ l_uint seg_not_present:1;
+ l_uint useable:1;
+};
+
+#define LINUX_LOWERWORD 0x0000ffff
+
+/*
+ * Macros that do the same thing as those in Linux include/asm-um/ldt-i386.h.
+ * They convert a Linux user-space descriptor into a machine descriptor.
+ */
+#define LINUX_LDT_entry_a(info) \
+ ((((info)->base_addr & LINUX_LOWERWORD) << 16) | \
+ ((info)->limit & LINUX_LOWERWORD))
+
+#define LINUX_ENTRY_B_READ_EXEC_ONLY 9
+#define LINUX_ENTRY_B_CONTENTS 10
+#define LINUX_ENTRY_B_SEG_NOT_PRESENT 15
+#define LINUX_ENTRY_B_BASE_ADDR 16
+#define LINUX_ENTRY_B_USEABLE 20
+#define LINUX_ENTRY_B_SEG32BIT 22
+#define LINUX_ENTRY_B_LIMIT 23
+
+#define LINUX_LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ ((info)->limit & 0xf0000) | \
+ ((info)->contents << LINUX_ENTRY_B_CONTENTS) | \
+ (((info)->seg_not_present == 0) << LINUX_ENTRY_B_SEG_NOT_PRESENT) | \
+ (((info)->base_addr & 0x00ff0000) >> LINUX_ENTRY_B_BASE_ADDR) | \
+ (((info)->read_exec_only == 0) << LINUX_ENTRY_B_READ_EXEC_ONLY) | \
+ ((info)->seg_32bit << LINUX_ENTRY_B_SEG32BIT) | \
+ ((info)->useable << LINUX_ENTRY_B_USEABLE) | \
+ ((info)->limit_in_pages << LINUX_ENTRY_B_LIMIT) | 0x7000)
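+
+/*
+ * Worked example (sketch): a flat, present, writable 32-bit data segment
+ * with page-granular limit 0xfffff:
+ *
+ *	struct l_user_desc d = { .base_addr = 0, .limit = 0xfffff,
+ *	    .seg_32bit = 1, .contents = 0, .read_exec_only = 0,
+ *	    .limit_in_pages = 1, .seg_not_present = 0, .useable = 1 };
+ *
+ * gives LINUX_LDT_entry_a(&d) == 0x0000ffff and
+ * LINUX_LDT_entry_b(&d) == 0x00dff200.
+ */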
+
+#define LINUX_LDT_empty(info) \
+ ((info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->useable == 0)
+
+/*
+ * Macros for converting segments.
+ * They do the same as those in arch/i386/kernel/process.c in Linux.
+ */
+#define LINUX_GET_BASE(desc) \
+ ((((desc)->a >> 16) & LINUX_LOWERWORD) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ((desc)->b & 0xff000000))
+
+#define LINUX_GET_LIMIT(desc) \
+ (((desc)->a & LINUX_LOWERWORD) | \
+ ((desc)->b & 0xf0000))
+
+#define LINUX_GET_32BIT(desc) \
+ (((desc)->b >> LINUX_ENTRY_B_SEG32BIT) & 1)
+#define LINUX_GET_CONTENTS(desc) \
+ (((desc)->b >> LINUX_ENTRY_B_CONTENTS) & 3)
+#define LINUX_GET_WRITABLE(desc) \
+ (((desc)->b >> LINUX_ENTRY_B_READ_EXEC_ONLY) & 1)
+#define LINUX_GET_LIMIT_PAGES(desc) \
+ (((desc)->b >> LINUX_ENTRY_B_LIMIT) & 1)
+#define LINUX_GET_PRESENT(desc) \
+ (((desc)->b >> LINUX_ENTRY_B_SEG_NOT_PRESENT) & 1)
+#define LINUX_GET_USEABLE(desc) \
+ (((desc)->b >> LINUX_ENTRY_B_USEABLE) & 1)
+
+struct iovec;
+
+struct l_iovec32 {
+ uint32_t iov_base;
+ l_size_t iov_len;
+};
+
+int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt,
+ struct iovec **iovp, int error);
+
+/* robust futexes */
+struct linux_robust_list {
+ l_uintptr_t next;
+};
+
+struct linux_robust_list_head {
+ struct linux_robust_list list;
+ l_long futex_offset;
+ l_uintptr_t pending_list;
+};
+
+#endif /* !_AMD64_LINUX_H_ */
diff --git a/sys/amd64/linux32/linux32_dummy.c b/sys/amd64/linux32/linux32_dummy.c
new file mode 100644
index 0000000..95bf3ec
--- /dev/null
+++ b/sys/amd64/linux32/linux32_dummy.c
@@ -0,0 +1,176 @@
+/*-
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/sdt.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <amd64/linux32/linux.h>
+#include <amd64/linux32/linux32_proto.h>
+#include <compat/linux/linux_dtrace.h>
+#include <compat/linux/linux_util.h>
+
+/* DTrace init */
+LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
+
+DUMMY(stime);
+DUMMY(olduname);
+DUMMY(syslog);
+DUMMY(uname);
+DUMMY(vhangup);
+DUMMY(swapoff);
+DUMMY(adjtimex);
+DUMMY(create_module);
+DUMMY(init_module);
+DUMMY(delete_module);
+DUMMY(get_kernel_syms);
+DUMMY(quotactl);
+DUMMY(bdflush);
+DUMMY(sysfs);
+DUMMY(query_module);
+DUMMY(nfsservctl);
+DUMMY(rt_sigqueueinfo);
+DUMMY(sendfile);
+DUMMY(setfsuid);
+DUMMY(setfsgid);
+DUMMY(pivot_root);
+DUMMY(mincore);
+DUMMY(ptrace);
+DUMMY(lookup_dcookie);
+DUMMY(epoll_create);
+DUMMY(epoll_ctl);
+DUMMY(epoll_wait);
+DUMMY(remap_file_pages);
+DUMMY(timer_create);
+DUMMY(timer_settime);
+DUMMY(timer_gettime);
+DUMMY(timer_getoverrun);
+DUMMY(timer_delete);
+DUMMY(fstatfs64);
+DUMMY(mbind);
+DUMMY(get_mempolicy);
+DUMMY(set_mempolicy);
+DUMMY(mq_open);
+DUMMY(mq_unlink);
+DUMMY(mq_timedsend);
+DUMMY(mq_timedreceive);
+DUMMY(mq_notify);
+DUMMY(mq_getsetattr);
+DUMMY(kexec_load);
+DUMMY(waitid);
+/* linux 2.6.11: */
+DUMMY(add_key);
+DUMMY(request_key);
+DUMMY(keyctl);
+/* linux 2.6.13: */
+DUMMY(ioprio_set);
+DUMMY(ioprio_get);
+DUMMY(inotify_init);
+DUMMY(inotify_add_watch);
+DUMMY(inotify_rm_watch);
+/* linux 2.6.16: */
+DUMMY(migrate_pages);
+DUMMY(pselect6);
+DUMMY(ppoll);
+DUMMY(unshare);
+/* linux 2.6.17: */
+DUMMY(splice);
+DUMMY(sync_file_range);
+DUMMY(tee);
+DUMMY(vmsplice);
+/* linux 2.6.18: */
+DUMMY(move_pages);
+/* linux 2.6.19: */
+DUMMY(getcpu);
+DUMMY(epoll_pwait);
+/* linux 2.6.22: */
+DUMMY(utimensat);
+DUMMY(signalfd);
+DUMMY(timerfd_create);
+DUMMY(eventfd);
+/* linux 2.6.23: */
+DUMMY(fallocate);
+/* linux 2.6.25: */
+DUMMY(timerfd_settime);
+DUMMY(timerfd_gettime);
+/* linux 2.6.27: */
+DUMMY(signalfd4);
+DUMMY(eventfd2);
+DUMMY(epoll_create1);
+DUMMY(dup3);
+DUMMY(inotify_init1);
+/* linux 2.6.30: */
+DUMMY(preadv);
+DUMMY(pwritev);
+/* linux 2.6.31: */
+DUMMY(rt_tsigqueueinfo);
+DUMMY(perf_event_open);
+/* linux 2.6.33: */
+DUMMY(recvmmsg);
+DUMMY(fanotify_init);
+DUMMY(fanotify_mark);
+/* linux 2.6.36: */
+DUMMY(prlimit64);
+/* later: */
+DUMMY(name_to_handle_at);
+DUMMY(open_by_handle_at);
+DUMMY(clock_adjtime);
+DUMMY(syncfs);
+DUMMY(sendmmsg);
+DUMMY(setns);
+DUMMY(process_vm_readv);
+DUMMY(process_vm_writev);
+
+#define DUMMY_XATTR(s) \
+int \
+linux_ ## s ## xattr( \
+ struct thread *td, struct linux_ ## s ## xattr_args *arg) \
+{ \
+ \
+ return (ENOATTR); \
+}
+DUMMY_XATTR(set);
+DUMMY_XATTR(lset);
+DUMMY_XATTR(fset);
+DUMMY_XATTR(get);
+DUMMY_XATTR(lget);
+DUMMY_XATTR(fget);
+DUMMY_XATTR(list);
+DUMMY_XATTR(llist);
+DUMMY_XATTR(flist);
+DUMMY_XATTR(remove);
+DUMMY_XATTR(lremove);
+DUMMY_XATTR(fremove);
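+
+/*
+ * For reference, DUMMY_XATTR(set) expands to (sketch):
+ *
+ *	int
+ *	linux_setxattr(struct thread *td, struct linux_setxattr_args *arg)
+ *	{
+ *
+ *		return (ENOATTR);
+ *	}
+ */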
diff --git a/sys/amd64/linux32/linux32_genassym.c b/sys/amd64/linux32/linux32_genassym.c
new file mode 100644
index 0000000..a022fac
--- /dev/null
+++ b/sys/amd64/linux32/linux32_genassym.c
@@ -0,0 +1,14 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/assym.h>
+#include <sys/systm.h>
+
+#include <amd64/linux32/linux.h>
+
+ASSYM(LINUX_SIGF_HANDLER, offsetof(struct l_sigframe, sf_handler));
+ASSYM(LINUX_SIGF_SC, offsetof(struct l_sigframe, sf_sc));
+ASSYM(LINUX_RT_SIGF_HANDLER, offsetof(struct l_rt_sigframe, sf_handler));
+ASSYM(LINUX_RT_SIGF_UC, offsetof(struct l_rt_sigframe, sf_sc));
+ASSYM(LINUX_RT_SIGF_SC, offsetof(struct l_ucontext, uc_mcontext));
diff --git a/sys/amd64/linux32/linux32_ipc64.h b/sys/amd64/linux32/linux32_ipc64.h
new file mode 100644
index 0000000..f8c92c4
--- /dev/null
+++ b/sys/amd64/linux32/linux32_ipc64.h
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (c) 2002 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _AMD64_LINUX_LINUX_IPC64_H_
+#define _AMD64_LINUX_LINUX_IPC64_H_
+
+/*
+ * The ipc64_perm structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct l_ipc64_perm {
+ l_key_t key;
+ l_uid_t uid;
+ l_gid_t gid;
+ l_uid_t cuid;
+ l_gid_t cgid;
+ l_mode_t mode;
+ l_ushort __pad1;
+ l_ushort seq;
+ l_ushort __pad2;
+ l_ulong __unused1;
+ l_ulong __unused2;
+} __packed;
+
+/*
+ * The msqid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct l_msqid64_ds {
+ struct l_ipc64_perm msg_perm;
+ l_time_t msg_stime; /* last msgsnd time */
+ l_ulong __unused1;
+ l_time_t msg_rtime; /* last msgrcv time */
+ l_ulong __unused2;
+ l_time_t msg_ctime; /* last change time */
+ l_ulong __unused3;
+ l_ulong msg_cbytes; /* current number of bytes on queue */
+ l_ulong msg_qnum; /* number of messages in queue */
+ l_ulong msg_qbytes; /* max number of bytes on queue */
+ l_pid_t msg_lspid; /* pid of last msgsnd */
+ l_pid_t msg_lrpid; /* last receive pid */
+ l_ulong __unused4;
+ l_ulong __unused5;
+} __packed;
+
+/*
+ * The semid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct l_semid64_ds {
+ struct l_ipc64_perm sem_perm; /* permissions */
+ l_time_t sem_otime; /* last semop time */
+ l_ulong __unused1;
+ l_time_t sem_ctime; /* last change time */
+ l_ulong __unused2;
+ l_ulong sem_nsems; /* no. of semaphores in array */
+ l_ulong __unused3;
+ l_ulong __unused4;
+} __packed;
+
+/*
+ * The shmid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct l_shmid64_ds {
+ struct l_ipc64_perm shm_perm; /* operation perms */
+ l_size_t shm_segsz; /* size of segment (bytes) */
+ l_time_t shm_atime; /* last attach time */
+ l_ulong __unused1;
+ l_time_t shm_dtime; /* last detach time */
+ l_ulong __unused2;
+ l_time_t shm_ctime; /* last change time */
+ l_ulong __unused3;
+ l_pid_t shm_cpid; /* pid of creator */
+ l_pid_t shm_lpid; /* pid of last operator */
+ l_ulong shm_nattch; /* no. of current attaches */
+ l_ulong __unused4;
+ l_ulong __unused5;
+} __packed;
+
+struct l_shminfo64 {
+ l_ulong shmmax;
+ l_ulong shmmin;
+ l_ulong shmmni;
+ l_ulong shmseg;
+ l_ulong shmall;
+ l_ulong __unused1;
+ l_ulong __unused2;
+ l_ulong __unused3;
+ l_ulong __unused4;
+} __packed;
+
+#endif /* !_AMD64_LINUX_LINUX_IPC64_H_ */
diff --git a/sys/amd64/linux32/linux32_locore.s b/sys/amd64/linux32/linux32_locore.s
new file mode 100644
index 0000000..36e1abf
--- /dev/null
+++ b/sys/amd64/linux32/linux32_locore.s
@@ -0,0 +1,38 @@
+/* $FreeBSD$ */
+
+#include "linux32_assym.h" /* system definitions */
+#include <machine/asmacros.h> /* miscellaneous asm macros */
+
+#include <amd64/linux32/linux32_syscall.h> /* system call numbers */
+
+.text
+.code32
+
+NON_GPROF_ENTRY(linux_sigcode)
+ call *LINUX_SIGF_HANDLER(%esp)
+ leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */
+ movl %esp, %ebx /* pass sigframe */
+ push %eax /* fake ret addr */
+ movl $LINUX_SYS_linux_sigreturn,%eax /* linux_sigreturn() */
+ int $0x80 /* enter kernel with args */
+0: jmp 0b
+ ALIGN_TEXT
+/* XXXXX */
+linux_rt_sigcode:
+ call *LINUX_RT_SIGF_HANDLER(%esp)
+ leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
+ leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */
+ push %eax /* fake ret addr */
+ movl $LINUX_SYS_linux_rt_sigreturn,%eax /* linux_rt_sigreturn() */
+ int $0x80 /* enter kernel with args */
+0: jmp 0b
+ ALIGN_TEXT
+/* XXXXX */
+linux_esigcode:
+
+ .data
+ .globl linux_szsigcode, linux_sznonrtsigcode
+linux_szsigcode:
+ .long linux_esigcode-linux_sigcode
+linux_sznonrtsigcode:
+ .long linux_rt_sigcode-linux_sigcode
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
new file mode 100644
index 0000000..7725163
--- /dev/null
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -0,0 +1,1067 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/capability.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/clock.h>
+#include <sys/imgact.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <sys/sched.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/unistd.h>
+#include <sys/wait.h>
+
+#include <machine/frame.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <compat/freebsd32/freebsd32_util.h>
+#include <amd64/linux32/linux.h>
+#include <amd64/linux32/linux32_proto.h>
+#include <compat/linux/linux_ipc.h>
+#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_signal.h>
+#include <compat/linux/linux_util.h>
+#include <compat/linux/linux_emul.h>
+
+struct l_old_select_argv {
+ l_int nfds;
+ l_uintptr_t readfds;
+ l_uintptr_t writefds;
+ l_uintptr_t exceptfds;
+ l_uintptr_t timeout;
+} __packed;
+
+int
+linux_to_bsd_sigaltstack(int lsa)
+{
+ int bsa = 0;
+
+ if (lsa & LINUX_SS_DISABLE)
+ bsa |= SS_DISABLE;
+ if (lsa & LINUX_SS_ONSTACK)
+ bsa |= SS_ONSTACK;
+ return (bsa);
+}
+
+static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
+ l_size_t len, l_int prot, l_int flags, l_int fd,
+ l_loff_t pos);
+
+int
+bsd_to_linux_sigaltstack(int bsa)
+{
+ int lsa = 0;
+
+ if (bsa & SS_DISABLE)
+ lsa |= LINUX_SS_DISABLE;
+ if (bsa & SS_ONSTACK)
+ lsa |= LINUX_SS_ONSTACK;
+ return (lsa);
+}
+
+static void
+bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
+{
+
+ lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;
+ lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;
+ lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;
+ lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;
+ lru->ru_maxrss = ru->ru_maxrss;
+ lru->ru_ixrss = ru->ru_ixrss;
+ lru->ru_idrss = ru->ru_idrss;
+ lru->ru_isrss = ru->ru_isrss;
+ lru->ru_minflt = ru->ru_minflt;
+ lru->ru_majflt = ru->ru_majflt;
+ lru->ru_nswap = ru->ru_nswap;
+ lru->ru_inblock = ru->ru_inblock;
+ lru->ru_oublock = ru->ru_oublock;
+ lru->ru_msgsnd = ru->ru_msgsnd;
+ lru->ru_msgrcv = ru->ru_msgrcv;
+ lru->ru_nsignals = ru->ru_nsignals;
+ lru->ru_nvcsw = ru->ru_nvcsw;
+ lru->ru_nivcsw = ru->ru_nivcsw;
+}
+
+int
+linux_execve(struct thread *td, struct linux_execve_args *args)
+{
+ struct image_args eargs;
+ char *path;
+ int error;
+
+ LCONVPATHEXIST(td, args->path, &path);
+
+#ifdef DEBUG
+ if (ldebug(execve))
+ printf(ARGS(execve, "%s"), path);
+#endif
+
+ error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE,
+ args->argp, args->envp);
+ free(path, M_TEMP);
+ if (error == 0)
+ error = kern_execve(td, &eargs, NULL);
+ if (error == 0)
+		/*
+		 * A Linux process may execute a FreeBSD binary; do not
+		 * attempt to create emuldata for such a process via
+		 * linux_proc_init(), as that panics on a KASSERT
+		 * because the process has p->p_emuldata == NULL.
+		 */
+ if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX)
+ error = linux_proc_init(td, 0, 0);
+ return (error);
+}
+
+CTASSERT(sizeof(struct l_iovec32) == 8);
+
+static int
+linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop)
+{
+ struct l_iovec32 iov32;
+ struct iovec *iov;
+ struct uio *uio;
+ uint32_t iovlen;
+ int error, i;
+
+ *uiop = NULL;
+ if (iovcnt > UIO_MAXIOV)
+ return (EINVAL);
+ iovlen = iovcnt * sizeof(struct iovec);
+ uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK);
+ iov = (struct iovec *)(uio + 1);
+ for (i = 0; i < iovcnt; i++) {
+ error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32));
+ if (error) {
+ free(uio, M_IOV);
+ return (error);
+ }
+ iov[i].iov_base = PTRIN(iov32.iov_base);
+ iov[i].iov_len = iov32.iov_len;
+ }
+ uio->uio_iov = iov;
+ uio->uio_iovcnt = iovcnt;
+ uio->uio_segflg = UIO_USERSPACE;
+ uio->uio_offset = -1;
+ uio->uio_resid = 0;
+ for (i = 0; i < iovcnt; i++) {
+ if (iov->iov_len > INT_MAX - uio->uio_resid) {
+ free(uio, M_IOV);
+ return (EINVAL);
+ }
+ uio->uio_resid += iov->iov_len;
+ iov++;
+ }
+ *uiop = uio;
+ return (0);
+}
+
+int
+linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp,
+ int error)
+{
+ struct l_iovec32 iov32;
+ struct iovec *iov;
+ uint32_t iovlen;
+ int i;
+
+ *iovp = NULL;
+ if (iovcnt > UIO_MAXIOV)
+ return (error);
+ iovlen = iovcnt * sizeof(struct iovec);
+ iov = malloc(iovlen, M_IOV, M_WAITOK);
+ for (i = 0; i < iovcnt; i++) {
+ error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32));
+ if (error) {
+ free(iov, M_IOV);
+ return (error);
+ }
+ iov[i].iov_base = PTRIN(iov32.iov_base);
+ iov[i].iov_len = iov32.iov_len;
+ }
+ *iovp = iov;
+	return (0);
+}
+
+int
+linux_readv(struct thread *td, struct linux_readv_args *uap)
+{
+ struct uio *auio;
+ int error;
+
+ error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
+ if (error)
+ return (error);
+ error = kern_readv(td, uap->fd, auio);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
+linux_writev(struct thread *td, struct linux_writev_args *uap)
+{
+ struct uio *auio;
+ int error;
+
+ error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
+ if (error)
+ return (error);
+ error = kern_writev(td, uap->fd, auio);
+ free(auio, M_IOV);
+ return (error);
+}
+
+struct l_ipc_kludge {
+ l_uintptr_t msgp;
+ l_long msgtyp;
+} __packed;
+
+int
+linux_ipc(struct thread *td, struct linux_ipc_args *args)
+{
+
+ switch (args->what & 0xFFFF) {
+ case LINUX_SEMOP: {
+ struct linux_semop_args a;
+
+ a.semid = args->arg1;
+ a.tsops = args->ptr;
+ a.nsops = args->arg2;
+ return (linux_semop(td, &a));
+ }
+ case LINUX_SEMGET: {
+ struct linux_semget_args a;
+
+ a.key = args->arg1;
+ a.nsems = args->arg2;
+ a.semflg = args->arg3;
+ return (linux_semget(td, &a));
+ }
+ case LINUX_SEMCTL: {
+ struct linux_semctl_args a;
+ int error;
+
+ a.semid = args->arg1;
+ a.semnum = args->arg2;
+ a.cmd = args->arg3;
+ error = copyin(args->ptr, &a.arg, sizeof(a.arg));
+ if (error)
+ return (error);
+ return (linux_semctl(td, &a));
+ }
+ case LINUX_MSGSND: {
+ struct linux_msgsnd_args a;
+
+ a.msqid = args->arg1;
+ a.msgp = args->ptr;
+ a.msgsz = args->arg2;
+ a.msgflg = args->arg3;
+ return (linux_msgsnd(td, &a));
+ }
+ case LINUX_MSGRCV: {
+ struct linux_msgrcv_args a;
+
+ a.msqid = args->arg1;
+ a.msgsz = args->arg2;
+ a.msgflg = args->arg3;
+ if ((args->what >> 16) == 0) {
+ struct l_ipc_kludge tmp;
+ int error;
+
+ if (args->ptr == 0)
+ return (EINVAL);
+ error = copyin(args->ptr, &tmp, sizeof(tmp));
+ if (error)
+ return (error);
+ a.msgp = PTRIN(tmp.msgp);
+ a.msgtyp = tmp.msgtyp;
+ } else {
+ a.msgp = args->ptr;
+ a.msgtyp = args->arg5;
+ }
+ return (linux_msgrcv(td, &a));
+ }
+ case LINUX_MSGGET: {
+ struct linux_msgget_args a;
+
+ a.key = args->arg1;
+ a.msgflg = args->arg2;
+ return (linux_msgget(td, &a));
+ }
+ case LINUX_MSGCTL: {
+ struct linux_msgctl_args a;
+
+ a.msqid = args->arg1;
+ a.cmd = args->arg2;
+ a.buf = args->ptr;
+ return (linux_msgctl(td, &a));
+ }
+ case LINUX_SHMAT: {
+ struct linux_shmat_args a;
+
+ a.shmid = args->arg1;
+ a.shmaddr = args->ptr;
+ a.shmflg = args->arg2;
+ a.raddr = PTRIN((l_uint)args->arg3);
+ return (linux_shmat(td, &a));
+ }
+ case LINUX_SHMDT: {
+ struct linux_shmdt_args a;
+
+ a.shmaddr = args->ptr;
+ return (linux_shmdt(td, &a));
+ }
+ case LINUX_SHMGET: {
+ struct linux_shmget_args a;
+
+ a.key = args->arg1;
+ a.size = args->arg2;
+ a.shmflg = args->arg3;
+ return (linux_shmget(td, &a));
+ }
+ case LINUX_SHMCTL: {
+ struct linux_shmctl_args a;
+
+ a.shmid = args->arg1;
+ a.cmd = args->arg2;
+ a.buf = args->ptr;
+ return (linux_shmctl(td, &a));
+ }
+ default:
+ break;
+ }
+
+ return (EINVAL);
+}
+
+int
+linux_old_select(struct thread *td, struct linux_old_select_args *args)
+{
+ struct l_old_select_argv linux_args;
+ struct linux_select_args newsel;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(old_select))
+ printf(ARGS(old_select, "%p"), args->ptr);
+#endif
+
+ error = copyin(args->ptr, &linux_args, sizeof(linux_args));
+ if (error)
+ return (error);
+
+ newsel.nfds = linux_args.nfds;
+ newsel.readfds = PTRIN(linux_args.readfds);
+ newsel.writefds = PTRIN(linux_args.writefds);
+ newsel.exceptfds = PTRIN(linux_args.exceptfds);
+ newsel.timeout = PTRIN(linux_args.timeout);
+ return (linux_select(td, &newsel));
+}
+
+int
+linux_set_cloned_tls(struct thread *td, void *desc)
+{
+ struct user_segment_descriptor sd;
+ struct l_user_desc info;
+ struct pcb *pcb;
+ int error;
+ int a[2];
+
+ error = copyin(desc, &info, sizeof(struct l_user_desc));
+ if (error) {
+ printf(LMSG("copyin failed!"));
+ } else {
+		/* Copy the entry_number back out to user space as GUGS32_SEL. */
+ info.entry_number = GUGS32_SEL;
+ error = copyout(&info, desc, sizeof(struct l_user_desc));
+ if (error)
+ printf(LMSG("copyout failed!"));
+
+ a[0] = LINUX_LDT_entry_a(&info);
+ a[1] = LINUX_LDT_entry_b(&info);
+
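+		/*
+		 * The two LDT_entry words form the raw 64-bit descriptor
+		 * image; overlay them onto the user_segment_descriptor view.
+		 */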
+ memcpy(&sd, &a, sizeof(a));
+#ifdef DEBUG
+ if (ldebug(clone))
+ printf("Segment created in clone with "
+ "CLONE_SETTLS: lobase: %x, hibase: %x, "
+ "lolimit: %x, hilimit: %x, type: %i, "
+ "dpl: %i, p: %i, xx: %i, long: %i, "
+ "def32: %i, gran: %i\n", sd.sd_lobase,
+ sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
+ sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
+ sd.sd_long, sd.sd_def32, sd.sd_gran);
+#endif
+ pcb = td->td_pcb;
+ pcb->pcb_gsbase = (register_t)info.base_addr;
+/* XXXKIB pcb->pcb_gs32sd = sd; */
+ td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
+ set_pcb_flags(pcb, PCB_GS32BIT | PCB_32BIT);
+ }
+
+ return (error);
+}
+
+int
+linux_set_upcall_kse(struct thread *td, register_t stack)
+{
+
+ td->td_frame->tf_rsp = stack;
+
+ return (0);
+}
+
+#define STACK_SIZE (2 * 1024 * 1024)
+#define GUARD_SIZE (4 * PAGE_SIZE)
+
+int
+linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
+{
+
+#ifdef DEBUG
+ if (ldebug(mmap2))
+ printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"),
+ args->addr, args->len, args->prot,
+ args->flags, args->fd, args->pgoff);
+#endif
+
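+	/*
+	 * mmap2(2) passes the file offset in PAGE_SIZE units so 32-bit
+	 * callers can reach offsets beyond 4GB; widen pgoff to 64 bits
+	 * before scaling to avoid truncation.
+	 */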
+ return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
+ args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff *
+ PAGE_SIZE));
+}
+
+int
+linux_mmap(struct thread *td, struct linux_mmap_args *args)
+{
+ int error;
+ struct l_mmap_argv linux_args;
+
+ error = copyin(args->ptr, &linux_args, sizeof(linux_args));
+ if (error)
+ return (error);
+
+#ifdef DEBUG
+ if (ldebug(mmap))
+ printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"),
+ linux_args.addr, linux_args.len, linux_args.prot,
+ linux_args.flags, linux_args.fd, linux_args.pgoff);
+#endif
+
+ return (linux_mmap_common(td, linux_args.addr, linux_args.len,
+ linux_args.prot, linux_args.flags, linux_args.fd,
+ (uint32_t)linux_args.pgoff));
+}
+
+static int
+linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
+ l_int flags, l_int fd, l_loff_t pos)
+{
+ struct proc *p = td->td_proc;
+ struct mmap_args /* {
+ caddr_t addr;
+ size_t len;
+ int prot;
+ int flags;
+ int fd;
+ long pad;
+ off_t pos;
+ } */ bsd_args;
+ int error;
+ struct file *fp;
+
+ error = 0;
+ bsd_args.flags = 0;
+ fp = NULL;
+
+ /*
+ * Linux mmap(2):
+ * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
+ */
+ if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
+ return (EINVAL);
+
+ if (flags & LINUX_MAP_SHARED)
+ bsd_args.flags |= MAP_SHARED;
+ if (flags & LINUX_MAP_PRIVATE)
+ bsd_args.flags |= MAP_PRIVATE;
+ if (flags & LINUX_MAP_FIXED)
+ bsd_args.flags |= MAP_FIXED;
+ if (flags & LINUX_MAP_ANON) {
+ /* Enforce pos to be on page boundary, then ignore. */
+ if ((pos & PAGE_MASK) != 0)
+ return (EINVAL);
+ pos = 0;
+ bsd_args.flags |= MAP_ANON;
+ } else
+ bsd_args.flags |= MAP_NOSYNC;
+ if (flags & LINUX_MAP_GROWSDOWN)
+ bsd_args.flags |= MAP_STACK;
+
+ /*
+ * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
+ * on Linux/i386. We do this to ensure maximum compatibility.
+ * Linux/ia64 does the same in i386 emulation mode.
+ */
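+	/* E.g. a Linux PROT_WRITE-only request maps as RWX here. */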
+ bsd_args.prot = prot;
+ if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
+ bsd_args.prot |= PROT_READ | PROT_EXEC;
+
+	/* Linux does not check the file descriptor when MAP_ANONYMOUS is set. */
+ bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
+ if (bsd_args.fd != -1) {
+ /*
+ * Linux follows Solaris mmap(2) description:
+ * The file descriptor fildes is opened with
+ * read permission, regardless of the
+ * protection options specified.
+ */
+
+ if ((error = fget(td, bsd_args.fd, CAP_MMAP, &fp)) != 0)
+ return (error);
+ if (fp->f_type != DTYPE_VNODE) {
+ fdrop(fp, td);
+ return (EINVAL);
+ }
+
+ /* Linux mmap() just fails for O_WRONLY files */
+ if (!(fp->f_flag & FREAD)) {
+ fdrop(fp, td);
+ return (EACCES);
+ }
+
+ fdrop(fp, td);
+ }
+
+ if (flags & LINUX_MAP_GROWSDOWN) {
+ /*
+ * The Linux MAP_GROWSDOWN option does not limit auto
+ * growth of the region. Linux mmap with this option
+		 * takes as addr the initial BOS, and as len, the initial
+		 * region size. It can then grow down from addr without
+		 * limit. However, Linux threads have an implicit internal
+		 * limit to stack size of STACK_SIZE. It is just not
+		 * enforced explicitly in Linux. But here we impose
+ * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
+ * region, since we can do this with our mmap.
+ *
+ * Our mmap with MAP_STACK takes addr as the maximum
+ * downsize limit on BOS, and as len the max size of
+ * the region. It then maps the top SGROWSIZ bytes,
+ * and auto grows the region down, up to the limit
+ * in addr.
+ *
+ * If we don't use the MAP_STACK option, the effect
+ * of this code is to allocate a stack region of a
+ * fixed size of (STACK_SIZE - GUARD_SIZE).
+ */
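+
+	/*
+	 * Illustrative numbers (assuming the default 4KB pages, so
+	 * GUARD_SIZE is 16KB): a request of len = 64KB at addr = A maps
+	 * bsd_args.addr = A - (STACK_SIZE - GUARD_SIZE - 64KB) with
+	 * bsd_args.len = STACK_SIZE - GUARD_SIZE, i.e. the region can
+	 * auto-grow down within roughly 2MB less the guard.
+	 */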
+
+ if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
+ /*
+ * Some Linux apps will attempt to mmap
+ * thread stacks near the top of their
+ * address space. If their TOS is greater
+ * than vm_maxsaddr, vm_map_growstack()
+ * will confuse the thread stack with the
+ * process stack and deliver a SEGV if they
+ * attempt to grow the thread stack past their
+ * current stacksize rlimit. To avoid this,
+ * adjust vm_maxsaddr upwards to reflect
+ * the current stacksize rlimit rather
+ * than the maximum possible stacksize.
+ * It would be better to adjust the
+ * mmap'ed region, but some apps do not check
+ * mmap's return value.
+ */
+ PROC_LOCK(p);
+ p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
+ lim_cur(p, RLIMIT_STACK);
+ PROC_UNLOCK(p);
+ }
+
+ /*
+ * This gives us our maximum stack size and a new BOS.
+ * If we're using VM_STACK, then mmap will just map
+ * the top SGROWSIZ bytes, and let the stack grow down
+ * to the limit at BOS. If we're not using VM_STACK
+ * we map the full stack, since we don't have a way
+ * to autogrow it.
+ */
+ if (len > STACK_SIZE - GUARD_SIZE) {
+ bsd_args.addr = (caddr_t)PTRIN(addr);
+ bsd_args.len = len;
+ } else {
+ bsd_args.addr = (caddr_t)PTRIN(addr) -
+ (STACK_SIZE - GUARD_SIZE - len);
+ bsd_args.len = STACK_SIZE - GUARD_SIZE;
+ }
+ } else {
+ bsd_args.addr = (caddr_t)PTRIN(addr);
+ bsd_args.len = len;
+ }
+ bsd_args.pos = pos;
+
+#ifdef DEBUG
+ if (ldebug(mmap))
+ printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
+ __func__,
+ (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
+ bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
+#endif
+ error = sys_mmap(td, &bsd_args);
+#ifdef DEBUG
+ if (ldebug(mmap))
+ printf("-> %s() return: 0x%x (0x%08x)\n",
+ __func__, error, (u_int)td->td_retval[0]);
+#endif
+ return (error);
+}
+
+int
+linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
+{
+ struct mprotect_args bsd_args;
+
+ bsd_args.addr = uap->addr;
+ bsd_args.len = uap->len;
+ bsd_args.prot = uap->prot;
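+	/* Mirror the Linux/i386 PROT handling from linux_mmap_common(). */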
+ if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
+ bsd_args.prot |= PROT_READ | PROT_EXEC;
+ return (sys_mprotect(td, &bsd_args));
+}
+
+int
+linux_iopl(struct thread *td, struct linux_iopl_args *args)
+{
+ int error;
+
+ if (args->level < 0 || args->level > 3)
+ return (EINVAL);
+ if ((error = priv_check(td, PRIV_IO)) != 0)
+ return (error);
+ if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
+ return (error);
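+	/*
+	 * PSL_IOPL is the two-bit I/O privilege field (0x3000), so
+	 * PSL_IOPL / 3 is 0x1000 and level * 0x1000 places the requested
+	 * level (0-3) directly into bits 12-13 of %rflags.
+	 */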
+ td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
+ (args->level * (PSL_IOPL / 3));
+
+ return (0);
+}
+
+int
+linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
+{
+ l_osigaction_t osa;
+ l_sigaction_t act, oact;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(sigaction))
+ printf(ARGS(sigaction, "%d, %p, %p"),
+ args->sig, (void *)args->nsa, (void *)args->osa);
+#endif
+
+ if (args->nsa != NULL) {
+ error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
+ if (error)
+ return (error);
+ act.lsa_handler = osa.lsa_handler;
+ act.lsa_flags = osa.lsa_flags;
+ act.lsa_restorer = osa.lsa_restorer;
+ LINUX_SIGEMPTYSET(act.lsa_mask);
+ act.lsa_mask.__bits[0] = osa.lsa_mask;
+ }
+
+ error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
+ args->osa ? &oact : NULL);
+
+ if (args->osa != NULL && !error) {
+ osa.lsa_handler = oact.lsa_handler;
+ osa.lsa_flags = oact.lsa_flags;
+ osa.lsa_restorer = oact.lsa_restorer;
+ osa.lsa_mask = oact.lsa_mask.__bits[0];
+ error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
+ }
+
+ return (error);
+}
+
+/*
+ * Linux has two extra args, restart and oldmask. We don't use these,
+ * but it seems that "restart" is actually a context pointer that
+ * enables the signal to happen with a different register set.
+ */
+int
+linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
+{
+ sigset_t sigmask;
+ l_sigset_t mask;
+
+#ifdef DEBUG
+ if (ldebug(sigsuspend))
+ printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
+#endif
+
+ LINUX_SIGEMPTYSET(mask);
+ mask.__bits[0] = args->mask;
+ linux_to_bsd_sigset(&mask, &sigmask);
+ return (kern_sigsuspend(td, sigmask));
+}
+
+int
+linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
+{
+ l_sigset_t lmask;
+ sigset_t sigmask;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(rt_sigsuspend))
+ printf(ARGS(rt_sigsuspend, "%p, %d"),
+ (void *)uap->newset, uap->sigsetsize);
+#endif
+
+ if (uap->sigsetsize != sizeof(l_sigset_t))
+ return (EINVAL);
+
+ error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
+ if (error)
+ return (error);
+
+ linux_to_bsd_sigset(&lmask, &sigmask);
+ return (kern_sigsuspend(td, sigmask));
+}
+
+int
+linux_pause(struct thread *td, struct linux_pause_args *args)
+{
+ struct proc *p = td->td_proc;
+ sigset_t sigmask;
+
+#ifdef DEBUG
+ if (ldebug(pause))
+ printf(ARGS(pause, ""));
+#endif
+
+ PROC_LOCK(p);
+ sigmask = td->td_sigmask;
+ PROC_UNLOCK(p);
+ return (kern_sigsuspend(td, sigmask));
+}
+
+int
+linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
+{
+ stack_t ss, oss;
+ l_stack_t lss;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(sigaltstack))
+ printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
+#endif
+
+ if (uap->uss != NULL) {
+ error = copyin(uap->uss, &lss, sizeof(l_stack_t));
+ if (error)
+ return (error);
+
+ ss.ss_sp = PTRIN(lss.ss_sp);
+ ss.ss_size = lss.ss_size;
+ ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
+ }
+ error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
+ (uap->uoss != NULL) ? &oss : NULL);
+ if (!error && uap->uoss != NULL) {
+ lss.ss_sp = PTROUT(oss.ss_sp);
+ lss.ss_size = oss.ss_size;
+ lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
+ error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
+ }
+
+ return (error);
+}
+
+int
+linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
+{
+ struct ftruncate_args sa;
+
+#ifdef DEBUG
+ if (ldebug(ftruncate64))
+ printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
+ (intmax_t)args->length);
+#endif
+
+ sa.fd = args->fd;
+ sa.length = args->length;
+	return (sys_ftruncate(td, &sa));
+}
+
+int
+linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
+{
+ struct timeval atv;
+ l_timeval atv32;
+ struct timezone rtz;
+ int error = 0;
+
+ if (uap->tp) {
+ microtime(&atv);
+ atv32.tv_sec = atv.tv_sec;
+ atv32.tv_usec = atv.tv_usec;
+ error = copyout(&atv32, uap->tp, sizeof(atv32));
+ }
+ if (error == 0 && uap->tzp != NULL) {
+ rtz.tz_minuteswest = tz_minuteswest;
+ rtz.tz_dsttime = tz_dsttime;
+ error = copyout(&rtz, uap->tzp, sizeof(rtz));
+ }
+ return (error);
+}
+
+int
+linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
+{
+ l_timeval atv32;
+ struct timeval atv, *tvp;
+ struct timezone atz, *tzp;
+ int error;
+
+ if (uap->tp) {
+ error = copyin(uap->tp, &atv32, sizeof(atv32));
+ if (error)
+ return (error);
+ atv.tv_sec = atv32.tv_sec;
+ atv.tv_usec = atv32.tv_usec;
+ tvp = &atv;
+ } else
+ tvp = NULL;
+ if (uap->tzp) {
+ error = copyin(uap->tzp, &atz, sizeof(atz));
+ if (error)
+ return (error);
+ tzp = &atz;
+ } else
+ tzp = NULL;
+ return (kern_settimeofday(td, tvp, tzp));
+}
+
+int
+linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
+{
+ struct l_rusage s32;
+ struct rusage s;
+ int error;
+
+ error = kern_getrusage(td, uap->who, &s);
+ if (error != 0)
+ return (error);
+ if (uap->rusage != NULL) {
+ bsd_to_linux_rusage(&s, &s32);
+ error = copyout(&s32, uap->rusage, sizeof(s32));
+ }
+ return (error);
+}
+
+int
+linux_sched_rr_get_interval(struct thread *td,
+ struct linux_sched_rr_get_interval_args *uap)
+{
+ struct timespec ts;
+ struct l_timespec ts32;
+ int error;
+
+ error = kern_sched_rr_get_interval(td, uap->pid, &ts);
+ if (error != 0)
+ return (error);
+ ts32.tv_sec = ts.tv_sec;
+ ts32.tv_nsec = ts.tv_nsec;
+ return (copyout(&ts32, uap->interval, sizeof(ts32)));
+}
+
+int
+linux_set_thread_area(struct thread *td,
+ struct linux_set_thread_area_args *args)
+{
+ struct l_user_desc info;
+ struct user_segment_descriptor sd;
+ struct pcb *pcb;
+ int a[2];
+ int error;
+
+ error = copyin(args->desc, &info, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+#ifdef DEBUG
+ if (ldebug(set_thread_area))
+ printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
+ "%i, %i, %i"), info.entry_number, info.base_addr,
+ info.limit, info.seg_32bit, info.contents,
+ info.read_exec_only, info.limit_in_pages,
+ info.seg_not_present, info.useable);
+#endif
+
+ /*
+	 * Semantics of the Linux version: every thread in the system has an
+	 * array of three TLS descriptors. The 1st is GLIBC TLS, the 2nd is
+	 * WINE, and the 3rd is unknown. This syscall loads one of the
+	 * selected TLS descriptors with a value
+ * and also loads GDT descriptors 6, 7 and 8 with the content of
+ * the per-thread descriptors.
+ *
+ * Semantics of FreeBSD version: I think we can ignore that Linux has
+ * three per-thread descriptors and use just the first one.
+ * The tls_array[] is used only in [gs]et_thread_area() syscalls and
+ * for loading the GDT descriptors. We use just one GDT descriptor
+ * for TLS, so we will load just one.
+ *
+	 * XXX: This doesn't work when a user space process tries to use more
+	 * than one TLS segment. A comment in the Linux source says Wine might
+	 * do this.
+ */
+
+ /*
+	 * GLIBC reads the current %gs and calls set_thread_area() with it.
+ * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
+ * we use these segments.
+ */
+ switch (info.entry_number) {
+ case GUGS32_SEL:
+ case GUDATA_SEL:
+ case 6:
+ case -1:
+ info.entry_number = GUGS32_SEL;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ /*
+ * We have to copy out the GDT entry we use.
+ *
+ * XXX: What if a user space program does not check the return value
+ * and tries to use 6, 7 or 8?
+ */
+ error = copyout(&info, args->desc, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+ if (LINUX_LDT_empty(&info)) {
+ a[0] = 0;
+ a[1] = 0;
+ } else {
+ a[0] = LINUX_LDT_entry_a(&info);
+ a[1] = LINUX_LDT_entry_b(&info);
+ }
+
+ memcpy(&sd, &a, sizeof(a));
+#ifdef DEBUG
+ if (ldebug(set_thread_area))
+ printf("Segment created in set_thread_area: "
+ "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
+ "type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
+ "def32: %i, gran: %i\n",
+ sd.sd_lobase,
+ sd.sd_hibase,
+ sd.sd_lolimit,
+ sd.sd_hilimit,
+ sd.sd_type,
+ sd.sd_dpl,
+ sd.sd_p,
+ sd.sd_xx,
+ sd.sd_long,
+ sd.sd_def32,
+ sd.sd_gran);
+#endif
+
+ pcb = td->td_pcb;
+ pcb->pcb_gsbase = (register_t)info.base_addr;
+ set_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT);
+ update_gdt_gsbase(td, info.base_addr);
+
+ return (0);
+}
+
+int
+linux_wait4(struct thread *td, struct linux_wait4_args *args)
+{
+ int error, options;
+ struct rusage ru, *rup;
+ struct l_rusage lru;
+
+#ifdef DEBUG
+ if (ldebug(wait4))
+ printf(ARGS(wait4, "%d, %p, %d, %p"),
+ args->pid, (void *)args->status, args->options,
+ (void *)args->rusage);
+#endif
+
+ options = (args->options & (WNOHANG | WUNTRACED));
+ /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
+ if (args->options & __WCLONE)
+ options |= WLINUXCLONE;
+
+ if (args->rusage != NULL)
+ rup = &ru;
+ else
+ rup = NULL;
+ error = linux_common_wait(td, args->pid, args->status, options, rup);
+ if (error)
+ return (error);
+ if (args->rusage != NULL) {
+ bsd_to_linux_rusage(rup, &lru);
+ error = copyout(&lru, args->rusage, sizeof(lru));
+ }
+
+ return (error);
+}
diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h
new file mode 100644
index 0000000..2049445
--- /dev/null
+++ b/sys/amd64/linux32/linux32_proto.h
@@ -0,0 +1,1682 @@
+/*
+ * System call prototypes.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * $FreeBSD$
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
+ */
+
+#ifndef _LINUX_SYSPROTO_H_
+#define _LINUX_SYSPROTO_H_
+
+#include <sys/signal.h>
+#include <sys/acl.h>
+#include <sys/cpuset.h>
+#include <sys/_ffcounter.h>
+#include <sys/_semaphore.h>
+#include <sys/ucontext.h>
+
+#include <bsm/audit_kevents.h>
+
+struct proc;
+
+struct thread;
+
+#define PAD_(t) (sizeof(register_t) <= sizeof(t) ? \
+ 0 : sizeof(register_t) - sizeof(t))
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define PADL_(t) 0
+#define PADR_(t) PAD_(t)
+#else
+#define PADL_(t) PAD_(t)
+#define PADR_(t) 0
+#endif
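+
+/*
+ * Illustrative expansion: on amd64, register_t is 8 bytes and l_int is
+ * 4, so PAD_(l_int) == 4, and with BYTE_ORDER == LITTLE_ENDIAN we get
+ * PADL_(l_int) == 0 and PADR_(l_int) == 4.  A member declared as
+ *     char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
+ * therefore occupies one full register slot with the value in the
+ * low-order bytes, matching how syscall arguments arrive in registers.
+ */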
+
+#define nosys linux_nosys
+struct linux_fork_args {
+ register_t dummy;
+};
+struct linux_open_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+};
+struct linux_waitpid_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+ char status_l_[PADL_(l_int *)]; l_int * status; char status_r_[PADR_(l_int *)];
+ char options_l_[PADL_(l_int)]; l_int options; char options_r_[PADR_(l_int)];
+};
+struct linux_creat_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+};
+struct linux_link_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char to_l_[PADL_(char *)]; char * to; char to_r_[PADR_(char *)];
+};
+struct linux_unlink_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+};
+struct linux_execve_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char argp_l_[PADL_(uint32_t *)]; uint32_t * argp; char argp_r_[PADR_(uint32_t *)];
+ char envp_l_[PADL_(uint32_t *)]; uint32_t * envp; char envp_r_[PADR_(uint32_t *)];
+};
+struct linux_chdir_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+};
+struct linux_time_args {
+ char tm_l_[PADL_(l_time_t *)]; l_time_t * tm; char tm_r_[PADR_(l_time_t *)];
+};
+struct linux_mknod_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+ char dev_l_[PADL_(l_dev_t)]; l_dev_t dev; char dev_r_[PADR_(l_dev_t)];
+};
+struct linux_chmod_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char mode_l_[PADL_(l_mode_t)]; l_mode_t mode; char mode_r_[PADR_(l_mode_t)];
+};
+struct linux_lchown16_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
+ char gid_l_[PADL_(l_gid16_t)]; l_gid16_t gid; char gid_r_[PADR_(l_gid16_t)];
+};
+struct linux_stat_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char up_l_[PADL_(struct linux_stat *)]; struct linux_stat * up; char up_r_[PADR_(struct linux_stat *)];
+};
+struct linux_lseek_args {
+ char fdes_l_[PADL_(l_uint)]; l_uint fdes; char fdes_r_[PADR_(l_uint)];
+ char off_l_[PADL_(l_off_t)]; l_off_t off; char off_r_[PADR_(l_off_t)];
+ char whence_l_[PADL_(l_int)]; l_int whence; char whence_r_[PADR_(l_int)];
+};
+struct linux_getpid_args {
+ register_t dummy;
+};
+struct linux_mount_args {
+ char specialfile_l_[PADL_(char *)]; char * specialfile; char specialfile_r_[PADR_(char *)];
+ char dir_l_[PADL_(char *)]; char * dir; char dir_r_[PADR_(char *)];
+ char filesystemtype_l_[PADL_(char *)]; char * filesystemtype; char filesystemtype_r_[PADR_(char *)];
+ char rwflag_l_[PADL_(l_ulong)]; l_ulong rwflag; char rwflag_r_[PADR_(l_ulong)];
+ char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
+};
+struct linux_oldumount_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+};
+struct linux_setuid16_args {
+ char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
+};
+struct linux_getuid16_args {
+ register_t dummy;
+};
+struct linux_stime_args {
+ register_t dummy;
+};
+struct linux_ptrace_args {
+ char req_l_[PADL_(l_long)]; l_long req; char req_r_[PADR_(l_long)];
+ char pid_l_[PADL_(l_long)]; l_long pid; char pid_r_[PADR_(l_long)];
+ char addr_l_[PADL_(l_long)]; l_long addr; char addr_r_[PADR_(l_long)];
+ char data_l_[PADL_(l_long)]; l_long data; char data_r_[PADR_(l_long)];
+};
+struct linux_alarm_args {
+ char secs_l_[PADL_(l_uint)]; l_uint secs; char secs_r_[PADR_(l_uint)];
+};
+struct linux_pause_args {
+ register_t dummy;
+};
+struct linux_utime_args {
+ char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)];
+ char times_l_[PADL_(struct l_utimbuf *)]; struct l_utimbuf * times; char times_r_[PADR_(struct l_utimbuf *)];
+};
+struct linux_access_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char amode_l_[PADL_(l_int)]; l_int amode; char amode_r_[PADR_(l_int)];
+};
+struct linux_nice_args {
+ char inc_l_[PADL_(l_int)]; l_int inc; char inc_r_[PADR_(l_int)];
+};
+struct linux_kill_args {
+ char pid_l_[PADL_(l_int)]; l_int pid; char pid_r_[PADR_(l_int)];
+ char signum_l_[PADL_(l_int)]; l_int signum; char signum_r_[PADR_(l_int)];
+};
+struct linux_rename_args {
+ char from_l_[PADL_(char *)]; char * from; char from_r_[PADR_(char *)];
+ char to_l_[PADL_(char *)]; char * to; char to_r_[PADR_(char *)];
+};
+struct linux_mkdir_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+};
+struct linux_rmdir_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+};
+struct linux_pipe_args {
+ char pipefds_l_[PADL_(l_int *)]; l_int * pipefds; char pipefds_r_[PADR_(l_int *)];
+};
+struct linux_times_args {
+ char buf_l_[PADL_(struct l_times_argv *)]; struct l_times_argv * buf; char buf_r_[PADR_(struct l_times_argv *)];
+};
+struct linux_brk_args {
+ char dsend_l_[PADL_(l_ulong)]; l_ulong dsend; char dsend_r_[PADR_(l_ulong)];
+};
+struct linux_setgid16_args {
+ char gid_l_[PADL_(l_gid16_t)]; l_gid16_t gid; char gid_r_[PADR_(l_gid16_t)];
+};
+struct linux_getgid16_args {
+ register_t dummy;
+};
+struct linux_signal_args {
+ char sig_l_[PADL_(l_int)]; l_int sig; char sig_r_[PADR_(l_int)];
+ char handler_l_[PADL_(l_handler_t)]; l_handler_t handler; char handler_r_[PADR_(l_handler_t)];
+};
+struct linux_geteuid16_args {
+ register_t dummy;
+};
+struct linux_getegid16_args {
+ register_t dummy;
+};
+struct linux_umount_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
+};
+struct linux_ioctl_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char cmd_l_[PADL_(l_uint)]; l_uint cmd; char cmd_r_[PADR_(l_uint)];
+ char arg_l_[PADL_(uintptr_t)]; uintptr_t arg; char arg_r_[PADR_(uintptr_t)];
+};
+struct linux_fcntl_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char cmd_l_[PADL_(l_uint)]; l_uint cmd; char cmd_r_[PADR_(l_uint)];
+ char arg_l_[PADL_(uintptr_t)]; uintptr_t arg; char arg_r_[PADR_(uintptr_t)];
+};
+struct linux_olduname_args {
+ register_t dummy;
+};
+struct linux_ustat_args {
+ char dev_l_[PADL_(l_dev_t)]; l_dev_t dev; char dev_r_[PADR_(l_dev_t)];
+ char ubuf_l_[PADL_(struct l_ustat *)]; struct l_ustat * ubuf; char ubuf_r_[PADR_(struct l_ustat *)];
+};
+struct linux_getppid_args {
+ register_t dummy;
+};
+struct linux_sigaction_args {
+ char sig_l_[PADL_(l_int)]; l_int sig; char sig_r_[PADR_(l_int)];
+ char nsa_l_[PADL_(l_osigaction_t *)]; l_osigaction_t * nsa; char nsa_r_[PADR_(l_osigaction_t *)];
+ char osa_l_[PADL_(l_osigaction_t *)]; l_osigaction_t * osa; char osa_r_[PADR_(l_osigaction_t *)];
+};
+struct linux_sgetmask_args {
+ register_t dummy;
+};
+struct linux_ssetmask_args {
+ char mask_l_[PADL_(l_osigset_t)]; l_osigset_t mask; char mask_r_[PADR_(l_osigset_t)];
+};
+struct linux_setreuid16_args {
+ char ruid_l_[PADL_(l_uid16_t)]; l_uid16_t ruid; char ruid_r_[PADR_(l_uid16_t)];
+ char euid_l_[PADL_(l_uid16_t)]; l_uid16_t euid; char euid_r_[PADR_(l_uid16_t)];
+};
+struct linux_setregid16_args {
+ char rgid_l_[PADL_(l_gid16_t)]; l_gid16_t rgid; char rgid_r_[PADR_(l_gid16_t)];
+ char egid_l_[PADL_(l_gid16_t)]; l_gid16_t egid; char egid_r_[PADR_(l_gid16_t)];
+};
+struct linux_sigsuspend_args {
+ char hist0_l_[PADL_(l_int)]; l_int hist0; char hist0_r_[PADR_(l_int)];
+ char hist1_l_[PADL_(l_int)]; l_int hist1; char hist1_r_[PADR_(l_int)];
+ char mask_l_[PADL_(l_osigset_t)]; l_osigset_t mask; char mask_r_[PADR_(l_osigset_t)];
+};
+struct linux_sigpending_args {
+ char mask_l_[PADL_(l_osigset_t *)]; l_osigset_t * mask; char mask_r_[PADR_(l_osigset_t *)];
+};
+struct linux_sethostname_args {
+ char hostname_l_[PADL_(char *)]; char * hostname; char hostname_r_[PADR_(char *)];
+ char len_l_[PADL_(u_int)]; u_int len; char len_r_[PADR_(u_int)];
+};
+struct linux_setrlimit_args {
+ char resource_l_[PADL_(l_uint)]; l_uint resource; char resource_r_[PADR_(l_uint)];
+ char rlim_l_[PADL_(struct l_rlimit *)]; struct l_rlimit * rlim; char rlim_r_[PADR_(struct l_rlimit *)];
+};
+struct linux_old_getrlimit_args {
+ char resource_l_[PADL_(l_uint)]; l_uint resource; char resource_r_[PADR_(l_uint)];
+ char rlim_l_[PADL_(struct l_rlimit *)]; struct l_rlimit * rlim; char rlim_r_[PADR_(struct l_rlimit *)];
+};
+struct linux_getrusage_args {
+ char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
+ char rusage_l_[PADL_(struct l_rusage *)]; struct l_rusage * rusage; char rusage_r_[PADR_(struct l_rusage *)];
+};
+struct linux_gettimeofday_args {
+ char tp_l_[PADL_(struct l_timeval *)]; struct l_timeval * tp; char tp_r_[PADR_(struct l_timeval *)];
+ char tzp_l_[PADL_(struct timezone *)]; struct timezone * tzp; char tzp_r_[PADR_(struct timezone *)];
+};
+struct linux_settimeofday_args {
+ char tp_l_[PADL_(struct l_timeval *)]; struct l_timeval * tp; char tp_r_[PADR_(struct l_timeval *)];
+ char tzp_l_[PADL_(struct timezone *)]; struct timezone * tzp; char tzp_r_[PADR_(struct timezone *)];
+};
+struct linux_getgroups16_args {
+ char gidsetsize_l_[PADL_(l_uint)]; l_uint gidsetsize; char gidsetsize_r_[PADR_(l_uint)];
+ char gidset_l_[PADL_(l_gid16_t *)]; l_gid16_t * gidset; char gidset_r_[PADR_(l_gid16_t *)];
+};
+struct linux_setgroups16_args {
+ char gidsetsize_l_[PADL_(l_uint)]; l_uint gidsetsize; char gidsetsize_r_[PADR_(l_uint)];
+ char gidset_l_[PADL_(l_gid16_t *)]; l_gid16_t * gidset; char gidset_r_[PADR_(l_gid16_t *)];
+};
+struct linux_old_select_args {
+ char ptr_l_[PADL_(struct l_old_select_argv *)]; struct l_old_select_argv * ptr; char ptr_r_[PADR_(struct l_old_select_argv *)];
+};
+struct linux_symlink_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char to_l_[PADL_(char *)]; char * to; char to_r_[PADR_(char *)];
+};
+struct linux_lstat_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char up_l_[PADL_(struct linux_lstat *)]; struct linux_lstat * up; char up_r_[PADR_(struct linux_lstat *)];
+};
+struct linux_readlink_args {
+ char name_l_[PADL_(char *)]; char * name; char name_r_[PADR_(char *)];
+ char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
+ char count_l_[PADL_(l_int)]; l_int count; char count_r_[PADR_(l_int)];
+};
+struct linux_reboot_args {
+ char magic1_l_[PADL_(l_int)]; l_int magic1; char magic1_r_[PADR_(l_int)];
+ char magic2_l_[PADL_(l_int)]; l_int magic2; char magic2_r_[PADR_(l_int)];
+ char cmd_l_[PADL_(l_uint)]; l_uint cmd; char cmd_r_[PADR_(l_uint)];
+ char arg_l_[PADL_(void *)]; void * arg; char arg_r_[PADR_(void *)];
+};
+struct linux_readdir_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char dent_l_[PADL_(struct l_dirent *)]; struct l_dirent * dent; char dent_r_[PADR_(struct l_dirent *)];
+ char count_l_[PADL_(l_uint)]; l_uint count; char count_r_[PADR_(l_uint)];
+};
+struct linux_mmap_args {
+ char ptr_l_[PADL_(struct l_mmap_argv *)]; struct l_mmap_argv * ptr; char ptr_r_[PADR_(struct l_mmap_argv *)];
+};
+struct linux_truncate_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char length_l_[PADL_(l_ulong)]; l_ulong length; char length_r_[PADR_(l_ulong)];
+};
+struct linux_ftruncate_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char length_l_[PADL_(long)]; long length; char length_r_[PADR_(long)];
+};
+struct linux_getpriority_args {
+ char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
+ char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
+};
+struct linux_statfs_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char buf_l_[PADL_(struct l_statfs_buf *)]; struct l_statfs_buf * buf; char buf_r_[PADR_(struct l_statfs_buf *)];
+};
+struct linux_fstatfs_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char buf_l_[PADL_(struct l_statfs_buf *)]; struct l_statfs_buf * buf; char buf_r_[PADR_(struct l_statfs_buf *)];
+};
+struct linux_socketcall_args {
+ char what_l_[PADL_(l_int)]; l_int what; char what_r_[PADR_(l_int)];
+ char args_l_[PADL_(l_ulong)]; l_ulong args; char args_r_[PADR_(l_ulong)];
+};
+struct linux_syslog_args {
+ char type_l_[PADL_(l_int)]; l_int type; char type_r_[PADR_(l_int)];
+ char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
+ char len_l_[PADL_(l_int)]; l_int len; char len_r_[PADR_(l_int)];
+};
+struct linux_setitimer_args {
+ char which_l_[PADL_(l_int)]; l_int which; char which_r_[PADR_(l_int)];
+ char itv_l_[PADL_(struct l_itimerval *)]; struct l_itimerval * itv; char itv_r_[PADR_(struct l_itimerval *)];
+ char oitv_l_[PADL_(struct l_itimerval *)]; struct l_itimerval * oitv; char oitv_r_[PADR_(struct l_itimerval *)];
+};
+struct linux_getitimer_args {
+ char which_l_[PADL_(l_int)]; l_int which; char which_r_[PADR_(l_int)];
+ char itv_l_[PADL_(struct l_itimerval *)]; struct l_itimerval * itv; char itv_r_[PADR_(struct l_itimerval *)];
+};
+struct linux_newstat_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char buf_l_[PADL_(struct l_newstat *)]; struct l_newstat * buf; char buf_r_[PADR_(struct l_newstat *)];
+};
+struct linux_newlstat_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char buf_l_[PADL_(struct l_newstat *)]; struct l_newstat * buf; char buf_r_[PADR_(struct l_newstat *)];
+};
+struct linux_newfstat_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char buf_l_[PADL_(struct l_newstat *)]; struct l_newstat * buf; char buf_r_[PADR_(struct l_newstat *)];
+};
+struct linux_uname_args {
+ register_t dummy;
+};
+struct linux_iopl_args {
+ char level_l_[PADL_(l_int)]; l_int level; char level_r_[PADR_(l_int)];
+};
+struct linux_vhangup_args {
+ register_t dummy;
+};
+struct linux_wait4_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+ char status_l_[PADL_(l_uint *)]; l_uint * status; char status_r_[PADR_(l_uint *)];
+ char options_l_[PADL_(l_int)]; l_int options; char options_r_[PADR_(l_int)];
+ char rusage_l_[PADL_(struct l_rusage *)]; struct l_rusage * rusage; char rusage_r_[PADR_(struct l_rusage *)];
+};
+struct linux_swapoff_args {
+ register_t dummy;
+};
+struct linux_sysinfo_args {
+ char info_l_[PADL_(struct l_sysinfo *)]; struct l_sysinfo * info; char info_r_[PADR_(struct l_sysinfo *)];
+};
+struct linux_ipc_args {
+ char what_l_[PADL_(l_uint)]; l_uint what; char what_r_[PADR_(l_uint)];
+ char arg1_l_[PADL_(l_int)]; l_int arg1; char arg1_r_[PADR_(l_int)];
+ char arg2_l_[PADL_(l_int)]; l_int arg2; char arg2_r_[PADR_(l_int)];
+ char arg3_l_[PADL_(l_int)]; l_int arg3; char arg3_r_[PADR_(l_int)];
+ char ptr_l_[PADL_(void *)]; void * ptr; char ptr_r_[PADR_(void *)];
+ char arg5_l_[PADL_(l_long)]; l_long arg5; char arg5_r_[PADR_(l_long)];
+};
+struct linux_sigreturn_args {
+ char sfp_l_[PADL_(struct l_sigframe *)]; struct l_sigframe * sfp; char sfp_r_[PADR_(struct l_sigframe *)];
+};
+struct linux_clone_args {
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
+ char stack_l_[PADL_(void *)]; void * stack; char stack_r_[PADR_(void *)];
+ char parent_tidptr_l_[PADL_(void *)]; void * parent_tidptr; char parent_tidptr_r_[PADR_(void *)];
+ char tls_l_[PADL_(void *)]; void * tls; char tls_r_[PADR_(void *)];
+ char child_tidptr_l_[PADL_(void *)]; void * child_tidptr; char child_tidptr_r_[PADR_(void *)];
+};
+struct linux_setdomainname_args {
+ char name_l_[PADL_(char *)]; char * name; char name_r_[PADR_(char *)];
+ char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
+};
+struct linux_newuname_args {
+ char buf_l_[PADL_(struct l_new_utsname *)]; struct l_new_utsname * buf; char buf_r_[PADR_(struct l_new_utsname *)];
+};
+struct linux_adjtimex_args {
+ register_t dummy;
+};
+struct linux_mprotect_args {
+ char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)];
+ char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
+ char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
+};
+struct linux_sigprocmask_args {
+ char how_l_[PADL_(l_int)]; l_int how; char how_r_[PADR_(l_int)];
+ char mask_l_[PADL_(l_osigset_t *)]; l_osigset_t * mask; char mask_r_[PADR_(l_osigset_t *)];
+ char omask_l_[PADL_(l_osigset_t *)]; l_osigset_t * omask; char omask_r_[PADR_(l_osigset_t *)];
+};
+struct linux_create_module_args {
+ register_t dummy;
+};
+struct linux_init_module_args {
+ register_t dummy;
+};
+struct linux_delete_module_args {
+ register_t dummy;
+};
+struct linux_get_kernel_syms_args {
+ register_t dummy;
+};
+struct linux_quotactl_args {
+ register_t dummy;
+};
+struct linux_bdflush_args {
+ register_t dummy;
+};
+struct linux_sysfs_args {
+ char option_l_[PADL_(l_int)]; l_int option; char option_r_[PADR_(l_int)];
+ char arg1_l_[PADL_(l_ulong)]; l_ulong arg1; char arg1_r_[PADR_(l_ulong)];
+ char arg2_l_[PADL_(l_ulong)]; l_ulong arg2; char arg2_r_[PADR_(l_ulong)];
+};
+struct linux_personality_args {
+ char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)];
+};
+struct linux_setfsuid16_args {
+ char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
+};
+struct linux_setfsgid16_args {
+ char gid_l_[PADL_(l_gid16_t)]; l_gid16_t gid; char gid_r_[PADR_(l_gid16_t)];
+};
+struct linux_llseek_args {
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char ohigh_l_[PADL_(l_ulong)]; l_ulong ohigh; char ohigh_r_[PADR_(l_ulong)];
+ char olow_l_[PADL_(l_ulong)]; l_ulong olow; char olow_r_[PADR_(l_ulong)];
+ char res_l_[PADL_(l_loff_t *)]; l_loff_t * res; char res_r_[PADR_(l_loff_t *)];
+ char whence_l_[PADL_(l_uint)]; l_uint whence; char whence_r_[PADR_(l_uint)];
+};
+struct linux_getdents_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char dent_l_[PADL_(void *)]; void * dent; char dent_r_[PADR_(void *)];
+ char count_l_[PADL_(l_uint)]; l_uint count; char count_r_[PADR_(l_uint)];
+};
+struct linux_select_args {
+ char nfds_l_[PADL_(l_int)]; l_int nfds; char nfds_r_[PADR_(l_int)];
+ char readfds_l_[PADL_(l_fd_set *)]; l_fd_set * readfds; char readfds_r_[PADR_(l_fd_set *)];
+ char writefds_l_[PADL_(l_fd_set *)]; l_fd_set * writefds; char writefds_r_[PADR_(l_fd_set *)];
+ char exceptfds_l_[PADL_(l_fd_set *)]; l_fd_set * exceptfds; char exceptfds_r_[PADR_(l_fd_set *)];
+ char timeout_l_[PADL_(struct l_timeval *)]; struct l_timeval * timeout; char timeout_r_[PADR_(struct l_timeval *)];
+};
+struct linux_msync_args {
+ char addr_l_[PADL_(l_ulong)]; l_ulong addr; char addr_r_[PADR_(l_ulong)];
+ char len_l_[PADL_(l_size_t)]; l_size_t len; char len_r_[PADR_(l_size_t)];
+ char fl_l_[PADL_(l_int)]; l_int fl; char fl_r_[PADR_(l_int)];
+};
+struct linux_readv_args {
+ char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)];
+ char iovp_l_[PADL_(struct l_iovec32 *)]; struct l_iovec32 * iovp; char iovp_r_[PADR_(struct l_iovec32 *)];
+ char iovcnt_l_[PADL_(l_ulong)]; l_ulong iovcnt; char iovcnt_r_[PADR_(l_ulong)];
+};
+struct linux_writev_args {
+ char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)];
+ char iovp_l_[PADL_(struct l_iovec32 *)]; struct l_iovec32 * iovp; char iovp_r_[PADR_(struct l_iovec32 *)];
+ char iovcnt_l_[PADL_(l_ulong)]; l_ulong iovcnt; char iovcnt_r_[PADR_(l_ulong)];
+};
+struct linux_getsid_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+};
+struct linux_fdatasync_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+};
+struct linux_sysctl_args {
+ char args_l_[PADL_(struct l___sysctl_args *)]; struct l___sysctl_args * args; char args_r_[PADR_(struct l___sysctl_args *)];
+};
+struct linux_sched_setscheduler_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+ char policy_l_[PADL_(l_int)]; l_int policy; char policy_r_[PADR_(l_int)];
+ char param_l_[PADL_(struct l_sched_param *)]; struct l_sched_param * param; char param_r_[PADR_(struct l_sched_param *)];
+};
+struct linux_sched_getscheduler_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+};
+struct linux_sched_get_priority_max_args {
+ char policy_l_[PADL_(l_int)]; l_int policy; char policy_r_[PADR_(l_int)];
+};
+struct linux_sched_get_priority_min_args {
+ char policy_l_[PADL_(l_int)]; l_int policy; char policy_r_[PADR_(l_int)];
+};
+struct linux_sched_rr_get_interval_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+ char interval_l_[PADL_(struct l_timespec *)]; struct l_timespec * interval; char interval_r_[PADR_(struct l_timespec *)];
+};
+struct linux_nanosleep_args {
+ char rqtp_l_[PADL_(const struct l_timespec *)]; const struct l_timespec * rqtp; char rqtp_r_[PADR_(const struct l_timespec *)];
+ char rmtp_l_[PADL_(struct l_timespec *)]; struct l_timespec * rmtp; char rmtp_r_[PADR_(struct l_timespec *)];
+};
+struct linux_mremap_args {
+ char addr_l_[PADL_(l_ulong)]; l_ulong addr; char addr_r_[PADR_(l_ulong)];
+ char old_len_l_[PADL_(l_ulong)]; l_ulong old_len; char old_len_r_[PADR_(l_ulong)];
+ char new_len_l_[PADL_(l_ulong)]; l_ulong new_len; char new_len_r_[PADR_(l_ulong)];
+ char flags_l_[PADL_(l_ulong)]; l_ulong flags; char flags_r_[PADR_(l_ulong)];
+ char new_addr_l_[PADL_(l_ulong)]; l_ulong new_addr; char new_addr_r_[PADR_(l_ulong)];
+};
+struct linux_setresuid16_args {
+ char ruid_l_[PADL_(l_uid16_t)]; l_uid16_t ruid; char ruid_r_[PADR_(l_uid16_t)];
+ char euid_l_[PADL_(l_uid16_t)]; l_uid16_t euid; char euid_r_[PADR_(l_uid16_t)];
+ char suid_l_[PADL_(l_uid16_t)]; l_uid16_t suid; char suid_r_[PADR_(l_uid16_t)];
+};
+struct linux_getresuid16_args {
+ char ruid_l_[PADL_(l_uid16_t *)]; l_uid16_t * ruid; char ruid_r_[PADR_(l_uid16_t *)];
+ char euid_l_[PADL_(l_uid16_t *)]; l_uid16_t * euid; char euid_r_[PADR_(l_uid16_t *)];
+ char suid_l_[PADL_(l_uid16_t *)]; l_uid16_t * suid; char suid_r_[PADR_(l_uid16_t *)];
+};
+struct linux_query_module_args {
+ register_t dummy;
+};
+struct linux_nfsservctl_args {
+ register_t dummy;
+};
+struct linux_setresgid16_args {
+ char rgid_l_[PADL_(l_gid16_t)]; l_gid16_t rgid; char rgid_r_[PADR_(l_gid16_t)];
+ char egid_l_[PADL_(l_gid16_t)]; l_gid16_t egid; char egid_r_[PADR_(l_gid16_t)];
+ char sgid_l_[PADL_(l_gid16_t)]; l_gid16_t sgid; char sgid_r_[PADR_(l_gid16_t)];
+};
+struct linux_getresgid16_args {
+ char rgid_l_[PADL_(l_gid16_t *)]; l_gid16_t * rgid; char rgid_r_[PADR_(l_gid16_t *)];
+ char egid_l_[PADL_(l_gid16_t *)]; l_gid16_t * egid; char egid_r_[PADR_(l_gid16_t *)];
+ char sgid_l_[PADL_(l_gid16_t *)]; l_gid16_t * sgid; char sgid_r_[PADR_(l_gid16_t *)];
+};
+struct linux_prctl_args {
+ char option_l_[PADL_(l_int)]; l_int option; char option_r_[PADR_(l_int)];
+ char arg2_l_[PADL_(l_int)]; l_int arg2; char arg2_r_[PADR_(l_int)];
+ char arg3_l_[PADL_(l_int)]; l_int arg3; char arg3_r_[PADR_(l_int)];
+ char arg4_l_[PADL_(l_int)]; l_int arg4; char arg4_r_[PADR_(l_int)];
+ char arg5_l_[PADL_(l_int)]; l_int arg5; char arg5_r_[PADR_(l_int)];
+};
+struct linux_rt_sigreturn_args {
+ char ucp_l_[PADL_(struct l_ucontext *)]; struct l_ucontext * ucp; char ucp_r_[PADR_(struct l_ucontext *)];
+};
+struct linux_rt_sigaction_args {
+ char sig_l_[PADL_(l_int)]; l_int sig; char sig_r_[PADR_(l_int)];
+ char act_l_[PADL_(l_sigaction_t *)]; l_sigaction_t * act; char act_r_[PADR_(l_sigaction_t *)];
+ char oact_l_[PADL_(l_sigaction_t *)]; l_sigaction_t * oact; char oact_r_[PADR_(l_sigaction_t *)];
+ char sigsetsize_l_[PADL_(l_size_t)]; l_size_t sigsetsize; char sigsetsize_r_[PADR_(l_size_t)];
+};
+struct linux_rt_sigprocmask_args {
+ char how_l_[PADL_(l_int)]; l_int how; char how_r_[PADR_(l_int)];
+ char mask_l_[PADL_(l_sigset_t *)]; l_sigset_t * mask; char mask_r_[PADR_(l_sigset_t *)];
+ char omask_l_[PADL_(l_sigset_t *)]; l_sigset_t * omask; char omask_r_[PADR_(l_sigset_t *)];
+ char sigsetsize_l_[PADL_(l_size_t)]; l_size_t sigsetsize; char sigsetsize_r_[PADR_(l_size_t)];
+};
+struct linux_rt_sigpending_args {
+ char set_l_[PADL_(l_sigset_t *)]; l_sigset_t * set; char set_r_[PADR_(l_sigset_t *)];
+ char sigsetsize_l_[PADL_(l_size_t)]; l_size_t sigsetsize; char sigsetsize_r_[PADR_(l_size_t)];
+};
+struct linux_rt_sigtimedwait_args {
+ char mask_l_[PADL_(l_sigset_t *)]; l_sigset_t * mask; char mask_r_[PADR_(l_sigset_t *)];
+ char ptr_l_[PADL_(l_siginfo_t *)]; l_siginfo_t * ptr; char ptr_r_[PADR_(l_siginfo_t *)];
+ char timeout_l_[PADL_(struct l_timeval *)]; struct l_timeval * timeout; char timeout_r_[PADR_(struct l_timeval *)];
+ char sigsetsize_l_[PADL_(l_size_t)]; l_size_t sigsetsize; char sigsetsize_r_[PADR_(l_size_t)];
+};
+struct linux_rt_sigqueueinfo_args {
+ register_t dummy;
+};
+struct linux_rt_sigsuspend_args {
+ char newset_l_[PADL_(l_sigset_t *)]; l_sigset_t * newset; char newset_r_[PADR_(l_sigset_t *)];
+ char sigsetsize_l_[PADL_(l_size_t)]; l_size_t sigsetsize; char sigsetsize_r_[PADR_(l_size_t)];
+};
+struct linux_pread_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
+ char nbyte_l_[PADL_(l_size_t)]; l_size_t nbyte; char nbyte_r_[PADR_(l_size_t)];
+ char offset_l_[PADL_(l_loff_t)]; l_loff_t offset; char offset_r_[PADR_(l_loff_t)];
+};
+struct linux_pwrite_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
+ char nbyte_l_[PADL_(l_size_t)]; l_size_t nbyte; char nbyte_r_[PADR_(l_size_t)];
+ char offset_l_[PADL_(l_loff_t)]; l_loff_t offset; char offset_r_[PADR_(l_loff_t)];
+};
+struct linux_chown16_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
+ char gid_l_[PADL_(l_gid16_t)]; l_gid16_t gid; char gid_r_[PADR_(l_gid16_t)];
+};
+struct linux_getcwd_args {
+ char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
+ char bufsize_l_[PADL_(l_ulong)]; l_ulong bufsize; char bufsize_r_[PADR_(l_ulong)];
+};
+struct linux_capget_args {
+ char hdrp_l_[PADL_(struct l_user_cap_header *)]; struct l_user_cap_header * hdrp; char hdrp_r_[PADR_(struct l_user_cap_header *)];
+ char datap_l_[PADL_(struct l_user_cap_data *)]; struct l_user_cap_data * datap; char datap_r_[PADR_(struct l_user_cap_data *)];
+};
+struct linux_capset_args {
+ char hdrp_l_[PADL_(struct l_user_cap_header *)]; struct l_user_cap_header * hdrp; char hdrp_r_[PADR_(struct l_user_cap_header *)];
+ char datap_l_[PADL_(struct l_user_cap_data *)]; struct l_user_cap_data * datap; char datap_r_[PADR_(struct l_user_cap_data *)];
+};
+struct linux_sigaltstack_args {
+ char uss_l_[PADL_(l_stack_t *)]; l_stack_t * uss; char uss_r_[PADR_(l_stack_t *)];
+ char uoss_l_[PADL_(l_stack_t *)]; l_stack_t * uoss; char uoss_r_[PADR_(l_stack_t *)];
+};
+struct linux_sendfile_args {
+ register_t dummy;
+};
+struct linux_vfork_args {
+ register_t dummy;
+};
+struct linux_getrlimit_args {
+ char resource_l_[PADL_(l_uint)]; l_uint resource; char resource_r_[PADR_(l_uint)];
+ char rlim_l_[PADL_(struct l_rlimit *)]; struct l_rlimit * rlim; char rlim_r_[PADR_(struct l_rlimit *)];
+};
+struct linux_mmap2_args {
+ char addr_l_[PADL_(l_ulong)]; l_ulong addr; char addr_r_[PADR_(l_ulong)];
+ char len_l_[PADL_(l_ulong)]; l_ulong len; char len_r_[PADR_(l_ulong)];
+ char prot_l_[PADL_(l_ulong)]; l_ulong prot; char prot_r_[PADR_(l_ulong)];
+ char flags_l_[PADL_(l_ulong)]; l_ulong flags; char flags_r_[PADR_(l_ulong)];
+ char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)];
+ char pgoff_l_[PADL_(l_ulong)]; l_ulong pgoff; char pgoff_r_[PADR_(l_ulong)];
+};
+struct linux_truncate64_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char length_l_[PADL_(l_loff_t)]; l_loff_t length; char length_r_[PADR_(l_loff_t)];
+};
+struct linux_ftruncate64_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char length_l_[PADL_(l_loff_t)]; l_loff_t length; char length_r_[PADR_(l_loff_t)];
+};
+struct linux_stat64_args {
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char statbuf_l_[PADL_(struct l_stat64 *)]; struct l_stat64 * statbuf; char statbuf_r_[PADR_(struct l_stat64 *)];
+};
+struct linux_lstat64_args {
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char statbuf_l_[PADL_(struct l_stat64 *)]; struct l_stat64 * statbuf; char statbuf_r_[PADR_(struct l_stat64 *)];
+};
+struct linux_fstat64_args {
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char statbuf_l_[PADL_(struct l_stat64 *)]; struct l_stat64 * statbuf; char statbuf_r_[PADR_(struct l_stat64 *)];
+};
+struct linux_lchown_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char uid_l_[PADL_(l_uid_t)]; l_uid_t uid; char uid_r_[PADR_(l_uid_t)];
+ char gid_l_[PADL_(l_gid_t)]; l_gid_t gid; char gid_r_[PADR_(l_gid_t)];
+};
+struct linux_getuid_args {
+ register_t dummy;
+};
+struct linux_getgid_args {
+ register_t dummy;
+};
+struct linux_getgroups_args {
+ char gidsetsize_l_[PADL_(l_int)]; l_int gidsetsize; char gidsetsize_r_[PADR_(l_int)];
+ char grouplist_l_[PADL_(l_gid_t *)]; l_gid_t * grouplist; char grouplist_r_[PADR_(l_gid_t *)];
+};
+struct linux_setgroups_args {
+ char gidsetsize_l_[PADL_(l_int)]; l_int gidsetsize; char gidsetsize_r_[PADR_(l_int)];
+ char grouplist_l_[PADL_(l_gid_t *)]; l_gid_t * grouplist; char grouplist_r_[PADR_(l_gid_t *)];
+};
+struct linux_chown_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char uid_l_[PADL_(l_uid_t)]; l_uid_t uid; char uid_r_[PADR_(l_uid_t)];
+ char gid_l_[PADL_(l_gid_t)]; l_gid_t gid; char gid_r_[PADR_(l_gid_t)];
+};
+struct linux_setfsuid_args {
+ char uid_l_[PADL_(l_uid_t)]; l_uid_t uid; char uid_r_[PADR_(l_uid_t)];
+};
+struct linux_setfsgid_args {
+ char gid_l_[PADL_(l_gid_t)]; l_gid_t gid; char gid_r_[PADR_(l_gid_t)];
+};
+struct linux_pivot_root_args {
+ char new_root_l_[PADL_(char *)]; char * new_root; char new_root_r_[PADR_(char *)];
+ char put_old_l_[PADL_(char *)]; char * put_old; char put_old_r_[PADR_(char *)];
+};
+struct linux_mincore_args {
+ char start_l_[PADL_(l_ulong)]; l_ulong start; char start_r_[PADR_(l_ulong)];
+ char len_l_[PADL_(l_size_t)]; l_size_t len; char len_r_[PADR_(l_size_t)];
+ char vec_l_[PADL_(u_char *)]; u_char * vec; char vec_r_[PADR_(u_char *)];
+};
+struct linux_getdents64_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char dirent_l_[PADL_(void *)]; void * dirent; char dirent_r_[PADR_(void *)];
+ char count_l_[PADL_(l_uint)]; l_uint count; char count_r_[PADR_(l_uint)];
+};
+struct linux_fcntl64_args {
+ char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
+ char cmd_l_[PADL_(l_uint)]; l_uint cmd; char cmd_r_[PADR_(l_uint)];
+ char arg_l_[PADL_(uintptr_t)]; uintptr_t arg; char arg_r_[PADR_(uintptr_t)];
+};
+struct linux_gettid_args {
+ register_t dummy;
+};
+struct linux_setxattr_args {
+ register_t dummy;
+};
+struct linux_lsetxattr_args {
+ register_t dummy;
+};
+struct linux_fsetxattr_args {
+ register_t dummy;
+};
+struct linux_getxattr_args {
+ register_t dummy;
+};
+struct linux_lgetxattr_args {
+ register_t dummy;
+};
+struct linux_fgetxattr_args {
+ register_t dummy;
+};
+struct linux_listxattr_args {
+ register_t dummy;
+};
+struct linux_llistxattr_args {
+ register_t dummy;
+};
+struct linux_flistxattr_args {
+ register_t dummy;
+};
+struct linux_removexattr_args {
+ register_t dummy;
+};
+struct linux_lremovexattr_args {
+ register_t dummy;
+};
+struct linux_fremovexattr_args {
+ register_t dummy;
+};
+struct linux_tkill_args {
+ char tid_l_[PADL_(int)]; int tid; char tid_r_[PADR_(int)];
+ char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
+};
+struct linux_sys_futex_args {
+ char uaddr_l_[PADL_(void *)]; void * uaddr; char uaddr_r_[PADR_(void *)];
+ char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)];
+ char val_l_[PADL_(uint32_t)]; uint32_t val; char val_r_[PADR_(uint32_t)];
+ char timeout_l_[PADL_(struct l_timespec *)]; struct l_timespec * timeout; char timeout_r_[PADR_(struct l_timespec *)];
+ char uaddr2_l_[PADL_(uint32_t *)]; uint32_t * uaddr2; char uaddr2_r_[PADR_(uint32_t *)];
+ char val3_l_[PADL_(uint32_t)]; uint32_t val3; char val3_r_[PADR_(uint32_t)];
+};
+struct linux_sched_setaffinity_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+ char len_l_[PADL_(l_uint)]; l_uint len; char len_r_[PADR_(l_uint)];
+ char user_mask_ptr_l_[PADL_(l_ulong *)]; l_ulong * user_mask_ptr; char user_mask_ptr_r_[PADR_(l_ulong *)];
+};
+struct linux_sched_getaffinity_args {
+ char pid_l_[PADL_(l_pid_t)]; l_pid_t pid; char pid_r_[PADR_(l_pid_t)];
+ char len_l_[PADL_(l_uint)]; l_uint len; char len_r_[PADR_(l_uint)];
+ char user_mask_ptr_l_[PADL_(l_ulong *)]; l_ulong * user_mask_ptr; char user_mask_ptr_r_[PADR_(l_ulong *)];
+};
+struct linux_set_thread_area_args {
+ char desc_l_[PADL_(struct l_user_desc *)]; struct l_user_desc * desc; char desc_r_[PADR_(struct l_user_desc *)];
+};
+struct linux_fadvise64_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char offset_l_[PADL_(l_loff_t)]; l_loff_t offset; char offset_r_[PADR_(l_loff_t)];
+ char len_l_[PADL_(l_size_t)]; l_size_t len; char len_r_[PADR_(l_size_t)];
+ char advice_l_[PADL_(int)]; int advice; char advice_r_[PADR_(int)];
+};
+struct linux_exit_group_args {
+ char error_code_l_[PADL_(int)]; int error_code; char error_code_r_[PADR_(int)];
+};
+struct linux_lookup_dcookie_args {
+ register_t dummy;
+};
+struct linux_epoll_create_args {
+ register_t dummy;
+};
+struct linux_epoll_ctl_args {
+ register_t dummy;
+};
+struct linux_epoll_wait_args {
+ register_t dummy;
+};
+struct linux_remap_file_pages_args {
+ register_t dummy;
+};
+struct linux_set_tid_address_args {
+ char tidptr_l_[PADL_(int *)]; int * tidptr; char tidptr_r_[PADR_(int *)];
+};
+struct linux_timer_create_args {
+ register_t dummy;
+};
+struct linux_timer_settime_args {
+ register_t dummy;
+};
+struct linux_timer_gettime_args {
+ register_t dummy;
+};
+struct linux_timer_getoverrun_args {
+ register_t dummy;
+};
+struct linux_timer_delete_args {
+ register_t dummy;
+};
+struct linux_clock_settime_args {
+ char which_l_[PADL_(clockid_t)]; clockid_t which; char which_r_[PADR_(clockid_t)];
+ char tp_l_[PADL_(struct l_timespec *)]; struct l_timespec * tp; char tp_r_[PADR_(struct l_timespec *)];
+};
+struct linux_clock_gettime_args {
+ char which_l_[PADL_(clockid_t)]; clockid_t which; char which_r_[PADR_(clockid_t)];
+ char tp_l_[PADL_(struct l_timespec *)]; struct l_timespec * tp; char tp_r_[PADR_(struct l_timespec *)];
+};
+struct linux_clock_getres_args {
+ char which_l_[PADL_(clockid_t)]; clockid_t which; char which_r_[PADR_(clockid_t)];
+ char tp_l_[PADL_(struct l_timespec *)]; struct l_timespec * tp; char tp_r_[PADR_(struct l_timespec *)];
+};
+struct linux_clock_nanosleep_args {
+ char which_l_[PADL_(clockid_t)]; clockid_t which; char which_r_[PADR_(clockid_t)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char rqtp_l_[PADL_(struct l_timespec *)]; struct l_timespec * rqtp; char rqtp_r_[PADR_(struct l_timespec *)];
+ char rmtp_l_[PADL_(struct l_timespec *)]; struct l_timespec * rmtp; char rmtp_r_[PADR_(struct l_timespec *)];
+};
+struct linux_statfs64_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char bufsize_l_[PADL_(size_t)]; size_t bufsize; char bufsize_r_[PADR_(size_t)];
+ char buf_l_[PADL_(struct l_statfs64_buf *)]; struct l_statfs64_buf * buf; char buf_r_[PADR_(struct l_statfs64_buf *)];
+};
+struct linux_fstatfs64_args {
+ register_t dummy;
+};
+struct linux_tgkill_args {
+ char tgid_l_[PADL_(int)]; int tgid; char tgid_r_[PADR_(int)];
+ char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
+ char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
+};
+struct linux_utimes_args {
+ char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)];
+ char tptr_l_[PADL_(struct l_timeval *)]; struct l_timeval * tptr; char tptr_r_[PADR_(struct l_timeval *)];
+};
+struct linux_fadvise64_64_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char offset_l_[PADL_(l_loff_t)]; l_loff_t offset; char offset_r_[PADR_(l_loff_t)];
+ char len_l_[PADL_(l_loff_t)]; l_loff_t len; char len_r_[PADR_(l_loff_t)];
+ char advice_l_[PADL_(int)]; int advice; char advice_r_[PADR_(int)];
+};
+struct linux_mbind_args {
+ register_t dummy;
+};
+struct linux_get_mempolicy_args {
+ register_t dummy;
+};
+struct linux_set_mempolicy_args {
+ register_t dummy;
+};
+struct linux_mq_open_args {
+ register_t dummy;
+};
+struct linux_mq_unlink_args {
+ register_t dummy;
+};
+struct linux_mq_timedsend_args {
+ register_t dummy;
+};
+struct linux_mq_timedreceive_args {
+ register_t dummy;
+};
+struct linux_mq_notify_args {
+ register_t dummy;
+};
+struct linux_mq_getsetattr_args {
+ register_t dummy;
+};
+struct linux_kexec_load_args {
+ register_t dummy;
+};
+struct linux_waitid_args {
+ register_t dummy;
+};
+struct linux_add_key_args {
+ register_t dummy;
+};
+struct linux_request_key_args {
+ register_t dummy;
+};
+struct linux_keyctl_args {
+ register_t dummy;
+};
+struct linux_ioprio_set_args {
+ register_t dummy;
+};
+struct linux_ioprio_get_args {
+ register_t dummy;
+};
+struct linux_inotify_init_args {
+ register_t dummy;
+};
+struct linux_inotify_add_watch_args {
+ register_t dummy;
+};
+struct linux_inotify_rm_watch_args {
+ register_t dummy;
+};
+struct linux_migrate_pages_args {
+ register_t dummy;
+};
+struct linux_openat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+};
+struct linux_mkdirat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+};
+struct linux_mknodat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char mode_l_[PADL_(l_int)]; l_int mode; char mode_r_[PADR_(l_int)];
+ char dev_l_[PADL_(l_uint)]; l_uint dev; char dev_r_[PADR_(l_uint)];
+};
+struct linux_fchownat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
+ char gid_l_[PADL_(l_gid16_t)]; l_gid16_t gid; char gid_r_[PADR_(l_gid16_t)];
+ char flag_l_[PADL_(l_int)]; l_int flag; char flag_r_[PADR_(l_int)];
+};
+struct linux_futimesat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char filename_l_[PADL_(char *)]; char * filename; char filename_r_[PADR_(char *)];
+ char utimes_l_[PADL_(struct l_timeval *)]; struct l_timeval * utimes; char utimes_r_[PADR_(struct l_timeval *)];
+};
+struct linux_fstatat64_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(char *)]; char * pathname; char pathname_r_[PADR_(char *)];
+ char statbuf_l_[PADL_(struct l_stat64 *)]; struct l_stat64 * statbuf; char statbuf_r_[PADR_(struct l_stat64 *)];
+ char flag_l_[PADL_(l_int)]; l_int flag; char flag_r_[PADR_(l_int)];
+};
+struct linux_unlinkat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char flag_l_[PADL_(l_int)]; l_int flag; char flag_r_[PADR_(l_int)];
+};
+struct linux_renameat_args {
+ char olddfd_l_[PADL_(l_int)]; l_int olddfd; char olddfd_r_[PADR_(l_int)];
+ char oldname_l_[PADL_(const char *)]; const char * oldname; char oldname_r_[PADR_(const char *)];
+ char newdfd_l_[PADL_(l_int)]; l_int newdfd; char newdfd_r_[PADR_(l_int)];
+ char newname_l_[PADL_(const char *)]; const char * newname; char newname_r_[PADR_(const char *)];
+};
+struct linux_linkat_args {
+ char olddfd_l_[PADL_(l_int)]; l_int olddfd; char olddfd_r_[PADR_(l_int)];
+ char oldname_l_[PADL_(const char *)]; const char * oldname; char oldname_r_[PADR_(const char *)];
+ char newdfd_l_[PADL_(l_int)]; l_int newdfd; char newdfd_r_[PADR_(l_int)];
+ char newname_l_[PADL_(const char *)]; const char * newname; char newname_r_[PADR_(const char *)];
+ char flag_l_[PADL_(l_int)]; l_int flag; char flag_r_[PADR_(l_int)];
+};
+struct linux_symlinkat_args {
+ char oldname_l_[PADL_(const char *)]; const char * oldname; char oldname_r_[PADR_(const char *)];
+ char newdfd_l_[PADL_(l_int)]; l_int newdfd; char newdfd_r_[PADR_(l_int)];
+ char newname_l_[PADL_(const char *)]; const char * newname; char newname_r_[PADR_(const char *)];
+};
+struct linux_readlinkat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
+ char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
+ char bufsiz_l_[PADL_(l_int)]; l_int bufsiz; char bufsiz_r_[PADR_(l_int)];
+};
+struct linux_fchmodat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char mode_l_[PADL_(l_mode_t)]; l_mode_t mode; char mode_r_[PADR_(l_mode_t)];
+};
+struct linux_faccessat_args {
+ char dfd_l_[PADL_(l_int)]; l_int dfd; char dfd_r_[PADR_(l_int)];
+ char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
+ char amode_l_[PADL_(l_int)]; l_int amode; char amode_r_[PADR_(l_int)];
+ char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+};
+struct linux_pselect6_args {
+ register_t dummy;
+};
+struct linux_ppoll_args {
+ register_t dummy;
+};
+struct linux_unshare_args {
+ register_t dummy;
+};
+struct linux_set_robust_list_args {
+ char head_l_[PADL_(struct linux_robust_list_head *)]; struct linux_robust_list_head * head; char head_r_[PADR_(struct linux_robust_list_head *)];
+ char len_l_[PADL_(l_size_t)]; l_size_t len; char len_r_[PADR_(l_size_t)];
+};
+struct linux_get_robust_list_args {
+ char pid_l_[PADL_(l_int)]; l_int pid; char pid_r_[PADR_(l_int)];
+ char head_l_[PADL_(struct linux_robust_list_head *)]; struct linux_robust_list_head * head; char head_r_[PADR_(struct linux_robust_list_head *)];
+ char len_l_[PADL_(l_size_t *)]; l_size_t * len; char len_r_[PADR_(l_size_t *)];
+};
+struct linux_splice_args {
+ register_t dummy;
+};
+struct linux_sync_file_range_args {
+ register_t dummy;
+};
+struct linux_tee_args {
+ register_t dummy;
+};
+struct linux_vmsplice_args {
+ register_t dummy;
+};
+struct linux_move_pages_args {
+ register_t dummy;
+};
+struct linux_getcpu_args {
+ register_t dummy;
+};
+struct linux_epoll_pwait_args {
+ register_t dummy;
+};
+struct linux_utimensat_args {
+ register_t dummy;
+};
+struct linux_signalfd_args {
+ register_t dummy;
+};
+struct linux_timerfd_create_args {
+ register_t dummy;
+};
+struct linux_eventfd_args {
+ register_t dummy;
+};
+struct linux_fallocate_args {
+ register_t dummy;
+};
+struct linux_timerfd_settime_args {
+ register_t dummy;
+};
+struct linux_timerfd_gettime_args {
+ register_t dummy;
+};
+struct linux_signalfd4_args {
+ register_t dummy;
+};
+struct linux_eventfd2_args {
+ register_t dummy;
+};
+struct linux_epoll_create1_args {
+ register_t dummy;
+};
+struct linux_dup3_args {
+ register_t dummy;
+};
+struct linux_pipe2_args {
+ char pipefds_l_[PADL_(l_int *)]; l_int * pipefds; char pipefds_r_[PADR_(l_int *)];
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
+};
+struct linux_inotify_init1_args {
+ register_t dummy;
+};
+struct linux_preadv_args {
+ register_t dummy;
+};
+struct linux_pwritev_args {
+ register_t dummy;
+};
+struct linux_rt_tsigqueueinfo_args {
+ register_t dummy;
+};
+struct linux_perf_event_open_args {
+ register_t dummy;
+};
+struct linux_recvmmsg_args {
+ register_t dummy;
+};
+struct linux_fanotify_init_args {
+ register_t dummy;
+};
+struct linux_fanotify_mark_args {
+ register_t dummy;
+};
+struct linux_prlimit64_args {
+ register_t dummy;
+};
+struct linux_name_to_handle_at_args {
+ register_t dummy;
+};
+struct linux_open_by_handle_at_args {
+ register_t dummy;
+};
+struct linux_clock_adjtime_args {
+ register_t dummy;
+};
+struct linux_syncfs_args {
+ register_t dummy;
+};
+struct linux_sendmmsg_args {
+ register_t dummy;
+};
+struct linux_setns_args {
+ register_t dummy;
+};
+struct linux_process_vm_readv_args {
+ register_t dummy;
+};
+struct linux_process_vm_writev_args {
+ register_t dummy;
+};
+#define nosys linux_nosys
+int linux_fork(struct thread *, struct linux_fork_args *);
+int linux_open(struct thread *, struct linux_open_args *);
+int linux_waitpid(struct thread *, struct linux_waitpid_args *);
+int linux_creat(struct thread *, struct linux_creat_args *);
+int linux_link(struct thread *, struct linux_link_args *);
+int linux_unlink(struct thread *, struct linux_unlink_args *);
+int linux_execve(struct thread *, struct linux_execve_args *);
+int linux_chdir(struct thread *, struct linux_chdir_args *);
+int linux_time(struct thread *, struct linux_time_args *);
+int linux_mknod(struct thread *, struct linux_mknod_args *);
+int linux_chmod(struct thread *, struct linux_chmod_args *);
+int linux_lchown16(struct thread *, struct linux_lchown16_args *);
+int linux_stat(struct thread *, struct linux_stat_args *);
+int linux_lseek(struct thread *, struct linux_lseek_args *);
+int linux_getpid(struct thread *, struct linux_getpid_args *);
+int linux_mount(struct thread *, struct linux_mount_args *);
+int linux_oldumount(struct thread *, struct linux_oldumount_args *);
+int linux_setuid16(struct thread *, struct linux_setuid16_args *);
+int linux_getuid16(struct thread *, struct linux_getuid16_args *);
+int linux_stime(struct thread *, struct linux_stime_args *);
+int linux_ptrace(struct thread *, struct linux_ptrace_args *);
+int linux_alarm(struct thread *, struct linux_alarm_args *);
+int linux_pause(struct thread *, struct linux_pause_args *);
+int linux_utime(struct thread *, struct linux_utime_args *);
+int linux_access(struct thread *, struct linux_access_args *);
+int linux_nice(struct thread *, struct linux_nice_args *);
+int linux_kill(struct thread *, struct linux_kill_args *);
+int linux_rename(struct thread *, struct linux_rename_args *);
+int linux_mkdir(struct thread *, struct linux_mkdir_args *);
+int linux_rmdir(struct thread *, struct linux_rmdir_args *);
+int linux_pipe(struct thread *, struct linux_pipe_args *);
+int linux_times(struct thread *, struct linux_times_args *);
+int linux_brk(struct thread *, struct linux_brk_args *);
+int linux_setgid16(struct thread *, struct linux_setgid16_args *);
+int linux_getgid16(struct thread *, struct linux_getgid16_args *);
+int linux_signal(struct thread *, struct linux_signal_args *);
+int linux_geteuid16(struct thread *, struct linux_geteuid16_args *);
+int linux_getegid16(struct thread *, struct linux_getegid16_args *);
+int linux_umount(struct thread *, struct linux_umount_args *);
+int linux_ioctl(struct thread *, struct linux_ioctl_args *);
+int linux_fcntl(struct thread *, struct linux_fcntl_args *);
+int linux_olduname(struct thread *, struct linux_olduname_args *);
+int linux_ustat(struct thread *, struct linux_ustat_args *);
+int linux_getppid(struct thread *, struct linux_getppid_args *);
+int linux_sigaction(struct thread *, struct linux_sigaction_args *);
+int linux_sgetmask(struct thread *, struct linux_sgetmask_args *);
+int linux_ssetmask(struct thread *, struct linux_ssetmask_args *);
+int linux_setreuid16(struct thread *, struct linux_setreuid16_args *);
+int linux_setregid16(struct thread *, struct linux_setregid16_args *);
+int linux_sigsuspend(struct thread *, struct linux_sigsuspend_args *);
+int linux_sigpending(struct thread *, struct linux_sigpending_args *);
+int linux_sethostname(struct thread *, struct linux_sethostname_args *);
+int linux_setrlimit(struct thread *, struct linux_setrlimit_args *);
+int linux_old_getrlimit(struct thread *, struct linux_old_getrlimit_args *);
+int linux_getrusage(struct thread *, struct linux_getrusage_args *);
+int linux_gettimeofday(struct thread *, struct linux_gettimeofday_args *);
+int linux_settimeofday(struct thread *, struct linux_settimeofday_args *);
+int linux_getgroups16(struct thread *, struct linux_getgroups16_args *);
+int linux_setgroups16(struct thread *, struct linux_setgroups16_args *);
+int linux_old_select(struct thread *, struct linux_old_select_args *);
+int linux_symlink(struct thread *, struct linux_symlink_args *);
+int linux_lstat(struct thread *, struct linux_lstat_args *);
+int linux_readlink(struct thread *, struct linux_readlink_args *);
+int linux_reboot(struct thread *, struct linux_reboot_args *);
+int linux_readdir(struct thread *, struct linux_readdir_args *);
+int linux_mmap(struct thread *, struct linux_mmap_args *);
+int linux_truncate(struct thread *, struct linux_truncate_args *);
+int linux_ftruncate(struct thread *, struct linux_ftruncate_args *);
+int linux_getpriority(struct thread *, struct linux_getpriority_args *);
+int linux_statfs(struct thread *, struct linux_statfs_args *);
+int linux_fstatfs(struct thread *, struct linux_fstatfs_args *);
+int linux_socketcall(struct thread *, struct linux_socketcall_args *);
+int linux_syslog(struct thread *, struct linux_syslog_args *);
+int linux_setitimer(struct thread *, struct linux_setitimer_args *);
+int linux_getitimer(struct thread *, struct linux_getitimer_args *);
+int linux_newstat(struct thread *, struct linux_newstat_args *);
+int linux_newlstat(struct thread *, struct linux_newlstat_args *);
+int linux_newfstat(struct thread *, struct linux_newfstat_args *);
+int linux_uname(struct thread *, struct linux_uname_args *);
+int linux_iopl(struct thread *, struct linux_iopl_args *);
+int linux_vhangup(struct thread *, struct linux_vhangup_args *);
+int linux_wait4(struct thread *, struct linux_wait4_args *);
+int linux_swapoff(struct thread *, struct linux_swapoff_args *);
+int linux_sysinfo(struct thread *, struct linux_sysinfo_args *);
+int linux_ipc(struct thread *, struct linux_ipc_args *);
+int linux_sigreturn(struct thread *, struct linux_sigreturn_args *);
+int linux_clone(struct thread *, struct linux_clone_args *);
+int linux_setdomainname(struct thread *, struct linux_setdomainname_args *);
+int linux_newuname(struct thread *, struct linux_newuname_args *);
+int linux_adjtimex(struct thread *, struct linux_adjtimex_args *);
+int linux_mprotect(struct thread *, struct linux_mprotect_args *);
+int linux_sigprocmask(struct thread *, struct linux_sigprocmask_args *);
+int linux_create_module(struct thread *, struct linux_create_module_args *);
+int linux_init_module(struct thread *, struct linux_init_module_args *);
+int linux_delete_module(struct thread *, struct linux_delete_module_args *);
+int linux_get_kernel_syms(struct thread *, struct linux_get_kernel_syms_args *);
+int linux_quotactl(struct thread *, struct linux_quotactl_args *);
+int linux_bdflush(struct thread *, struct linux_bdflush_args *);
+int linux_sysfs(struct thread *, struct linux_sysfs_args *);
+int linux_personality(struct thread *, struct linux_personality_args *);
+int linux_setfsuid16(struct thread *, struct linux_setfsuid16_args *);
+int linux_setfsgid16(struct thread *, struct linux_setfsgid16_args *);
+int linux_llseek(struct thread *, struct linux_llseek_args *);
+int linux_getdents(struct thread *, struct linux_getdents_args *);
+int linux_select(struct thread *, struct linux_select_args *);
+int linux_msync(struct thread *, struct linux_msync_args *);
+int linux_readv(struct thread *, struct linux_readv_args *);
+int linux_writev(struct thread *, struct linux_writev_args *);
+int linux_getsid(struct thread *, struct linux_getsid_args *);
+int linux_fdatasync(struct thread *, struct linux_fdatasync_args *);
+int linux_sysctl(struct thread *, struct linux_sysctl_args *);
+int linux_sched_setscheduler(struct thread *, struct linux_sched_setscheduler_args *);
+int linux_sched_getscheduler(struct thread *, struct linux_sched_getscheduler_args *);
+int linux_sched_get_priority_max(struct thread *, struct linux_sched_get_priority_max_args *);
+int linux_sched_get_priority_min(struct thread *, struct linux_sched_get_priority_min_args *);
+int linux_sched_rr_get_interval(struct thread *, struct linux_sched_rr_get_interval_args *);
+int linux_nanosleep(struct thread *, struct linux_nanosleep_args *);
+int linux_mremap(struct thread *, struct linux_mremap_args *);
+int linux_setresuid16(struct thread *, struct linux_setresuid16_args *);
+int linux_getresuid16(struct thread *, struct linux_getresuid16_args *);
+int linux_query_module(struct thread *, struct linux_query_module_args *);
+int linux_nfsservctl(struct thread *, struct linux_nfsservctl_args *);
+int linux_setresgid16(struct thread *, struct linux_setresgid16_args *);
+int linux_getresgid16(struct thread *, struct linux_getresgid16_args *);
+int linux_prctl(struct thread *, struct linux_prctl_args *);
+int linux_rt_sigreturn(struct thread *, struct linux_rt_sigreturn_args *);
+int linux_rt_sigaction(struct thread *, struct linux_rt_sigaction_args *);
+int linux_rt_sigprocmask(struct thread *, struct linux_rt_sigprocmask_args *);
+int linux_rt_sigpending(struct thread *, struct linux_rt_sigpending_args *);
+int linux_rt_sigtimedwait(struct thread *, struct linux_rt_sigtimedwait_args *);
+int linux_rt_sigqueueinfo(struct thread *, struct linux_rt_sigqueueinfo_args *);
+int linux_rt_sigsuspend(struct thread *, struct linux_rt_sigsuspend_args *);
+int linux_pread(struct thread *, struct linux_pread_args *);
+int linux_pwrite(struct thread *, struct linux_pwrite_args *);
+int linux_chown16(struct thread *, struct linux_chown16_args *);
+int linux_getcwd(struct thread *, struct linux_getcwd_args *);
+int linux_capget(struct thread *, struct linux_capget_args *);
+int linux_capset(struct thread *, struct linux_capset_args *);
+int linux_sigaltstack(struct thread *, struct linux_sigaltstack_args *);
+int linux_sendfile(struct thread *, struct linux_sendfile_args *);
+int linux_vfork(struct thread *, struct linux_vfork_args *);
+int linux_getrlimit(struct thread *, struct linux_getrlimit_args *);
+int linux_mmap2(struct thread *, struct linux_mmap2_args *);
+int linux_truncate64(struct thread *, struct linux_truncate64_args *);
+int linux_ftruncate64(struct thread *, struct linux_ftruncate64_args *);
+int linux_stat64(struct thread *, struct linux_stat64_args *);
+int linux_lstat64(struct thread *, struct linux_lstat64_args *);
+int linux_fstat64(struct thread *, struct linux_fstat64_args *);
+int linux_lchown(struct thread *, struct linux_lchown_args *);
+int linux_getuid(struct thread *, struct linux_getuid_args *);
+int linux_getgid(struct thread *, struct linux_getgid_args *);
+int linux_getgroups(struct thread *, struct linux_getgroups_args *);
+int linux_setgroups(struct thread *, struct linux_setgroups_args *);
+int linux_chown(struct thread *, struct linux_chown_args *);
+int linux_setfsuid(struct thread *, struct linux_setfsuid_args *);
+int linux_setfsgid(struct thread *, struct linux_setfsgid_args *);
+int linux_pivot_root(struct thread *, struct linux_pivot_root_args *);
+int linux_mincore(struct thread *, struct linux_mincore_args *);
+int linux_getdents64(struct thread *, struct linux_getdents64_args *);
+int linux_fcntl64(struct thread *, struct linux_fcntl64_args *);
+int linux_gettid(struct thread *, struct linux_gettid_args *);
+int linux_setxattr(struct thread *, struct linux_setxattr_args *);
+int linux_lsetxattr(struct thread *, struct linux_lsetxattr_args *);
+int linux_fsetxattr(struct thread *, struct linux_fsetxattr_args *);
+int linux_getxattr(struct thread *, struct linux_getxattr_args *);
+int linux_lgetxattr(struct thread *, struct linux_lgetxattr_args *);
+int linux_fgetxattr(struct thread *, struct linux_fgetxattr_args *);
+int linux_listxattr(struct thread *, struct linux_listxattr_args *);
+int linux_llistxattr(struct thread *, struct linux_llistxattr_args *);
+int linux_flistxattr(struct thread *, struct linux_flistxattr_args *);
+int linux_removexattr(struct thread *, struct linux_removexattr_args *);
+int linux_lremovexattr(struct thread *, struct linux_lremovexattr_args *);
+int linux_fremovexattr(struct thread *, struct linux_fremovexattr_args *);
+int linux_tkill(struct thread *, struct linux_tkill_args *);
+int linux_sys_futex(struct thread *, struct linux_sys_futex_args *);
+int linux_sched_setaffinity(struct thread *, struct linux_sched_setaffinity_args *);
+int linux_sched_getaffinity(struct thread *, struct linux_sched_getaffinity_args *);
+int linux_set_thread_area(struct thread *, struct linux_set_thread_area_args *);
+int linux_fadvise64(struct thread *, struct linux_fadvise64_args *);
+int linux_exit_group(struct thread *, struct linux_exit_group_args *);
+int linux_lookup_dcookie(struct thread *, struct linux_lookup_dcookie_args *);
+int linux_epoll_create(struct thread *, struct linux_epoll_create_args *);
+int linux_epoll_ctl(struct thread *, struct linux_epoll_ctl_args *);
+int linux_epoll_wait(struct thread *, struct linux_epoll_wait_args *);
+int linux_remap_file_pages(struct thread *, struct linux_remap_file_pages_args *);
+int linux_set_tid_address(struct thread *, struct linux_set_tid_address_args *);
+int linux_timer_create(struct thread *, struct linux_timer_create_args *);
+int linux_timer_settime(struct thread *, struct linux_timer_settime_args *);
+int linux_timer_gettime(struct thread *, struct linux_timer_gettime_args *);
+int linux_timer_getoverrun(struct thread *, struct linux_timer_getoverrun_args *);
+int linux_timer_delete(struct thread *, struct linux_timer_delete_args *);
+int linux_clock_settime(struct thread *, struct linux_clock_settime_args *);
+int linux_clock_gettime(struct thread *, struct linux_clock_gettime_args *);
+int linux_clock_getres(struct thread *, struct linux_clock_getres_args *);
+int linux_clock_nanosleep(struct thread *, struct linux_clock_nanosleep_args *);
+int linux_statfs64(struct thread *, struct linux_statfs64_args *);
+int linux_fstatfs64(struct thread *, struct linux_fstatfs64_args *);
+int linux_tgkill(struct thread *, struct linux_tgkill_args *);
+int linux_utimes(struct thread *, struct linux_utimes_args *);
+int linux_fadvise64_64(struct thread *, struct linux_fadvise64_64_args *);
+int linux_mbind(struct thread *, struct linux_mbind_args *);
+int linux_get_mempolicy(struct thread *, struct linux_get_mempolicy_args *);
+int linux_set_mempolicy(struct thread *, struct linux_set_mempolicy_args *);
+int linux_mq_open(struct thread *, struct linux_mq_open_args *);
+int linux_mq_unlink(struct thread *, struct linux_mq_unlink_args *);
+int linux_mq_timedsend(struct thread *, struct linux_mq_timedsend_args *);
+int linux_mq_timedreceive(struct thread *, struct linux_mq_timedreceive_args *);
+int linux_mq_notify(struct thread *, struct linux_mq_notify_args *);
+int linux_mq_getsetattr(struct thread *, struct linux_mq_getsetattr_args *);
+int linux_kexec_load(struct thread *, struct linux_kexec_load_args *);
+int linux_waitid(struct thread *, struct linux_waitid_args *);
+int linux_add_key(struct thread *, struct linux_add_key_args *);
+int linux_request_key(struct thread *, struct linux_request_key_args *);
+int linux_keyctl(struct thread *, struct linux_keyctl_args *);
+int linux_ioprio_set(struct thread *, struct linux_ioprio_set_args *);
+int linux_ioprio_get(struct thread *, struct linux_ioprio_get_args *);
+int linux_inotify_init(struct thread *, struct linux_inotify_init_args *);
+int linux_inotify_add_watch(struct thread *, struct linux_inotify_add_watch_args *);
+int linux_inotify_rm_watch(struct thread *, struct linux_inotify_rm_watch_args *);
+int linux_migrate_pages(struct thread *, struct linux_migrate_pages_args *);
+int linux_openat(struct thread *, struct linux_openat_args *);
+int linux_mkdirat(struct thread *, struct linux_mkdirat_args *);
+int linux_mknodat(struct thread *, struct linux_mknodat_args *);
+int linux_fchownat(struct thread *, struct linux_fchownat_args *);
+int linux_futimesat(struct thread *, struct linux_futimesat_args *);
+int linux_fstatat64(struct thread *, struct linux_fstatat64_args *);
+int linux_unlinkat(struct thread *, struct linux_unlinkat_args *);
+int linux_renameat(struct thread *, struct linux_renameat_args *);
+int linux_linkat(struct thread *, struct linux_linkat_args *);
+int linux_symlinkat(struct thread *, struct linux_symlinkat_args *);
+int linux_readlinkat(struct thread *, struct linux_readlinkat_args *);
+int linux_fchmodat(struct thread *, struct linux_fchmodat_args *);
+int linux_faccessat(struct thread *, struct linux_faccessat_args *);
+int linux_pselect6(struct thread *, struct linux_pselect6_args *);
+int linux_ppoll(struct thread *, struct linux_ppoll_args *);
+int linux_unshare(struct thread *, struct linux_unshare_args *);
+int linux_set_robust_list(struct thread *, struct linux_set_robust_list_args *);
+int linux_get_robust_list(struct thread *, struct linux_get_robust_list_args *);
+int linux_splice(struct thread *, struct linux_splice_args *);
+int linux_sync_file_range(struct thread *, struct linux_sync_file_range_args *);
+int linux_tee(struct thread *, struct linux_tee_args *);
+int linux_vmsplice(struct thread *, struct linux_vmsplice_args *);
+int linux_move_pages(struct thread *, struct linux_move_pages_args *);
+int linux_getcpu(struct thread *, struct linux_getcpu_args *);
+int linux_epoll_pwait(struct thread *, struct linux_epoll_pwait_args *);
+int linux_utimensat(struct thread *, struct linux_utimensat_args *);
+int linux_signalfd(struct thread *, struct linux_signalfd_args *);
+int linux_timerfd_create(struct thread *, struct linux_timerfd_create_args *);
+int linux_eventfd(struct thread *, struct linux_eventfd_args *);
+int linux_fallocate(struct thread *, struct linux_fallocate_args *);
+int linux_timerfd_settime(struct thread *, struct linux_timerfd_settime_args *);
+int linux_timerfd_gettime(struct thread *, struct linux_timerfd_gettime_args *);
+int linux_signalfd4(struct thread *, struct linux_signalfd4_args *);
+int linux_eventfd2(struct thread *, struct linux_eventfd2_args *);
+int linux_epoll_create1(struct thread *, struct linux_epoll_create1_args *);
+int linux_dup3(struct thread *, struct linux_dup3_args *);
+int linux_pipe2(struct thread *, struct linux_pipe2_args *);
+int linux_inotify_init1(struct thread *, struct linux_inotify_init1_args *);
+int linux_preadv(struct thread *, struct linux_preadv_args *);
+int linux_pwritev(struct thread *, struct linux_pwritev_args *);
+int linux_rt_tsigqueueinfo(struct thread *, struct linux_rt_tsigqueueinfo_args *);
+int linux_perf_event_open(struct thread *, struct linux_perf_event_open_args *);
+int linux_recvmmsg(struct thread *, struct linux_recvmmsg_args *);
+int linux_fanotify_init(struct thread *, struct linux_fanotify_init_args *);
+int linux_fanotify_mark(struct thread *, struct linux_fanotify_mark_args *);
+int linux_prlimit64(struct thread *, struct linux_prlimit64_args *);
+int linux_name_to_handle_at(struct thread *, struct linux_name_to_handle_at_args *);
+int linux_open_by_handle_at(struct thread *, struct linux_open_by_handle_at_args *);
+int linux_clock_adjtime(struct thread *, struct linux_clock_adjtime_args *);
+int linux_syncfs(struct thread *, struct linux_syncfs_args *);
+int linux_sendmmsg(struct thread *, struct linux_sendmmsg_args *);
+int linux_setns(struct thread *, struct linux_setns_args *);
+int linux_process_vm_readv(struct thread *, struct linux_process_vm_readv_args *);
+int linux_process_vm_writev(struct thread *, struct linux_process_vm_writev_args *);
+
+#ifdef COMPAT_43
+
+#define nosys linux_nosys
+
+#endif /* COMPAT_43 */
+
+
+#ifdef COMPAT_FREEBSD4
+
+#define nosys linux_nosys
+
+#endif /* COMPAT_FREEBSD4 */
+
+
+#ifdef COMPAT_FREEBSD6
+
+#define nosys linux_nosys
+
+#endif /* COMPAT_FREEBSD6 */
+
+
+#ifdef COMPAT_FREEBSD7
+
+#define nosys linux_nosys
+
+#endif /* COMPAT_FREEBSD7 */
+
+#define LINUX_SYS_AUE_linux_fork AUE_FORK
+#define LINUX_SYS_AUE_linux_open AUE_OPEN_RWTC
+#define LINUX_SYS_AUE_linux_waitpid AUE_WAIT4
+#define LINUX_SYS_AUE_linux_creat AUE_CREAT
+#define LINUX_SYS_AUE_linux_link AUE_LINK
+#define LINUX_SYS_AUE_linux_unlink AUE_UNLINK
+#define LINUX_SYS_AUE_linux_execve AUE_EXECVE
+#define LINUX_SYS_AUE_linux_chdir AUE_CHDIR
+#define LINUX_SYS_AUE_linux_time AUE_NULL
+#define LINUX_SYS_AUE_linux_mknod AUE_MKNOD
+#define LINUX_SYS_AUE_linux_chmod AUE_CHMOD
+#define LINUX_SYS_AUE_linux_lchown16 AUE_LCHOWN
+#define LINUX_SYS_AUE_linux_stat AUE_STAT
+#define LINUX_SYS_AUE_linux_lseek AUE_LSEEK
+#define LINUX_SYS_AUE_linux_getpid AUE_GETPID
+#define LINUX_SYS_AUE_linux_mount AUE_MOUNT
+#define LINUX_SYS_AUE_linux_oldumount AUE_UMOUNT
+#define LINUX_SYS_AUE_linux_setuid16 AUE_SETUID
+#define LINUX_SYS_AUE_linux_getuid16 AUE_GETUID
+#define LINUX_SYS_AUE_linux_stime AUE_SETTIMEOFDAY
+#define LINUX_SYS_AUE_linux_ptrace AUE_PTRACE
+#define LINUX_SYS_AUE_linux_alarm AUE_NULL
+#define LINUX_SYS_AUE_linux_pause AUE_NULL
+#define LINUX_SYS_AUE_linux_utime AUE_UTIME
+#define LINUX_SYS_AUE_linux_access AUE_ACCESS
+#define LINUX_SYS_AUE_linux_nice AUE_NICE
+#define LINUX_SYS_AUE_linux_kill AUE_KILL
+#define LINUX_SYS_AUE_linux_rename AUE_RENAME
+#define LINUX_SYS_AUE_linux_mkdir AUE_MKDIR
+#define LINUX_SYS_AUE_linux_rmdir AUE_RMDIR
+#define LINUX_SYS_AUE_linux_pipe AUE_PIPE
+#define LINUX_SYS_AUE_linux_times AUE_NULL
+#define LINUX_SYS_AUE_linux_brk AUE_NULL
+#define LINUX_SYS_AUE_linux_setgid16 AUE_SETGID
+#define LINUX_SYS_AUE_linux_getgid16 AUE_GETGID
+#define LINUX_SYS_AUE_linux_signal AUE_NULL
+#define LINUX_SYS_AUE_linux_geteuid16 AUE_GETEUID
+#define LINUX_SYS_AUE_linux_getegid16 AUE_GETEGID
+#define LINUX_SYS_AUE_linux_umount AUE_UMOUNT
+#define LINUX_SYS_AUE_linux_ioctl AUE_IOCTL
+#define LINUX_SYS_AUE_linux_fcntl AUE_FCNTL
+#define LINUX_SYS_AUE_linux_olduname AUE_NULL
+#define LINUX_SYS_AUE_linux_ustat AUE_NULL
+#define LINUX_SYS_AUE_linux_getppid AUE_GETPPID
+#define LINUX_SYS_AUE_linux_sigaction AUE_NULL
+#define LINUX_SYS_AUE_linux_sgetmask AUE_NULL
+#define LINUX_SYS_AUE_linux_ssetmask AUE_NULL
+#define LINUX_SYS_AUE_linux_setreuid16 AUE_SETREUID
+#define LINUX_SYS_AUE_linux_setregid16 AUE_SETREGID
+#define LINUX_SYS_AUE_linux_sigsuspend AUE_NULL
+#define LINUX_SYS_AUE_linux_sigpending AUE_NULL
+#define LINUX_SYS_AUE_linux_sethostname AUE_SYSCTL
+#define LINUX_SYS_AUE_linux_setrlimit AUE_SETRLIMIT
+#define LINUX_SYS_AUE_linux_old_getrlimit AUE_GETRLIMIT
+#define LINUX_SYS_AUE_linux_getrusage AUE_GETRUSAGE
+#define LINUX_SYS_AUE_linux_gettimeofday AUE_NULL
+#define LINUX_SYS_AUE_linux_settimeofday AUE_SETTIMEOFDAY
+#define LINUX_SYS_AUE_linux_getgroups16 AUE_GETGROUPS
+#define LINUX_SYS_AUE_linux_setgroups16 AUE_SETGROUPS
+#define LINUX_SYS_AUE_linux_old_select AUE_SELECT
+#define LINUX_SYS_AUE_linux_symlink AUE_SYMLINK
+#define LINUX_SYS_AUE_linux_lstat AUE_LSTAT
+#define LINUX_SYS_AUE_linux_readlink AUE_READLINK
+#define LINUX_SYS_AUE_linux_reboot AUE_REBOOT
+#define LINUX_SYS_AUE_linux_readdir AUE_GETDIRENTRIES
+#define LINUX_SYS_AUE_linux_mmap AUE_MMAP
+#define LINUX_SYS_AUE_linux_truncate AUE_TRUNCATE
+#define LINUX_SYS_AUE_linux_ftruncate AUE_FTRUNCATE
+#define LINUX_SYS_AUE_linux_getpriority AUE_GETPRIORITY
+#define LINUX_SYS_AUE_linux_statfs AUE_STATFS
+#define LINUX_SYS_AUE_linux_fstatfs AUE_FSTATFS
+#define LINUX_SYS_AUE_linux_socketcall AUE_NULL
+#define LINUX_SYS_AUE_linux_syslog AUE_NULL
+#define LINUX_SYS_AUE_linux_setitimer AUE_SETITIMER
+#define LINUX_SYS_AUE_linux_getitimer AUE_GETITIMER
+#define LINUX_SYS_AUE_linux_newstat AUE_STAT
+#define LINUX_SYS_AUE_linux_newlstat AUE_LSTAT
+#define LINUX_SYS_AUE_linux_newfstat AUE_FSTAT
+#define LINUX_SYS_AUE_linux_uname AUE_NULL
+#define LINUX_SYS_AUE_linux_iopl AUE_NULL
+#define LINUX_SYS_AUE_linux_vhangup AUE_NULL
+#define LINUX_SYS_AUE_linux_wait4 AUE_WAIT4
+#define LINUX_SYS_AUE_linux_swapoff AUE_SWAPOFF
+#define LINUX_SYS_AUE_linux_sysinfo AUE_NULL
+#define LINUX_SYS_AUE_linux_ipc AUE_NULL
+#define LINUX_SYS_AUE_linux_sigreturn AUE_SIGRETURN
+#define LINUX_SYS_AUE_linux_clone AUE_RFORK
+#define LINUX_SYS_AUE_linux_setdomainname AUE_SYSCTL
+#define LINUX_SYS_AUE_linux_newuname AUE_NULL
+#define LINUX_SYS_AUE_linux_adjtimex AUE_ADJTIME
+#define LINUX_SYS_AUE_linux_mprotect AUE_MPROTECT
+#define LINUX_SYS_AUE_linux_sigprocmask AUE_SIGPROCMASK
+#define LINUX_SYS_AUE_linux_create_module AUE_NULL
+#define LINUX_SYS_AUE_linux_init_module AUE_NULL
+#define LINUX_SYS_AUE_linux_delete_module AUE_NULL
+#define LINUX_SYS_AUE_linux_get_kernel_syms AUE_NULL
+#define LINUX_SYS_AUE_linux_quotactl AUE_QUOTACTL
+#define LINUX_SYS_AUE_linux_bdflush AUE_BDFLUSH
+#define LINUX_SYS_AUE_linux_sysfs AUE_NULL
+#define LINUX_SYS_AUE_linux_personality AUE_PERSONALITY
+#define LINUX_SYS_AUE_linux_setfsuid16 AUE_SETFSUID
+#define LINUX_SYS_AUE_linux_setfsgid16 AUE_SETFSGID
+#define LINUX_SYS_AUE_linux_llseek AUE_LSEEK
+#define LINUX_SYS_AUE_linux_getdents AUE_GETDIRENTRIES
+#define LINUX_SYS_AUE_linux_select AUE_SELECT
+#define LINUX_SYS_AUE_linux_msync AUE_MSYNC
+#define LINUX_SYS_AUE_linux_readv AUE_READV
+#define LINUX_SYS_AUE_linux_writev AUE_WRITEV
+#define LINUX_SYS_AUE_linux_getsid AUE_GETSID
+#define LINUX_SYS_AUE_linux_fdatasync AUE_NULL
+#define LINUX_SYS_AUE_linux_sysctl AUE_SYSCTL
+#define LINUX_SYS_AUE_linux_sched_setscheduler AUE_SCHED_SETSCHEDULER
+#define LINUX_SYS_AUE_linux_sched_getscheduler AUE_SCHED_GETSCHEDULER
+#define LINUX_SYS_AUE_linux_sched_get_priority_max AUE_SCHED_GET_PRIORITY_MAX
+#define LINUX_SYS_AUE_linux_sched_get_priority_min AUE_SCHED_GET_PRIORITY_MIN
+#define LINUX_SYS_AUE_linux_sched_rr_get_interval AUE_SCHED_RR_GET_INTERVAL
+#define LINUX_SYS_AUE_linux_nanosleep AUE_NULL
+#define LINUX_SYS_AUE_linux_mremap AUE_NULL
+#define LINUX_SYS_AUE_linux_setresuid16 AUE_SETRESUID
+#define LINUX_SYS_AUE_linux_getresuid16 AUE_GETRESUID
+#define LINUX_SYS_AUE_linux_query_module AUE_NULL
+#define LINUX_SYS_AUE_linux_nfsservctl AUE_NULL
+#define LINUX_SYS_AUE_linux_setresgid16 AUE_SETRESGID
+#define LINUX_SYS_AUE_linux_getresgid16 AUE_GETRESGID
+#define LINUX_SYS_AUE_linux_prctl AUE_PRCTL
+#define LINUX_SYS_AUE_linux_rt_sigreturn AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_sigaction AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_sigprocmask AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_sigpending AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_sigtimedwait AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_sigqueueinfo AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_sigsuspend AUE_NULL
+#define LINUX_SYS_AUE_linux_pread AUE_PREAD
+#define LINUX_SYS_AUE_linux_pwrite AUE_PWRITE
+#define LINUX_SYS_AUE_linux_chown16 AUE_CHOWN
+#define LINUX_SYS_AUE_linux_getcwd AUE_GETCWD
+#define LINUX_SYS_AUE_linux_capget AUE_CAPGET
+#define LINUX_SYS_AUE_linux_capset AUE_CAPSET
+#define LINUX_SYS_AUE_linux_sigaltstack AUE_NULL
+#define LINUX_SYS_AUE_linux_sendfile AUE_SENDFILE
+#define LINUX_SYS_AUE_linux_vfork AUE_VFORK
+#define LINUX_SYS_AUE_linux_getrlimit AUE_GETRLIMIT
+#define LINUX_SYS_AUE_linux_mmap2 AUE_MMAP
+#define LINUX_SYS_AUE_linux_truncate64 AUE_TRUNCATE
+#define LINUX_SYS_AUE_linux_ftruncate64 AUE_FTRUNCATE
+#define LINUX_SYS_AUE_linux_stat64 AUE_STAT
+#define LINUX_SYS_AUE_linux_lstat64 AUE_LSTAT
+#define LINUX_SYS_AUE_linux_fstat64 AUE_FSTAT
+#define LINUX_SYS_AUE_linux_lchown AUE_LCHOWN
+#define LINUX_SYS_AUE_linux_getuid AUE_GETUID
+#define LINUX_SYS_AUE_linux_getgid AUE_GETGID
+#define LINUX_SYS_AUE_linux_getgroups AUE_GETGROUPS
+#define LINUX_SYS_AUE_linux_setgroups AUE_SETGROUPS
+#define LINUX_SYS_AUE_linux_chown AUE_CHOWN
+#define LINUX_SYS_AUE_linux_setfsuid AUE_SETFSUID
+#define LINUX_SYS_AUE_linux_setfsgid AUE_SETFSGID
+#define LINUX_SYS_AUE_linux_pivot_root AUE_PIVOT_ROOT
+#define LINUX_SYS_AUE_linux_mincore AUE_MINCORE
+#define LINUX_SYS_AUE_linux_getdents64 AUE_GETDIRENTRIES
+#define LINUX_SYS_AUE_linux_fcntl64 AUE_FCNTL
+#define LINUX_SYS_AUE_linux_gettid AUE_NULL
+#define LINUX_SYS_AUE_linux_setxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_lsetxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_fsetxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_getxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_lgetxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_fgetxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_listxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_llistxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_flistxattr AUE_NULL
+#define LINUX_SYS_AUE_linux_removexattr AUE_NULL
+#define LINUX_SYS_AUE_linux_lremovexattr AUE_NULL
+#define LINUX_SYS_AUE_linux_fremovexattr AUE_NULL
+#define LINUX_SYS_AUE_linux_tkill AUE_NULL
+#define LINUX_SYS_AUE_linux_sys_futex AUE_NULL
+#define LINUX_SYS_AUE_linux_sched_setaffinity AUE_NULL
+#define LINUX_SYS_AUE_linux_sched_getaffinity AUE_NULL
+#define LINUX_SYS_AUE_linux_set_thread_area AUE_NULL
+#define LINUX_SYS_AUE_linux_fadvise64 AUE_NULL
+#define LINUX_SYS_AUE_linux_exit_group AUE_EXIT
+#define LINUX_SYS_AUE_linux_lookup_dcookie AUE_NULL
+#define LINUX_SYS_AUE_linux_epoll_create AUE_NULL
+#define LINUX_SYS_AUE_linux_epoll_ctl AUE_NULL
+#define LINUX_SYS_AUE_linux_epoll_wait AUE_NULL
+#define LINUX_SYS_AUE_linux_remap_file_pages AUE_NULL
+#define LINUX_SYS_AUE_linux_set_tid_address AUE_NULL
+#define LINUX_SYS_AUE_linux_timer_create AUE_NULL
+#define LINUX_SYS_AUE_linux_timer_settime AUE_NULL
+#define LINUX_SYS_AUE_linux_timer_gettime AUE_NULL
+#define LINUX_SYS_AUE_linux_timer_getoverrun AUE_NULL
+#define LINUX_SYS_AUE_linux_timer_delete AUE_NULL
+#define LINUX_SYS_AUE_linux_clock_settime AUE_CLOCK_SETTIME
+#define LINUX_SYS_AUE_linux_clock_gettime AUE_NULL
+#define LINUX_SYS_AUE_linux_clock_getres AUE_NULL
+#define LINUX_SYS_AUE_linux_clock_nanosleep AUE_NULL
+#define LINUX_SYS_AUE_linux_statfs64 AUE_STATFS
+#define LINUX_SYS_AUE_linux_fstatfs64 AUE_FSTATFS
+#define LINUX_SYS_AUE_linux_tgkill AUE_NULL
+#define LINUX_SYS_AUE_linux_utimes AUE_UTIMES
+#define LINUX_SYS_AUE_linux_fadvise64_64 AUE_NULL
+#define LINUX_SYS_AUE_linux_mbind AUE_NULL
+#define LINUX_SYS_AUE_linux_get_mempolicy AUE_NULL
+#define LINUX_SYS_AUE_linux_set_mempolicy AUE_NULL
+#define LINUX_SYS_AUE_linux_mq_open AUE_NULL
+#define LINUX_SYS_AUE_linux_mq_unlink AUE_NULL
+#define LINUX_SYS_AUE_linux_mq_timedsend AUE_NULL
+#define LINUX_SYS_AUE_linux_mq_timedreceive AUE_NULL
+#define LINUX_SYS_AUE_linux_mq_notify AUE_NULL
+#define LINUX_SYS_AUE_linux_mq_getsetattr AUE_NULL
+#define LINUX_SYS_AUE_linux_kexec_load AUE_NULL
+#define LINUX_SYS_AUE_linux_waitid AUE_NULL
+#define LINUX_SYS_AUE_linux_add_key AUE_NULL
+#define LINUX_SYS_AUE_linux_request_key AUE_NULL
+#define LINUX_SYS_AUE_linux_keyctl AUE_NULL
+#define LINUX_SYS_AUE_linux_ioprio_set AUE_NULL
+#define LINUX_SYS_AUE_linux_ioprio_get AUE_NULL
+#define LINUX_SYS_AUE_linux_inotify_init AUE_NULL
+#define LINUX_SYS_AUE_linux_inotify_add_watch AUE_NULL
+#define LINUX_SYS_AUE_linux_inotify_rm_watch AUE_NULL
+#define LINUX_SYS_AUE_linux_migrate_pages AUE_NULL
+#define LINUX_SYS_AUE_linux_openat AUE_OPEN_RWTC
+#define LINUX_SYS_AUE_linux_mkdirat AUE_MKDIRAT
+#define LINUX_SYS_AUE_linux_mknodat AUE_MKNODAT
+#define LINUX_SYS_AUE_linux_fchownat AUE_FCHOWNAT
+#define LINUX_SYS_AUE_linux_futimesat AUE_FUTIMESAT
+#define LINUX_SYS_AUE_linux_fstatat64 AUE_FSTATAT
+#define LINUX_SYS_AUE_linux_unlinkat AUE_UNLINKAT
+#define LINUX_SYS_AUE_linux_renameat AUE_RENAMEAT
+#define LINUX_SYS_AUE_linux_linkat AUE_LINKAT
+#define LINUX_SYS_AUE_linux_symlinkat AUE_SYMLINKAT
+#define LINUX_SYS_AUE_linux_readlinkat AUE_READLINKAT
+#define LINUX_SYS_AUE_linux_fchmodat AUE_FCHMODAT
+#define LINUX_SYS_AUE_linux_faccessat AUE_FACCESSAT
+#define LINUX_SYS_AUE_linux_pselect6 AUE_NULL
+#define LINUX_SYS_AUE_linux_ppoll AUE_NULL
+#define LINUX_SYS_AUE_linux_unshare AUE_NULL
+#define LINUX_SYS_AUE_linux_set_robust_list AUE_NULL
+#define LINUX_SYS_AUE_linux_get_robust_list AUE_NULL
+#define LINUX_SYS_AUE_linux_splice AUE_NULL
+#define LINUX_SYS_AUE_linux_sync_file_range AUE_NULL
+#define LINUX_SYS_AUE_linux_tee AUE_NULL
+#define LINUX_SYS_AUE_linux_vmsplice AUE_NULL
+#define LINUX_SYS_AUE_linux_move_pages AUE_NULL
+#define LINUX_SYS_AUE_linux_getcpu AUE_NULL
+#define LINUX_SYS_AUE_linux_epoll_pwait AUE_NULL
+#define LINUX_SYS_AUE_linux_utimensat AUE_NULL
+#define LINUX_SYS_AUE_linux_signalfd AUE_NULL
+#define LINUX_SYS_AUE_linux_timerfd_create AUE_NULL
+#define LINUX_SYS_AUE_linux_eventfd AUE_NULL
+#define LINUX_SYS_AUE_linux_fallocate AUE_NULL
+#define LINUX_SYS_AUE_linux_timerfd_settime AUE_NULL
+#define LINUX_SYS_AUE_linux_timerfd_gettime AUE_NULL
+#define LINUX_SYS_AUE_linux_signalfd4 AUE_NULL
+#define LINUX_SYS_AUE_linux_eventfd2 AUE_NULL
+#define LINUX_SYS_AUE_linux_epoll_create1 AUE_NULL
+#define LINUX_SYS_AUE_linux_dup3 AUE_NULL
+#define LINUX_SYS_AUE_linux_pipe2 AUE_NULL
+#define LINUX_SYS_AUE_linux_inotify_init1 AUE_NULL
+#define LINUX_SYS_AUE_linux_preadv AUE_NULL
+#define LINUX_SYS_AUE_linux_pwritev AUE_NULL
+#define LINUX_SYS_AUE_linux_rt_tsigqueueinfo AUE_NULL
+#define LINUX_SYS_AUE_linux_perf_event_open AUE_NULL
+#define LINUX_SYS_AUE_linux_recvmmsg AUE_NULL
+#define LINUX_SYS_AUE_linux_fanotify_init AUE_NULL
+#define LINUX_SYS_AUE_linux_fanotify_mark AUE_NULL
+#define LINUX_SYS_AUE_linux_prlimit64 AUE_NULL
+#define LINUX_SYS_AUE_linux_name_to_handle_at AUE_NULL
+#define LINUX_SYS_AUE_linux_open_by_handle_at AUE_NULL
+#define LINUX_SYS_AUE_linux_clock_adjtime AUE_NULL
+#define LINUX_SYS_AUE_linux_syncfs AUE_NULL
+#define LINUX_SYS_AUE_linux_sendmmsg AUE_NULL
+#define LINUX_SYS_AUE_linux_setns AUE_NULL
+#define LINUX_SYS_AUE_linux_process_vm_readv AUE_NULL
+#define LINUX_SYS_AUE_linux_process_vm_writev AUE_NULL
+
+#undef PAD_
+#undef PADL_
+#undef PADR_
+
+#endif /* !_LINUX_SYSPROTO_H_ */
diff --git a/sys/amd64/linux32/linux32_support.s b/sys/amd64/linux32/linux32_support.s
new file mode 100644
index 0000000..42375c3
--- /dev/null
+++ b/sys/amd64/linux32/linux32_support.s
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2007 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "linux32_assym.h" /* system definitions */
+#include <machine/asmacros.h> /* miscellaneous asm macros */
+
+#include "assym.s"
+
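+/*
+ * Fault recovery target for the user-space accesses below.  If any
+ * of the futex_* entries faults while touching user memory, the
+ * onfault handler lands here: clear pcb_onfault and hand -EFAULT
+ * back to the caller.  %r8 holds curpcb throughout these routines.
+ */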
+futex_fault:
+ movq $0,PCB_ONFAULT(%r8)
+ movl $-EFAULT,%eax
+ ret
+
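+/*
+ * Atomically exchange %edi with the 32-bit user word at (%rsi) and
+ * return the previous contents through (%rdx).  Assumed C prototype,
+ * judging from the register usage (not taken from this diff):
+ * int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval).
+ * Each entry below installs futex_fault in pcb_onfault and rejects
+ * addresses above VM_MAXUSER_ADDRESS-4, so a wild or kernel address
+ * fails with -EFAULT instead of being dereferenced.
+ */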
+ENTRY(futex_xchgl)
+ movq PCPU(CURPCB),%r8
+ movq $futex_fault,PCB_ONFAULT(%r8)
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rsi
+ ja futex_fault
+ xchgl %edi,(%rsi)
+ movl %edi,(%rdx)
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
+
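+/*
+ * Atomically add %edi to the user word at (%rsi) via xadd
+ * (lock-prefixed on SMP kernels) and return the pre-add value
+ * through (%rdx).
+ */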
+ENTRY(futex_addl)
+ movq PCPU(CURPCB),%r8
+ movq $futex_fault,PCB_ONFAULT(%r8)
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rsi
+ ja futex_fault
+#ifdef SMP
+ lock
+#endif
+ xaddl %edi,(%rsi)
+ movl %edi,(%rdx)
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
+
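+/*
+ * Atomically OR %edi into the user word at (%rsi).  There is no
+ * fetch-and-or instruction, so load the old value and retry a
+ * cmpxchg loop until the update commits without interference; the
+ * old value is returned through (%rdx).
+ */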
+ENTRY(futex_orl)
+ movq PCPU(CURPCB),%r8
+ movq $futex_fault,PCB_ONFAULT(%r8)
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rsi
+ ja futex_fault
+ movl (%rsi),%eax
+1: movl %eax,%ecx
+ orl %edi,%ecx
+#ifdef SMP
+ lock
+#endif
+ cmpxchgl %ecx,(%rsi)
+ jnz 1b
+ movl %eax,(%rdx)
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
+
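+/*
+ * Atomically AND %edi into the user word at (%rsi); same cmpxchg
+ * retry loop as futex_orl.
+ */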
+ENTRY(futex_andl)
+ movq PCPU(CURPCB),%r8
+ movq $futex_fault,PCB_ONFAULT(%r8)
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rsi
+ ja futex_fault
+ movl (%rsi),%eax
+1: movl %eax,%ecx
+ andl %edi,%ecx
+#ifdef SMP
+ lock
+#endif
+ cmpxchgl %ecx,(%rsi)
+ jnz 1b
+ movl %eax,(%rdx)
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
+
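+/*
+ * Atomically XOR %edi into the user word at (%rsi); same cmpxchg
+ * retry loop as futex_orl.
+ */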
+ENTRY(futex_xorl)
+ movq PCPU(CURPCB),%r8
+ movq $futex_fault,PCB_ONFAULT(%r8)
+ movq $VM_MAXUSER_ADDRESS-4,%rax
+ cmpq %rax,%rsi
+ ja futex_fault
+ movl (%rsi),%eax
+1: movl %eax,%ecx
+ xorl %edi,%ecx
+#ifdef SMP
+ lock
+#endif
+ cmpxchgl %ecx,(%rsi)
+ jnz 1b
+ movl %eax,(%rdx)
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%r8)
+ ret
diff --git a/sys/amd64/linux32/linux32_syscall.h b/sys/amd64/linux32/linux32_syscall.h
new file mode 100644
index 0000000..5a411f8
--- /dev/null
+++ b/sys/amd64/linux32/linux32_syscall.h
@@ -0,0 +1,324 @@
+/*
+ * System call numbers.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * $FreeBSD$
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
+ */
+
+#define LINUX_SYS_exit 1
+#define LINUX_SYS_linux_fork 2
+#define LINUX_SYS_read 3
+#define LINUX_SYS_write 4
+#define LINUX_SYS_linux_open 5
+#define LINUX_SYS_close 6
+#define LINUX_SYS_linux_waitpid 7
+#define LINUX_SYS_linux_creat 8
+#define LINUX_SYS_linux_link 9
+#define LINUX_SYS_linux_unlink 10
+#define LINUX_SYS_linux_execve 11
+#define LINUX_SYS_linux_chdir 12
+#define LINUX_SYS_linux_time 13
+#define LINUX_SYS_linux_mknod 14
+#define LINUX_SYS_linux_chmod 15
+#define LINUX_SYS_linux_lchown16 16
+#define LINUX_SYS_linux_stat 18
+#define LINUX_SYS_linux_lseek 19
+#define LINUX_SYS_linux_getpid 20
+#define LINUX_SYS_linux_mount 21
+#define LINUX_SYS_linux_oldumount 22
+#define LINUX_SYS_linux_setuid16 23
+#define LINUX_SYS_linux_getuid16 24
+#define LINUX_SYS_linux_stime 25
+#define LINUX_SYS_linux_ptrace 26
+#define LINUX_SYS_linux_alarm 27
+#define LINUX_SYS_linux_pause 29
+#define LINUX_SYS_linux_utime 30
+#define LINUX_SYS_linux_access 33
+#define LINUX_SYS_linux_nice 34
+#define LINUX_SYS_sync 36
+#define LINUX_SYS_linux_kill 37
+#define LINUX_SYS_linux_rename 38
+#define LINUX_SYS_linux_mkdir 39
+#define LINUX_SYS_linux_rmdir 40
+#define LINUX_SYS_dup 41
+#define LINUX_SYS_linux_pipe 42
+#define LINUX_SYS_linux_times 43
+#define LINUX_SYS_linux_brk 45
+#define LINUX_SYS_linux_setgid16 46
+#define LINUX_SYS_linux_getgid16 47
+#define LINUX_SYS_linux_signal 48
+#define LINUX_SYS_linux_geteuid16 49
+#define LINUX_SYS_linux_getegid16 50
+#define LINUX_SYS_acct 51
+#define LINUX_SYS_linux_umount 52
+#define LINUX_SYS_linux_ioctl 54
+#define LINUX_SYS_linux_fcntl 55
+#define LINUX_SYS_setpgid 57
+#define LINUX_SYS_linux_olduname 59
+#define LINUX_SYS_umask 60
+#define LINUX_SYS_chroot 61
+#define LINUX_SYS_linux_ustat 62
+#define LINUX_SYS_dup2 63
+#define LINUX_SYS_linux_getppid 64
+#define LINUX_SYS_getpgrp 65
+#define LINUX_SYS_setsid 66
+#define LINUX_SYS_linux_sigaction 67
+#define LINUX_SYS_linux_sgetmask 68
+#define LINUX_SYS_linux_ssetmask 69
+#define LINUX_SYS_linux_setreuid16 70
+#define LINUX_SYS_linux_setregid16 71
+#define LINUX_SYS_linux_sigsuspend 72
+#define LINUX_SYS_linux_sigpending 73
+#define LINUX_SYS_linux_sethostname 74
+#define LINUX_SYS_linux_setrlimit 75
+#define LINUX_SYS_linux_old_getrlimit 76
+#define LINUX_SYS_linux_getrusage 77
+#define LINUX_SYS_linux_gettimeofday 78
+#define LINUX_SYS_linux_settimeofday 79
+#define LINUX_SYS_linux_getgroups16 80
+#define LINUX_SYS_linux_setgroups16 81
+#define LINUX_SYS_linux_old_select 82
+#define LINUX_SYS_linux_symlink 83
+#define LINUX_SYS_linux_lstat 84
+#define LINUX_SYS_linux_readlink 85
+#define LINUX_SYS_swapon 87
+#define LINUX_SYS_linux_reboot 88
+#define LINUX_SYS_linux_readdir 89
+#define LINUX_SYS_linux_mmap 90
+#define LINUX_SYS_munmap 91
+#define LINUX_SYS_linux_truncate 92
+#define LINUX_SYS_linux_ftruncate 93
+#define LINUX_SYS_fchmod 94
+#define LINUX_SYS_fchown 95
+#define LINUX_SYS_linux_getpriority 96
+#define LINUX_SYS_setpriority 97
+#define LINUX_SYS_linux_statfs 99
+#define LINUX_SYS_linux_fstatfs 100
+#define LINUX_SYS_linux_socketcall 102
+#define LINUX_SYS_linux_syslog 103
+#define LINUX_SYS_linux_setitimer 104
+#define LINUX_SYS_linux_getitimer 105
+#define LINUX_SYS_linux_newstat 106
+#define LINUX_SYS_linux_newlstat 107
+#define LINUX_SYS_linux_newfstat 108
+#define LINUX_SYS_linux_uname 109
+#define LINUX_SYS_linux_iopl 110
+#define LINUX_SYS_linux_vhangup 111
+#define LINUX_SYS_linux_wait4 114
+#define LINUX_SYS_linux_swapoff 115
+#define LINUX_SYS_linux_sysinfo 116
+#define LINUX_SYS_linux_ipc 117
+#define LINUX_SYS_fsync 118
+#define LINUX_SYS_linux_sigreturn 119
+#define LINUX_SYS_linux_clone 120
+#define LINUX_SYS_linux_setdomainname 121
+#define LINUX_SYS_linux_newuname 122
+#define LINUX_SYS_linux_adjtimex 124
+#define LINUX_SYS_linux_mprotect 125
+#define LINUX_SYS_linux_sigprocmask 126
+#define LINUX_SYS_linux_create_module 127
+#define LINUX_SYS_linux_init_module 128
+#define LINUX_SYS_linux_delete_module 129
+#define LINUX_SYS_linux_get_kernel_syms 130
+#define LINUX_SYS_linux_quotactl 131
+#define LINUX_SYS_getpgid 132
+#define LINUX_SYS_fchdir 133
+#define LINUX_SYS_linux_bdflush 134
+#define LINUX_SYS_linux_sysfs 135
+#define LINUX_SYS_linux_personality 136
+#define LINUX_SYS_linux_setfsuid16 138
+#define LINUX_SYS_linux_setfsgid16 139
+#define LINUX_SYS_linux_llseek 140
+#define LINUX_SYS_linux_getdents 141
+#define LINUX_SYS_linux_select 142
+#define LINUX_SYS_flock 143
+#define LINUX_SYS_linux_msync 144
+#define LINUX_SYS_linux_readv 145
+#define LINUX_SYS_linux_writev 146
+#define LINUX_SYS_linux_getsid 147
+#define LINUX_SYS_linux_fdatasync 148
+#define LINUX_SYS_linux_sysctl 149
+#define LINUX_SYS_mlock 150
+#define LINUX_SYS_munlock 151
+#define LINUX_SYS_mlockall 152
+#define LINUX_SYS_munlockall 153
+#define LINUX_SYS_sched_setparam 154
+#define LINUX_SYS_sched_getparam 155
+#define LINUX_SYS_linux_sched_setscheduler 156
+#define LINUX_SYS_linux_sched_getscheduler 157
+#define LINUX_SYS_sched_yield 158
+#define LINUX_SYS_linux_sched_get_priority_max 159
+#define LINUX_SYS_linux_sched_get_priority_min 160
+#define LINUX_SYS_linux_sched_rr_get_interval 161
+#define LINUX_SYS_linux_nanosleep 162
+#define LINUX_SYS_linux_mremap 163
+#define LINUX_SYS_linux_setresuid16 164
+#define LINUX_SYS_linux_getresuid16 165
+#define LINUX_SYS_linux_query_module 167
+#define LINUX_SYS_poll 168
+#define LINUX_SYS_linux_nfsservctl 169
+#define LINUX_SYS_linux_setresgid16 170
+#define LINUX_SYS_linux_getresgid16 171
+#define LINUX_SYS_linux_prctl 172
+#define LINUX_SYS_linux_rt_sigreturn 173
+#define LINUX_SYS_linux_rt_sigaction 174
+#define LINUX_SYS_linux_rt_sigprocmask 175
+#define LINUX_SYS_linux_rt_sigpending 176
+#define LINUX_SYS_linux_rt_sigtimedwait 177
+#define LINUX_SYS_linux_rt_sigqueueinfo 178
+#define LINUX_SYS_linux_rt_sigsuspend 179
+#define LINUX_SYS_linux_pread 180
+#define LINUX_SYS_linux_pwrite 181
+#define LINUX_SYS_linux_chown16 182
+#define LINUX_SYS_linux_getcwd 183
+#define LINUX_SYS_linux_capget 184
+#define LINUX_SYS_linux_capset 185
+#define LINUX_SYS_linux_sigaltstack 186
+#define LINUX_SYS_linux_sendfile 187
+#define LINUX_SYS_linux_vfork 190
+#define LINUX_SYS_linux_getrlimit 191
+#define LINUX_SYS_linux_mmap2 192
+#define LINUX_SYS_linux_truncate64 193
+#define LINUX_SYS_linux_ftruncate64 194
+#define LINUX_SYS_linux_stat64 195
+#define LINUX_SYS_linux_lstat64 196
+#define LINUX_SYS_linux_fstat64 197
+#define LINUX_SYS_linux_lchown 198
+#define LINUX_SYS_linux_getuid 199
+#define LINUX_SYS_linux_getgid 200
+#define LINUX_SYS_geteuid 201
+#define LINUX_SYS_getegid 202
+#define LINUX_SYS_setreuid 203
+#define LINUX_SYS_setregid 204
+#define LINUX_SYS_linux_getgroups 205
+#define LINUX_SYS_linux_setgroups 206
+#define LINUX_SYS_setresuid 208
+#define LINUX_SYS_getresuid 209
+#define LINUX_SYS_setresgid 210
+#define LINUX_SYS_getresgid 211
+#define LINUX_SYS_linux_chown 212
+#define LINUX_SYS_setuid 213
+#define LINUX_SYS_setgid 214
+#define LINUX_SYS_linux_setfsuid 215
+#define LINUX_SYS_linux_setfsgid 216
+#define LINUX_SYS_linux_pivot_root 217
+#define LINUX_SYS_linux_mincore 218
+#define LINUX_SYS_madvise 219
+#define LINUX_SYS_linux_getdents64 220
+#define LINUX_SYS_linux_fcntl64 221
+#define LINUX_SYS_linux_gettid 224
+#define LINUX_SYS_linux_setxattr 226
+#define LINUX_SYS_linux_lsetxattr 227
+#define LINUX_SYS_linux_fsetxattr 228
+#define LINUX_SYS_linux_getxattr 229
+#define LINUX_SYS_linux_lgetxattr 230
+#define LINUX_SYS_linux_fgetxattr 231
+#define LINUX_SYS_linux_listxattr 232
+#define LINUX_SYS_linux_llistxattr 233
+#define LINUX_SYS_linux_flistxattr 234
+#define LINUX_SYS_linux_removexattr 235
+#define LINUX_SYS_linux_lremovexattr 236
+#define LINUX_SYS_linux_fremovexattr 237
+#define LINUX_SYS_linux_tkill 238
+#define LINUX_SYS_linux_sys_futex 240
+#define LINUX_SYS_linux_sched_setaffinity 241
+#define LINUX_SYS_linux_sched_getaffinity 242
+#define LINUX_SYS_linux_set_thread_area 243
+#define LINUX_SYS_linux_fadvise64 250
+#define LINUX_SYS_linux_exit_group 252
+#define LINUX_SYS_linux_lookup_dcookie 253
+#define LINUX_SYS_linux_epoll_create 254
+#define LINUX_SYS_linux_epoll_ctl 255
+#define LINUX_SYS_linux_epoll_wait 256
+#define LINUX_SYS_linux_remap_file_pages 257
+#define LINUX_SYS_linux_set_tid_address 258
+#define LINUX_SYS_linux_timer_create 259
+#define LINUX_SYS_linux_timer_settime 260
+#define LINUX_SYS_linux_timer_gettime 261
+#define LINUX_SYS_linux_timer_getoverrun 262
+#define LINUX_SYS_linux_timer_delete 263
+#define LINUX_SYS_linux_clock_settime 264
+#define LINUX_SYS_linux_clock_gettime 265
+#define LINUX_SYS_linux_clock_getres 266
+#define LINUX_SYS_linux_clock_nanosleep 267
+#define LINUX_SYS_linux_statfs64 268
+#define LINUX_SYS_linux_fstatfs64 269
+#define LINUX_SYS_linux_tgkill 270
+#define LINUX_SYS_linux_utimes 271
+#define LINUX_SYS_linux_fadvise64_64 272
+#define LINUX_SYS_linux_mbind 274
+#define LINUX_SYS_linux_get_mempolicy 275
+#define LINUX_SYS_linux_set_mempolicy 276
+#define LINUX_SYS_linux_mq_open 277
+#define LINUX_SYS_linux_mq_unlink 278
+#define LINUX_SYS_linux_mq_timedsend 279
+#define LINUX_SYS_linux_mq_timedreceive 280
+#define LINUX_SYS_linux_mq_notify 281
+#define LINUX_SYS_linux_mq_getsetattr 282
+#define LINUX_SYS_linux_kexec_load 283
+#define LINUX_SYS_linux_waitid 284
+#define LINUX_SYS_linux_add_key 286
+#define LINUX_SYS_linux_request_key 287
+#define LINUX_SYS_linux_keyctl 288
+#define LINUX_SYS_linux_ioprio_set 289
+#define LINUX_SYS_linux_ioprio_get 290
+#define LINUX_SYS_linux_inotify_init 291
+#define LINUX_SYS_linux_inotify_add_watch 292
+#define LINUX_SYS_linux_inotify_rm_watch 293
+#define LINUX_SYS_linux_migrate_pages 294
+#define LINUX_SYS_linux_openat 295
+#define LINUX_SYS_linux_mkdirat 296
+#define LINUX_SYS_linux_mknodat 297
+#define LINUX_SYS_linux_fchownat 298
+#define LINUX_SYS_linux_futimesat 299
+#define LINUX_SYS_linux_fstatat64 300
+#define LINUX_SYS_linux_unlinkat 301
+#define LINUX_SYS_linux_renameat 302
+#define LINUX_SYS_linux_linkat 303
+#define LINUX_SYS_linux_symlinkat 304
+#define LINUX_SYS_linux_readlinkat 305
+#define LINUX_SYS_linux_fchmodat 306
+#define LINUX_SYS_linux_faccessat 307
+#define LINUX_SYS_linux_pselect6 308
+#define LINUX_SYS_linux_ppoll 309
+#define LINUX_SYS_linux_unshare 310
+#define LINUX_SYS_linux_set_robust_list 311
+#define LINUX_SYS_linux_get_robust_list 312
+#define LINUX_SYS_linux_splice 313
+#define LINUX_SYS_linux_sync_file_range 314
+#define LINUX_SYS_linux_tee 315
+#define LINUX_SYS_linux_vmsplice 316
+#define LINUX_SYS_linux_move_pages 317
+#define LINUX_SYS_linux_getcpu 318
+#define LINUX_SYS_linux_epoll_pwait 319
+#define LINUX_SYS_linux_utimensat 320
+#define LINUX_SYS_linux_signalfd 321
+#define LINUX_SYS_linux_timerfd_create 322
+#define LINUX_SYS_linux_eventfd 323
+#define LINUX_SYS_linux_fallocate 324
+#define LINUX_SYS_linux_timerfd_settime 325
+#define LINUX_SYS_linux_timerfd_gettime 326
+#define LINUX_SYS_linux_signalfd4 327
+#define LINUX_SYS_linux_eventfd2 328
+#define LINUX_SYS_linux_epoll_create1 329
+#define LINUX_SYS_linux_dup3 330
+#define LINUX_SYS_linux_pipe2 331
+#define LINUX_SYS_linux_inotify_init1 332
+#define LINUX_SYS_linux_preadv 333
+#define LINUX_SYS_linux_pwritev 334
+#define LINUX_SYS_linux_rt_tsigqueueinfo 335
+#define LINUX_SYS_linux_perf_event_open 336
+#define LINUX_SYS_linux_recvmmsg 337
+#define LINUX_SYS_linux_fanotify_init 338
+#define LINUX_SYS_linux_fanotify_mark 339
+#define LINUX_SYS_linux_prlimit64 340
+#define LINUX_SYS_linux_name_to_handle_at 341
+#define LINUX_SYS_linux_open_by_handle_at 342
+#define LINUX_SYS_linux_clock_adjtime 343
+#define LINUX_SYS_linux_syncfs 344
+#define LINUX_SYS_linux_sendmmsg 345
+#define LINUX_SYS_linux_setns 346
+#define LINUX_SYS_linux_process_vm_readv 347
+#define LINUX_SYS_linux_process_vm_writev 348
+#define LINUX_SYS_MAXSYSCALL 349
diff --git a/sys/amd64/linux32/linux32_syscalls.c b/sys/amd64/linux32/linux32_syscalls.c
new file mode 100644
index 0000000..ebde899
--- /dev/null
+++ b/sys/amd64/linux32/linux32_syscalls.c
@@ -0,0 +1,360 @@
+/*
+ * System call names.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * $FreeBSD$
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
+ */
+
+const char *linux_syscallnames[] = {
+#define nosys linux_nosys
+ "#0", /* 0 = setup */
+ "exit", /* 1 = exit */
+ "linux_fork", /* 2 = linux_fork */
+ "read", /* 3 = read */
+ "write", /* 4 = write */
+ "linux_open", /* 5 = linux_open */
+ "close", /* 6 = close */
+ "linux_waitpid", /* 7 = linux_waitpid */
+ "linux_creat", /* 8 = linux_creat */
+ "linux_link", /* 9 = linux_link */
+ "linux_unlink", /* 10 = linux_unlink */
+ "linux_execve", /* 11 = linux_execve */
+ "linux_chdir", /* 12 = linux_chdir */
+ "linux_time", /* 13 = linux_time */
+ "linux_mknod", /* 14 = linux_mknod */
+ "linux_chmod", /* 15 = linux_chmod */
+ "linux_lchown16", /* 16 = linux_lchown16 */
+ "#17", /* 17 = break */
+ "linux_stat", /* 18 = linux_stat */
+ "linux_lseek", /* 19 = linux_lseek */
+ "linux_getpid", /* 20 = linux_getpid */
+ "linux_mount", /* 21 = linux_mount */
+ "linux_oldumount", /* 22 = linux_oldumount */
+ "linux_setuid16", /* 23 = linux_setuid16 */
+ "linux_getuid16", /* 24 = linux_getuid16 */
+ "linux_stime", /* 25 = linux_stime */
+ "linux_ptrace", /* 26 = linux_ptrace */
+ "linux_alarm", /* 27 = linux_alarm */
+ "#28", /* 28 = fstat */
+ "linux_pause", /* 29 = linux_pause */
+ "linux_utime", /* 30 = linux_utime */
+ "#31", /* 31 = stty */
+ "#32", /* 32 = gtty */
+ "linux_access", /* 33 = linux_access */
+ "linux_nice", /* 34 = linux_nice */
+ "#35", /* 35 = ftime */
+ "sync", /* 36 = sync */
+ "linux_kill", /* 37 = linux_kill */
+ "linux_rename", /* 38 = linux_rename */
+ "linux_mkdir", /* 39 = linux_mkdir */
+ "linux_rmdir", /* 40 = linux_rmdir */
+ "dup", /* 41 = dup */
+ "linux_pipe", /* 42 = linux_pipe */
+ "linux_times", /* 43 = linux_times */
+ "#44", /* 44 = prof */
+ "linux_brk", /* 45 = linux_brk */
+ "linux_setgid16", /* 46 = linux_setgid16 */
+ "linux_getgid16", /* 47 = linux_getgid16 */
+ "linux_signal", /* 48 = linux_signal */
+ "linux_geteuid16", /* 49 = linux_geteuid16 */
+ "linux_getegid16", /* 50 = linux_getegid16 */
+ "acct", /* 51 = acct */
+ "linux_umount", /* 52 = linux_umount */
+ "#53", /* 53 = lock */
+ "linux_ioctl", /* 54 = linux_ioctl */
+ "linux_fcntl", /* 55 = linux_fcntl */
+ "#56", /* 56 = mpx */
+ "setpgid", /* 57 = setpgid */
+ "#58", /* 58 = ulimit */
+ "linux_olduname", /* 59 = linux_olduname */
+ "umask", /* 60 = umask */
+ "chroot", /* 61 = chroot */
+ "linux_ustat", /* 62 = linux_ustat */
+ "dup2", /* 63 = dup2 */
+ "linux_getppid", /* 64 = linux_getppid */
+ "getpgrp", /* 65 = getpgrp */
+ "setsid", /* 66 = setsid */
+ "linux_sigaction", /* 67 = linux_sigaction */
+ "linux_sgetmask", /* 68 = linux_sgetmask */
+ "linux_ssetmask", /* 69 = linux_ssetmask */
+ "linux_setreuid16", /* 70 = linux_setreuid16 */
+ "linux_setregid16", /* 71 = linux_setregid16 */
+ "linux_sigsuspend", /* 72 = linux_sigsuspend */
+ "linux_sigpending", /* 73 = linux_sigpending */
+ "linux_sethostname", /* 74 = linux_sethostname */
+ "linux_setrlimit", /* 75 = linux_setrlimit */
+ "linux_old_getrlimit", /* 76 = linux_old_getrlimit */
+ "linux_getrusage", /* 77 = linux_getrusage */
+ "linux_gettimeofday", /* 78 = linux_gettimeofday */
+ "linux_settimeofday", /* 79 = linux_settimeofday */
+ "linux_getgroups16", /* 80 = linux_getgroups16 */
+ "linux_setgroups16", /* 81 = linux_setgroups16 */
+ "linux_old_select", /* 82 = linux_old_select */
+ "linux_symlink", /* 83 = linux_symlink */
+ "linux_lstat", /* 84 = linux_lstat */
+ "linux_readlink", /* 85 = linux_readlink */
+ "#86", /* 86 = linux_uselib */
+ "swapon", /* 87 = swapon */
+ "linux_reboot", /* 88 = linux_reboot */
+ "linux_readdir", /* 89 = linux_readdir */
+ "linux_mmap", /* 90 = linux_mmap */
+ "munmap", /* 91 = munmap */
+ "linux_truncate", /* 92 = linux_truncate */
+ "linux_ftruncate", /* 93 = linux_ftruncate */
+ "fchmod", /* 94 = fchmod */
+ "fchown", /* 95 = fchown */
+ "linux_getpriority", /* 96 = linux_getpriority */
+ "setpriority", /* 97 = setpriority */
+ "#98", /* 98 = profil */
+ "linux_statfs", /* 99 = linux_statfs */
+ "linux_fstatfs", /* 100 = linux_fstatfs */
+ "#101", /* 101 = ioperm */
+ "linux_socketcall", /* 102 = linux_socketcall */
+ "linux_syslog", /* 103 = linux_syslog */
+ "linux_setitimer", /* 104 = linux_setitimer */
+ "linux_getitimer", /* 105 = linux_getitimer */
+ "linux_newstat", /* 106 = linux_newstat */
+ "linux_newlstat", /* 107 = linux_newlstat */
+ "linux_newfstat", /* 108 = linux_newfstat */
+ "linux_uname", /* 109 = linux_uname */
+ "linux_iopl", /* 110 = linux_iopl */
+ "linux_vhangup", /* 111 = linux_vhangup */
+ "#112", /* 112 = idle */
+ "#113", /* 113 = vm86old */
+ "linux_wait4", /* 114 = linux_wait4 */
+ "linux_swapoff", /* 115 = linux_swapoff */
+ "linux_sysinfo", /* 116 = linux_sysinfo */
+ "linux_ipc", /* 117 = linux_ipc */
+ "fsync", /* 118 = fsync */
+ "linux_sigreturn", /* 119 = linux_sigreturn */
+ "linux_clone", /* 120 = linux_clone */
+ "linux_setdomainname", /* 121 = linux_setdomainname */
+ "linux_newuname", /* 122 = linux_newuname */
+ "#123", /* 123 = modify_ldt */
+ "linux_adjtimex", /* 124 = linux_adjtimex */
+ "linux_mprotect", /* 125 = linux_mprotect */
+ "linux_sigprocmask", /* 126 = linux_sigprocmask */
+ "linux_create_module", /* 127 = linux_create_module */
+ "linux_init_module", /* 128 = linux_init_module */
+ "linux_delete_module", /* 129 = linux_delete_module */
+ "linux_get_kernel_syms", /* 130 = linux_get_kernel_syms */
+ "linux_quotactl", /* 131 = linux_quotactl */
+ "getpgid", /* 132 = getpgid */
+ "fchdir", /* 133 = fchdir */
+ "linux_bdflush", /* 134 = linux_bdflush */
+ "linux_sysfs", /* 135 = linux_sysfs */
+ "linux_personality", /* 136 = linux_personality */
+ "#137", /* 137 = afs_syscall */
+ "linux_setfsuid16", /* 138 = linux_setfsuid16 */
+ "linux_setfsgid16", /* 139 = linux_setfsgid16 */
+ "linux_llseek", /* 140 = linux_llseek */
+ "linux_getdents", /* 141 = linux_getdents */
+ "linux_select", /* 142 = linux_select */
+ "flock", /* 143 = flock */
+ "linux_msync", /* 144 = linux_msync */
+ "linux_readv", /* 145 = linux_readv */
+ "linux_writev", /* 146 = linux_writev */
+ "linux_getsid", /* 147 = linux_getsid */
+ "linux_fdatasync", /* 148 = linux_fdatasync */
+ "linux_sysctl", /* 149 = linux_sysctl */
+ "mlock", /* 150 = mlock */
+ "munlock", /* 151 = munlock */
+ "mlockall", /* 152 = mlockall */
+ "munlockall", /* 153 = munlockall */
+ "sched_setparam", /* 154 = sched_setparam */
+ "sched_getparam", /* 155 = sched_getparam */
+ "linux_sched_setscheduler", /* 156 = linux_sched_setscheduler */
+ "linux_sched_getscheduler", /* 157 = linux_sched_getscheduler */
+ "sched_yield", /* 158 = sched_yield */
+ "linux_sched_get_priority_max", /* 159 = linux_sched_get_priority_max */
+ "linux_sched_get_priority_min", /* 160 = linux_sched_get_priority_min */
+ "linux_sched_rr_get_interval", /* 161 = linux_sched_rr_get_interval */
+ "linux_nanosleep", /* 162 = linux_nanosleep */
+ "linux_mremap", /* 163 = linux_mremap */
+ "linux_setresuid16", /* 164 = linux_setresuid16 */
+ "linux_getresuid16", /* 165 = linux_getresuid16 */
+ "#166", /* 166 = vm86 */
+ "linux_query_module", /* 167 = linux_query_module */
+ "poll", /* 168 = poll */
+ "linux_nfsservctl", /* 169 = linux_nfsservctl */
+ "linux_setresgid16", /* 170 = linux_setresgid16 */
+ "linux_getresgid16", /* 171 = linux_getresgid16 */
+ "linux_prctl", /* 172 = linux_prctl */
+ "linux_rt_sigreturn", /* 173 = linux_rt_sigreturn */
+ "linux_rt_sigaction", /* 174 = linux_rt_sigaction */
+ "linux_rt_sigprocmask", /* 175 = linux_rt_sigprocmask */
+ "linux_rt_sigpending", /* 176 = linux_rt_sigpending */
+ "linux_rt_sigtimedwait", /* 177 = linux_rt_sigtimedwait */
+ "linux_rt_sigqueueinfo", /* 178 = linux_rt_sigqueueinfo */
+ "linux_rt_sigsuspend", /* 179 = linux_rt_sigsuspend */
+ "linux_pread", /* 180 = linux_pread */
+ "linux_pwrite", /* 181 = linux_pwrite */
+ "linux_chown16", /* 182 = linux_chown16 */
+ "linux_getcwd", /* 183 = linux_getcwd */
+ "linux_capget", /* 184 = linux_capget */
+ "linux_capset", /* 185 = linux_capset */
+ "linux_sigaltstack", /* 186 = linux_sigaltstack */
+ "linux_sendfile", /* 187 = linux_sendfile */
+ "#188", /* 188 = getpmsg */
+ "#189", /* 189 = putpmsg */
+ "linux_vfork", /* 190 = linux_vfork */
+ "linux_getrlimit", /* 191 = linux_getrlimit */
+ "linux_mmap2", /* 192 = linux_mmap2 */
+ "linux_truncate64", /* 193 = linux_truncate64 */
+ "linux_ftruncate64", /* 194 = linux_ftruncate64 */
+ "linux_stat64", /* 195 = linux_stat64 */
+ "linux_lstat64", /* 196 = linux_lstat64 */
+ "linux_fstat64", /* 197 = linux_fstat64 */
+ "linux_lchown", /* 198 = linux_lchown */
+ "linux_getuid", /* 199 = linux_getuid */
+ "linux_getgid", /* 200 = linux_getgid */
+ "geteuid", /* 201 = geteuid */
+ "getegid", /* 202 = getegid */
+ "setreuid", /* 203 = setreuid */
+ "setregid", /* 204 = setregid */
+ "linux_getgroups", /* 205 = linux_getgroups */
+ "linux_setgroups", /* 206 = linux_setgroups */
+ "fchown", /* 207 = fchown */
+ "setresuid", /* 208 = setresuid */
+ "getresuid", /* 209 = getresuid */
+ "setresgid", /* 210 = setresgid */
+ "getresgid", /* 211 = getresgid */
+ "linux_chown", /* 212 = linux_chown */
+ "setuid", /* 213 = setuid */
+ "setgid", /* 214 = setgid */
+ "linux_setfsuid", /* 215 = linux_setfsuid */
+ "linux_setfsgid", /* 216 = linux_setfsgid */
+ "linux_pivot_root", /* 217 = linux_pivot_root */
+ "linux_mincore", /* 218 = linux_mincore */
+ "madvise", /* 219 = madvise */
+ "linux_getdents64", /* 220 = linux_getdents64 */
+ "linux_fcntl64", /* 221 = linux_fcntl64 */
+ "#222", /* 222 = */
+ "#223", /* 223 = */
+ "linux_gettid", /* 224 = linux_gettid */
+ "#225", /* 225 = linux_readahead */
+ "linux_setxattr", /* 226 = linux_setxattr */
+ "linux_lsetxattr", /* 227 = linux_lsetxattr */
+ "linux_fsetxattr", /* 228 = linux_fsetxattr */
+ "linux_getxattr", /* 229 = linux_getxattr */
+ "linux_lgetxattr", /* 230 = linux_lgetxattr */
+ "linux_fgetxattr", /* 231 = linux_fgetxattr */
+ "linux_listxattr", /* 232 = linux_listxattr */
+ "linux_llistxattr", /* 233 = linux_llistxattr */
+ "linux_flistxattr", /* 234 = linux_flistxattr */
+ "linux_removexattr", /* 235 = linux_removexattr */
+ "linux_lremovexattr", /* 236 = linux_lremovexattr */
+ "linux_fremovexattr", /* 237 = linux_fremovexattr */
+ "linux_tkill", /* 238 = linux_tkill */
+ "#239", /* 239 = linux_sendfile64 */
+ "linux_sys_futex", /* 240 = linux_sys_futex */
+ "linux_sched_setaffinity", /* 241 = linux_sched_setaffinity */
+ "linux_sched_getaffinity", /* 242 = linux_sched_getaffinity */
+ "linux_set_thread_area", /* 243 = linux_set_thread_area */
+ "#244", /* 244 = linux_get_thread_area */
+ "#245", /* 245 = linux_io_setup */
+ "#246", /* 246 = linux_io_destroy */
+ "#247", /* 247 = linux_io_getevents */
+ "#248", /* 248 = linux_io_submit */
+ "#249", /* 249 = linux_io_cancel */
+ "linux_fadvise64", /* 250 = linux_fadvise64 */
+ "#251", /* 251 = */
+ "linux_exit_group", /* 252 = linux_exit_group */
+ "linux_lookup_dcookie", /* 253 = linux_lookup_dcookie */
+ "linux_epoll_create", /* 254 = linux_epoll_create */
+ "linux_epoll_ctl", /* 255 = linux_epoll_ctl */
+ "linux_epoll_wait", /* 256 = linux_epoll_wait */
+ "linux_remap_file_pages", /* 257 = linux_remap_file_pages */
+ "linux_set_tid_address", /* 258 = linux_set_tid_address */
+ "linux_timer_create", /* 259 = linux_timer_create */
+ "linux_timer_settime", /* 260 = linux_timer_settime */
+ "linux_timer_gettime", /* 261 = linux_timer_gettime */
+ "linux_timer_getoverrun", /* 262 = linux_timer_getoverrun */
+ "linux_timer_delete", /* 263 = linux_timer_delete */
+ "linux_clock_settime", /* 264 = linux_clock_settime */
+ "linux_clock_gettime", /* 265 = linux_clock_gettime */
+ "linux_clock_getres", /* 266 = linux_clock_getres */
+ "linux_clock_nanosleep", /* 267 = linux_clock_nanosleep */
+ "linux_statfs64", /* 268 = linux_statfs64 */
+ "linux_fstatfs64", /* 269 = linux_fstatfs64 */
+ "linux_tgkill", /* 270 = linux_tgkill */
+ "linux_utimes", /* 271 = linux_utimes */
+ "linux_fadvise64_64", /* 272 = linux_fadvise64_64 */
+ "#273", /* 273 = vserver */
+ "linux_mbind", /* 274 = linux_mbind */
+ "linux_get_mempolicy", /* 275 = linux_get_mempolicy */
+ "linux_set_mempolicy", /* 276 = linux_set_mempolicy */
+ "linux_mq_open", /* 277 = linux_mq_open */
+ "linux_mq_unlink", /* 278 = linux_mq_unlink */
+ "linux_mq_timedsend", /* 279 = linux_mq_timedsend */
+ "linux_mq_timedreceive", /* 280 = linux_mq_timedreceive */
+ "linux_mq_notify", /* 281 = linux_mq_notify */
+ "linux_mq_getsetattr", /* 282 = linux_mq_getsetattr */
+ "linux_kexec_load", /* 283 = linux_kexec_load */
+ "linux_waitid", /* 284 = linux_waitid */
+ "#285", /* 285 = */
+ "linux_add_key", /* 286 = linux_add_key */
+ "linux_request_key", /* 287 = linux_request_key */
+ "linux_keyctl", /* 288 = linux_keyctl */
+ "linux_ioprio_set", /* 289 = linux_ioprio_set */
+ "linux_ioprio_get", /* 290 = linux_ioprio_get */
+ "linux_inotify_init", /* 291 = linux_inotify_init */
+ "linux_inotify_add_watch", /* 292 = linux_inotify_add_watch */
+ "linux_inotify_rm_watch", /* 293 = linux_inotify_rm_watch */
+ "linux_migrate_pages", /* 294 = linux_migrate_pages */
+ "linux_openat", /* 295 = linux_openat */
+ "linux_mkdirat", /* 296 = linux_mkdirat */
+ "linux_mknodat", /* 297 = linux_mknodat */
+ "linux_fchownat", /* 298 = linux_fchownat */
+ "linux_futimesat", /* 299 = linux_futimesat */
+ "linux_fstatat64", /* 300 = linux_fstatat64 */
+ "linux_unlinkat", /* 301 = linux_unlinkat */
+ "linux_renameat", /* 302 = linux_renameat */
+ "linux_linkat", /* 303 = linux_linkat */
+ "linux_symlinkat", /* 304 = linux_symlinkat */
+ "linux_readlinkat", /* 305 = linux_readlinkat */
+ "linux_fchmodat", /* 306 = linux_fchmodat */
+ "linux_faccessat", /* 307 = linux_faccessat */
+ "linux_pselect6", /* 308 = linux_pselect6 */
+ "linux_ppoll", /* 309 = linux_ppoll */
+ "linux_unshare", /* 310 = linux_unshare */
+ "linux_set_robust_list", /* 311 = linux_set_robust_list */
+ "linux_get_robust_list", /* 312 = linux_get_robust_list */
+ "linux_splice", /* 313 = linux_splice */
+ "linux_sync_file_range", /* 314 = linux_sync_file_range */
+ "linux_tee", /* 315 = linux_tee */
+ "linux_vmsplice", /* 316 = linux_vmsplice */
+ "linux_move_pages", /* 317 = linux_move_pages */
+ "linux_getcpu", /* 318 = linux_getcpu */
+ "linux_epoll_pwait", /* 319 = linux_epoll_pwait */
+ "linux_utimensat", /* 320 = linux_utimensat */
+ "linux_signalfd", /* 321 = linux_signalfd */
+ "linux_timerfd_create", /* 322 = linux_timerfd_create */
+ "linux_eventfd", /* 323 = linux_eventfd */
+ "linux_fallocate", /* 324 = linux_fallocate */
+ "linux_timerfd_settime", /* 325 = linux_timerfd_settime */
+ "linux_timerfd_gettime", /* 326 = linux_timerfd_gettime */
+ "linux_signalfd4", /* 327 = linux_signalfd4 */
+ "linux_eventfd2", /* 328 = linux_eventfd2 */
+ "linux_epoll_create1", /* 329 = linux_epoll_create1 */
+ "linux_dup3", /* 330 = linux_dup3 */
+ "linux_pipe2", /* 331 = linux_pipe2 */
+ "linux_inotify_init1", /* 332 = linux_inotify_init1 */
+ "linux_preadv", /* 333 = linux_preadv */
+ "linux_pwritev", /* 334 = linux_pwritev */
+ "linux_rt_tsigqueueinfo", /* 335 = linux_rt_tsigqueueinfo */
+ "linux_perf_event_open", /* 336 = linux_perf_event_open */
+ "linux_recvmmsg", /* 337 = linux_recvmmsg */
+ "linux_fanotify_init", /* 338 = linux_fanotify_init */
+ "linux_fanotify_mark", /* 339 = linux_fanotify_mark */
+ "linux_prlimit64", /* 340 = linux_prlimit64 */
+ "linux_name_to_handle_at", /* 341 = linux_name_to_handle_at */
+ "linux_open_by_handle_at", /* 342 = linux_open_by_handle_at */
+ "linux_clock_adjtime", /* 343 = linux_clock_adjtime */
+ "linux_syncfs", /* 344 = linux_syncfs */
+ "linux_sendmmsg", /* 345 = linux_sendmmsg */
+ "linux_setns", /* 346 = linux_setns */
+ "linux_process_vm_readv", /* 347 = linux_process_vm_readv */
+ "linux_process_vm_writev", /* 348 = linux_process_vm_writev */
+};
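
Editorial note: the name table above parallels the sysent table that follows entry-for-entry; the generator emits "#<number>" strings for unimplemented or reserved slots, so a leading '#' reliably flags an absent syscall. An illustrative userland-style sketch under that assumption (not part of the generated file):

	/*
	 * Illustrative only: enumerate the implemented syscalls by
	 * skipping the "#<number>" placeholders the generator emits
	 * for absent slots.
	 */
	#include <stdio.h>

	extern const char *linux_syscallnames[];

	static void
	linux32_dump_names(unsigned int maxsyscall)
	{
		unsigned int i;

		for (i = 0; i < maxsyscall; i++)
			if (linux_syscallnames[i][0] != '#')
				printf("%3u\t%s\n", i, linux_syscallnames[i]);
	}
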
diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c
new file mode 100644
index 0000000..1ece240
--- /dev/null
+++ b/sys/amd64/linux32/linux32_sysent.c
@@ -0,0 +1,371 @@
+/*
+ * System call switch table.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * $FreeBSD$
+ * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 236026 2012-05-25 21:50:48Z ed
+ */
+
+#include "opt_compat.h"
+#include <sys/param.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <compat/linux/linux_sysproto.h>
+#include <amd64/linux32/linux.h>
+#include <amd64/linux32/linux32_proto.h>
+
+#define AS(name) (sizeof(struct name) / sizeof(register_t))
+
+/* The casts are bogus but will do for now. */
+struct sysent linux_sysent[] = {
+#define nosys linux_nosys
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 0 = setup */
+ { AS(sys_exit_args), (sy_call_t *)sys_sys_exit, AUE_EXIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 1 = exit */
+ { 0, (sy_call_t *)linux_fork, AUE_FORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 2 = linux_fork */
+ { AS(read_args), (sy_call_t *)sys_read, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 3 = read */
+ { AS(write_args), (sy_call_t *)sys_write, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 4 = write */
+ { AS(linux_open_args), (sy_call_t *)linux_open, AUE_OPEN_RWTC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 5 = linux_open */
+ { AS(close_args), (sy_call_t *)sys_close, AUE_CLOSE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 6 = close */
+ { AS(linux_waitpid_args), (sy_call_t *)linux_waitpid, AUE_WAIT4, NULL, 0, 0, 0, SY_THR_STATIC }, /* 7 = linux_waitpid */
+ { AS(linux_creat_args), (sy_call_t *)linux_creat, AUE_CREAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 8 = linux_creat */
+ { AS(linux_link_args), (sy_call_t *)linux_link, AUE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 9 = linux_link */
+ { AS(linux_unlink_args), (sy_call_t *)linux_unlink, AUE_UNLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 10 = linux_unlink */
+ { AS(linux_execve_args), (sy_call_t *)linux_execve, AUE_EXECVE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 11 = linux_execve */
+ { AS(linux_chdir_args), (sy_call_t *)linux_chdir, AUE_CHDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 12 = linux_chdir */
+ { AS(linux_time_args), (sy_call_t *)linux_time, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 13 = linux_time */
+ { AS(linux_mknod_args), (sy_call_t *)linux_mknod, AUE_MKNOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 14 = linux_mknod */
+ { AS(linux_chmod_args), (sy_call_t *)linux_chmod, AUE_CHMOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 15 = linux_chmod */
+ { AS(linux_lchown16_args), (sy_call_t *)linux_lchown16, AUE_LCHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 16 = linux_lchown16 */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 17 = break */
+ { AS(linux_stat_args), (sy_call_t *)linux_stat, AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 18 = linux_stat */
+ { AS(linux_lseek_args), (sy_call_t *)linux_lseek, AUE_LSEEK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 19 = linux_lseek */
+ { 0, (sy_call_t *)linux_getpid, AUE_GETPID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 20 = linux_getpid */
+ { AS(linux_mount_args), (sy_call_t *)linux_mount, AUE_MOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 21 = linux_mount */
+ { AS(linux_oldumount_args), (sy_call_t *)linux_oldumount, AUE_UMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 22 = linux_oldumount */
+ { AS(linux_setuid16_args), (sy_call_t *)linux_setuid16, AUE_SETUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 23 = linux_setuid16 */
+ { 0, (sy_call_t *)linux_getuid16, AUE_GETUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 24 = linux_getuid16 */
+ { 0, (sy_call_t *)linux_stime, AUE_SETTIMEOFDAY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 25 = linux_stime */
+ { AS(linux_ptrace_args), (sy_call_t *)linux_ptrace, AUE_PTRACE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 26 = linux_ptrace */
+ { AS(linux_alarm_args), (sy_call_t *)linux_alarm, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 27 = linux_alarm */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 28 = fstat */
+ { 0, (sy_call_t *)linux_pause, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 29 = linux_pause */
+ { AS(linux_utime_args), (sy_call_t *)linux_utime, AUE_UTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 30 = linux_utime */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 31 = stty */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 32 = gtty */
+ { AS(linux_access_args), (sy_call_t *)linux_access, AUE_ACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 33 = linux_access */
+ { AS(linux_nice_args), (sy_call_t *)linux_nice, AUE_NICE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 34 = linux_nice */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 35 = ftime */
+ { 0, (sy_call_t *)sys_sync, AUE_SYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 36 = sync */
+ { AS(linux_kill_args), (sy_call_t *)linux_kill, AUE_KILL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 37 = linux_kill */
+ { AS(linux_rename_args), (sy_call_t *)linux_rename, AUE_RENAME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 38 = linux_rename */
+ { AS(linux_mkdir_args), (sy_call_t *)linux_mkdir, AUE_MKDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 39 = linux_mkdir */
+ { AS(linux_rmdir_args), (sy_call_t *)linux_rmdir, AUE_RMDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 40 = linux_rmdir */
+ { AS(dup_args), (sy_call_t *)sys_dup, AUE_DUP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 41 = dup */
+ { AS(linux_pipe_args), (sy_call_t *)linux_pipe, AUE_PIPE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 42 = linux_pipe */
+ { AS(linux_times_args), (sy_call_t *)linux_times, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 43 = linux_times */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 44 = prof */
+ { AS(linux_brk_args), (sy_call_t *)linux_brk, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 45 = linux_brk */
+ { AS(linux_setgid16_args), (sy_call_t *)linux_setgid16, AUE_SETGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 46 = linux_setgid16 */
+ { 0, (sy_call_t *)linux_getgid16, AUE_GETGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 47 = linux_getgid16 */
+ { AS(linux_signal_args), (sy_call_t *)linux_signal, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 48 = linux_signal */
+ { 0, (sy_call_t *)linux_geteuid16, AUE_GETEUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 49 = linux_geteuid16 */
+ { 0, (sy_call_t *)linux_getegid16, AUE_GETEGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 50 = linux_getegid16 */
+ { AS(acct_args), (sy_call_t *)sys_acct, AUE_ACCT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 51 = acct */
+ { AS(linux_umount_args), (sy_call_t *)linux_umount, AUE_UMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 52 = linux_umount */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 53 = lock */
+ { AS(linux_ioctl_args), (sy_call_t *)linux_ioctl, AUE_IOCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 54 = linux_ioctl */
+ { AS(linux_fcntl_args), (sy_call_t *)linux_fcntl, AUE_FCNTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 55 = linux_fcntl */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 56 = mpx */
+ { AS(setpgid_args), (sy_call_t *)sys_setpgid, AUE_SETPGRP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 57 = setpgid */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 58 = ulimit */
+ { 0, (sy_call_t *)linux_olduname, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 59 = linux_olduname */
+ { AS(umask_args), (sy_call_t *)sys_umask, AUE_UMASK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 60 = umask */
+ { AS(chroot_args), (sy_call_t *)sys_chroot, AUE_CHROOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 61 = chroot */
+ { AS(linux_ustat_args), (sy_call_t *)linux_ustat, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 62 = linux_ustat */
+ { AS(dup2_args), (sy_call_t *)sys_dup2, AUE_DUP2, NULL, 0, 0, 0, SY_THR_STATIC }, /* 63 = dup2 */
+ { 0, (sy_call_t *)linux_getppid, AUE_GETPPID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 64 = linux_getppid */
+ { 0, (sy_call_t *)sys_getpgrp, AUE_GETPGRP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 65 = getpgrp */
+ { 0, (sy_call_t *)sys_setsid, AUE_SETSID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 66 = setsid */
+ { AS(linux_sigaction_args), (sy_call_t *)linux_sigaction, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 67 = linux_sigaction */
+ { 0, (sy_call_t *)linux_sgetmask, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 68 = linux_sgetmask */
+ { AS(linux_ssetmask_args), (sy_call_t *)linux_ssetmask, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 69 = linux_ssetmask */
+ { AS(linux_setreuid16_args), (sy_call_t *)linux_setreuid16, AUE_SETREUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 70 = linux_setreuid16 */
+ { AS(linux_setregid16_args), (sy_call_t *)linux_setregid16, AUE_SETREGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 71 = linux_setregid16 */
+ { AS(linux_sigsuspend_args), (sy_call_t *)linux_sigsuspend, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 72 = linux_sigsuspend */
+ { AS(linux_sigpending_args), (sy_call_t *)linux_sigpending, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 73 = linux_sigpending */
+ { AS(linux_sethostname_args), (sy_call_t *)linux_sethostname, AUE_SYSCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 74 = linux_sethostname */
+ { AS(linux_setrlimit_args), (sy_call_t *)linux_setrlimit, AUE_SETRLIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 75 = linux_setrlimit */
+ { AS(linux_old_getrlimit_args), (sy_call_t *)linux_old_getrlimit, AUE_GETRLIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 76 = linux_old_getrlimit */
+ { AS(linux_getrusage_args), (sy_call_t *)linux_getrusage, AUE_GETRUSAGE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 77 = linux_getrusage */
+ { AS(linux_gettimeofday_args), (sy_call_t *)linux_gettimeofday, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 78 = linux_gettimeofday */
+ { AS(linux_settimeofday_args), (sy_call_t *)linux_settimeofday, AUE_SETTIMEOFDAY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 79 = linux_settimeofday */
+ { AS(linux_getgroups16_args), (sy_call_t *)linux_getgroups16, AUE_GETGROUPS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 80 = linux_getgroups16 */
+ { AS(linux_setgroups16_args), (sy_call_t *)linux_setgroups16, AUE_SETGROUPS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 81 = linux_setgroups16 */
+ { AS(linux_old_select_args), (sy_call_t *)linux_old_select, AUE_SELECT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 82 = linux_old_select */
+ { AS(linux_symlink_args), (sy_call_t *)linux_symlink, AUE_SYMLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 83 = linux_symlink */
+ { AS(linux_lstat_args), (sy_call_t *)linux_lstat, AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 84 = linux_lstat */
+ { AS(linux_readlink_args), (sy_call_t *)linux_readlink, AUE_READLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 85 = linux_readlink */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 86 = linux_uselib */
+ { AS(swapon_args), (sy_call_t *)sys_swapon, AUE_SWAPON, NULL, 0, 0, 0, SY_THR_STATIC }, /* 87 = swapon */
+ { AS(linux_reboot_args), (sy_call_t *)linux_reboot, AUE_REBOOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 88 = linux_reboot */
+ { AS(linux_readdir_args), (sy_call_t *)linux_readdir, AUE_GETDIRENTRIES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 89 = linux_readdir */
+ { AS(linux_mmap_args), (sy_call_t *)linux_mmap, AUE_MMAP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 90 = linux_mmap */
+ { AS(munmap_args), (sy_call_t *)sys_munmap, AUE_MUNMAP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 91 = munmap */
+ { AS(linux_truncate_args), (sy_call_t *)linux_truncate, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 92 = linux_truncate */
+ { AS(linux_ftruncate_args), (sy_call_t *)linux_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 93 = linux_ftruncate */
+ { AS(fchmod_args), (sy_call_t *)sys_fchmod, AUE_FCHMOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 94 = fchmod */
+ { AS(fchown_args), (sy_call_t *)sys_fchown, AUE_FCHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 95 = fchown */
+ { AS(linux_getpriority_args), (sy_call_t *)linux_getpriority, AUE_GETPRIORITY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 96 = linux_getpriority */
+ { AS(setpriority_args), (sy_call_t *)sys_setpriority, AUE_SETPRIORITY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 97 = setpriority */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 98 = profil */
+ { AS(linux_statfs_args), (sy_call_t *)linux_statfs, AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 99 = linux_statfs */
+ { AS(linux_fstatfs_args), (sy_call_t *)linux_fstatfs, AUE_FSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 100 = linux_fstatfs */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 101 = ioperm */
+ { AS(linux_socketcall_args), (sy_call_t *)linux_socketcall, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 102 = linux_socketcall */
+ { AS(linux_syslog_args), (sy_call_t *)linux_syslog, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 103 = linux_syslog */
+ { AS(linux_setitimer_args), (sy_call_t *)linux_setitimer, AUE_SETITIMER, NULL, 0, 0, 0, SY_THR_STATIC }, /* 104 = linux_setitimer */
+ { AS(linux_getitimer_args), (sy_call_t *)linux_getitimer, AUE_GETITIMER, NULL, 0, 0, 0, SY_THR_STATIC }, /* 105 = linux_getitimer */
+ { AS(linux_newstat_args), (sy_call_t *)linux_newstat, AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 106 = linux_newstat */
+ { AS(linux_newlstat_args), (sy_call_t *)linux_newlstat, AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 107 = linux_newlstat */
+ { AS(linux_newfstat_args), (sy_call_t *)linux_newfstat, AUE_FSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 108 = linux_newfstat */
+ { 0, (sy_call_t *)linux_uname, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 109 = linux_uname */
+ { AS(linux_iopl_args), (sy_call_t *)linux_iopl, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 110 = linux_iopl */
+ { 0, (sy_call_t *)linux_vhangup, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 111 = linux_vhangup */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 112 = idle */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 113 = vm86old */
+ { AS(linux_wait4_args), (sy_call_t *)linux_wait4, AUE_WAIT4, NULL, 0, 0, 0, SY_THR_STATIC }, /* 114 = linux_wait4 */
+ { 0, (sy_call_t *)linux_swapoff, AUE_SWAPOFF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 115 = linux_swapoff */
+ { AS(linux_sysinfo_args), (sy_call_t *)linux_sysinfo, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 116 = linux_sysinfo */
+ { AS(linux_ipc_args), (sy_call_t *)linux_ipc, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 117 = linux_ipc */
+ { AS(fsync_args), (sy_call_t *)sys_fsync, AUE_FSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 118 = fsync */
+ { AS(linux_sigreturn_args), (sy_call_t *)linux_sigreturn, AUE_SIGRETURN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 119 = linux_sigreturn */
+ { AS(linux_clone_args), (sy_call_t *)linux_clone, AUE_RFORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 120 = linux_clone */
+ { AS(linux_setdomainname_args), (sy_call_t *)linux_setdomainname, AUE_SYSCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 121 = linux_setdomainname */
+ { AS(linux_newuname_args), (sy_call_t *)linux_newuname, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 122 = linux_newuname */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 123 = modify_ldt */
+ { 0, (sy_call_t *)linux_adjtimex, AUE_ADJTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 124 = linux_adjtimex */
+ { AS(linux_mprotect_args), (sy_call_t *)linux_mprotect, AUE_MPROTECT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 125 = linux_mprotect */
+ { AS(linux_sigprocmask_args), (sy_call_t *)linux_sigprocmask, AUE_SIGPROCMASK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 126 = linux_sigprocmask */
+ { 0, (sy_call_t *)linux_create_module, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 127 = linux_create_module */
+ { 0, (sy_call_t *)linux_init_module, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 128 = linux_init_module */
+ { 0, (sy_call_t *)linux_delete_module, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 129 = linux_delete_module */
+ { 0, (sy_call_t *)linux_get_kernel_syms, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 130 = linux_get_kernel_syms */
+ { 0, (sy_call_t *)linux_quotactl, AUE_QUOTACTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 131 = linux_quotactl */
+ { AS(getpgid_args), (sy_call_t *)sys_getpgid, AUE_GETPGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 132 = getpgid */
+ { AS(fchdir_args), (sy_call_t *)sys_fchdir, AUE_FCHDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 133 = fchdir */
+ { 0, (sy_call_t *)linux_bdflush, AUE_BDFLUSH, NULL, 0, 0, 0, SY_THR_STATIC }, /* 134 = linux_bdflush */
+ { AS(linux_sysfs_args), (sy_call_t *)linux_sysfs, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 135 = linux_sysfs */
+ { AS(linux_personality_args), (sy_call_t *)linux_personality, AUE_PERSONALITY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 136 = linux_personality */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 137 = afs_syscall */
+ { AS(linux_setfsuid16_args), (sy_call_t *)linux_setfsuid16, AUE_SETFSUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 138 = linux_setfsuid16 */
+ { AS(linux_setfsgid16_args), (sy_call_t *)linux_setfsgid16, AUE_SETFSGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 139 = linux_setfsgid16 */
+ { AS(linux_llseek_args), (sy_call_t *)linux_llseek, AUE_LSEEK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 140 = linux_llseek */
+ { AS(linux_getdents_args), (sy_call_t *)linux_getdents, AUE_GETDIRENTRIES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 141 = linux_getdents */
+ { AS(linux_select_args), (sy_call_t *)linux_select, AUE_SELECT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 142 = linux_select */
+ { AS(flock_args), (sy_call_t *)sys_flock, AUE_FLOCK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 143 = flock */
+ { AS(linux_msync_args), (sy_call_t *)linux_msync, AUE_MSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 144 = linux_msync */
+ { AS(linux_readv_args), (sy_call_t *)linux_readv, AUE_READV, NULL, 0, 0, 0, SY_THR_STATIC }, /* 145 = linux_readv */
+ { AS(linux_writev_args), (sy_call_t *)linux_writev, AUE_WRITEV, NULL, 0, 0, 0, SY_THR_STATIC }, /* 146 = linux_writev */
+ { AS(linux_getsid_args), (sy_call_t *)linux_getsid, AUE_GETSID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 147 = linux_getsid */
+ { AS(linux_fdatasync_args), (sy_call_t *)linux_fdatasync, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 148 = linux_fdatasync */
+ { AS(linux_sysctl_args), (sy_call_t *)linux_sysctl, AUE_SYSCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 149 = linux_sysctl */
+ { AS(mlock_args), (sy_call_t *)sys_mlock, AUE_MLOCK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 150 = mlock */
+ { AS(munlock_args), (sy_call_t *)sys_munlock, AUE_MUNLOCK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 151 = munlock */
+ { AS(mlockall_args), (sy_call_t *)sys_mlockall, AUE_MLOCKALL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 152 = mlockall */
+ { 0, (sy_call_t *)sys_munlockall, AUE_MUNLOCKALL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 153 = munlockall */
+ { AS(sched_setparam_args), (sy_call_t *)sys_sched_setparam, AUE_SCHED_SETPARAM, NULL, 0, 0, 0, SY_THR_STATIC }, /* 154 = sched_setparam */
+ { AS(sched_getparam_args), (sy_call_t *)sys_sched_getparam, AUE_SCHED_GETPARAM, NULL, 0, 0, 0, SY_THR_STATIC }, /* 155 = sched_getparam */
+ { AS(linux_sched_setscheduler_args), (sy_call_t *)linux_sched_setscheduler, AUE_SCHED_SETSCHEDULER, NULL, 0, 0, 0, SY_THR_STATIC }, /* 156 = linux_sched_setscheduler */
+ { AS(linux_sched_getscheduler_args), (sy_call_t *)linux_sched_getscheduler, AUE_SCHED_GETSCHEDULER, NULL, 0, 0, 0, SY_THR_STATIC }, /* 157 = linux_sched_getscheduler */
+ { 0, (sy_call_t *)sys_sched_yield, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 158 = sched_yield */
+ { AS(linux_sched_get_priority_max_args), (sy_call_t *)linux_sched_get_priority_max, AUE_SCHED_GET_PRIORITY_MAX, NULL, 0, 0, 0, SY_THR_STATIC }, /* 159 = linux_sched_get_priority_max */
+ { AS(linux_sched_get_priority_min_args), (sy_call_t *)linux_sched_get_priority_min, AUE_SCHED_GET_PRIORITY_MIN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 160 = linux_sched_get_priority_min */
+ { AS(linux_sched_rr_get_interval_args), (sy_call_t *)linux_sched_rr_get_interval, AUE_SCHED_RR_GET_INTERVAL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 161 = linux_sched_rr_get_interval */
+ { AS(linux_nanosleep_args), (sy_call_t *)linux_nanosleep, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 162 = linux_nanosleep */
+ { AS(linux_mremap_args), (sy_call_t *)linux_mremap, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 163 = linux_mremap */
+ { AS(linux_setresuid16_args), (sy_call_t *)linux_setresuid16, AUE_SETRESUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 164 = linux_setresuid16 */
+ { AS(linux_getresuid16_args), (sy_call_t *)linux_getresuid16, AUE_GETRESUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 165 = linux_getresuid16 */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 166 = vm86 */
+ { 0, (sy_call_t *)linux_query_module, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 167 = linux_query_module */
+ { AS(poll_args), (sy_call_t *)sys_poll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 168 = poll */
+ { 0, (sy_call_t *)linux_nfsservctl, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 169 = linux_nfsservctl */
+ { AS(linux_setresgid16_args), (sy_call_t *)linux_setresgid16, AUE_SETRESGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 170 = linux_setresgid16 */
+ { AS(linux_getresgid16_args), (sy_call_t *)linux_getresgid16, AUE_GETRESGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 171 = linux_getresgid16 */
+ { AS(linux_prctl_args), (sy_call_t *)linux_prctl, AUE_PRCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 172 = linux_prctl */
+ { AS(linux_rt_sigreturn_args), (sy_call_t *)linux_rt_sigreturn, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 173 = linux_rt_sigreturn */
+ { AS(linux_rt_sigaction_args), (sy_call_t *)linux_rt_sigaction, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 174 = linux_rt_sigaction */
+ { AS(linux_rt_sigprocmask_args), (sy_call_t *)linux_rt_sigprocmask, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 175 = linux_rt_sigprocmask */
+ { AS(linux_rt_sigpending_args), (sy_call_t *)linux_rt_sigpending, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 176 = linux_rt_sigpending */
+ { AS(linux_rt_sigtimedwait_args), (sy_call_t *)linux_rt_sigtimedwait, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 177 = linux_rt_sigtimedwait */
+ { 0, (sy_call_t *)linux_rt_sigqueueinfo, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 178 = linux_rt_sigqueueinfo */
+ { AS(linux_rt_sigsuspend_args), (sy_call_t *)linux_rt_sigsuspend, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 179 = linux_rt_sigsuspend */
+ { AS(linux_pread_args), (sy_call_t *)linux_pread, AUE_PREAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 180 = linux_pread */
+ { AS(linux_pwrite_args), (sy_call_t *)linux_pwrite, AUE_PWRITE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 181 = linux_pwrite */
+ { AS(linux_chown16_args), (sy_call_t *)linux_chown16, AUE_CHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 182 = linux_chown16 */
+ { AS(linux_getcwd_args), (sy_call_t *)linux_getcwd, AUE_GETCWD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 183 = linux_getcwd */
+ { AS(linux_capget_args), (sy_call_t *)linux_capget, AUE_CAPGET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 184 = linux_capget */
+ { AS(linux_capset_args), (sy_call_t *)linux_capset, AUE_CAPSET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 185 = linux_capset */
+ { AS(linux_sigaltstack_args), (sy_call_t *)linux_sigaltstack, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 186 = linux_sigaltstack */
+ { 0, (sy_call_t *)linux_sendfile, AUE_SENDFILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 187 = linux_sendfile */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 188 = getpmsg */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 189 = putpmsg */
+ { 0, (sy_call_t *)linux_vfork, AUE_VFORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 190 = linux_vfork */
+ { AS(linux_getrlimit_args), (sy_call_t *)linux_getrlimit, AUE_GETRLIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 191 = linux_getrlimit */
+ { AS(linux_mmap2_args), (sy_call_t *)linux_mmap2, AUE_MMAP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 192 = linux_mmap2 */
+ { AS(linux_truncate64_args), (sy_call_t *)linux_truncate64, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 193 = linux_truncate64 */
+ { AS(linux_ftruncate64_args), (sy_call_t *)linux_ftruncate64, AUE_FTRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 194 = linux_ftruncate64 */
+ { AS(linux_stat64_args), (sy_call_t *)linux_stat64, AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 195 = linux_stat64 */
+ { AS(linux_lstat64_args), (sy_call_t *)linux_lstat64, AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 196 = linux_lstat64 */
+ { AS(linux_fstat64_args), (sy_call_t *)linux_fstat64, AUE_FSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 197 = linux_fstat64 */
+ { AS(linux_lchown_args), (sy_call_t *)linux_lchown, AUE_LCHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 198 = linux_lchown */
+ { 0, (sy_call_t *)linux_getuid, AUE_GETUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 199 = linux_getuid */
+ { 0, (sy_call_t *)linux_getgid, AUE_GETGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 200 = linux_getgid */
+ { 0, (sy_call_t *)sys_geteuid, AUE_GETEUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 201 = geteuid */
+ { 0, (sy_call_t *)sys_getegid, AUE_GETEGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 202 = getegid */
+ { AS(setreuid_args), (sy_call_t *)sys_setreuid, AUE_SETREUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 203 = setreuid */
+ { AS(setregid_args), (sy_call_t *)sys_setregid, AUE_SETREGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 204 = setregid */
+ { AS(linux_getgroups_args), (sy_call_t *)linux_getgroups, AUE_GETGROUPS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 205 = linux_getgroups */
+ { AS(linux_setgroups_args), (sy_call_t *)linux_setgroups, AUE_SETGROUPS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 206 = linux_setgroups */
+ { AS(fchown_args), (sy_call_t *)sys_fchown, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 207 = fchown */
+ { AS(setresuid_args), (sy_call_t *)sys_setresuid, AUE_SETRESUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 208 = setresuid */
+ { AS(getresuid_args), (sy_call_t *)sys_getresuid, AUE_GETRESUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 209 = getresuid */
+ { AS(setresgid_args), (sy_call_t *)sys_setresgid, AUE_SETRESGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 210 = setresgid */
+ { AS(getresgid_args), (sy_call_t *)sys_getresgid, AUE_GETRESGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 211 = getresgid */
+ { AS(linux_chown_args), (sy_call_t *)linux_chown, AUE_CHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 212 = linux_chown */
+ { AS(setuid_args), (sy_call_t *)sys_setuid, AUE_SETUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 213 = setuid */
+ { AS(setgid_args), (sy_call_t *)sys_setgid, AUE_SETGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 214 = setgid */
+ { AS(linux_setfsuid_args), (sy_call_t *)linux_setfsuid, AUE_SETFSUID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 215 = linux_setfsuid */
+ { AS(linux_setfsgid_args), (sy_call_t *)linux_setfsgid, AUE_SETFSGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 216 = linux_setfsgid */
+ { AS(linux_pivot_root_args), (sy_call_t *)linux_pivot_root, AUE_PIVOT_ROOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 217 = linux_pivot_root */
+ { AS(linux_mincore_args), (sy_call_t *)linux_mincore, AUE_MINCORE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 218 = linux_mincore */
+ { AS(madvise_args), (sy_call_t *)sys_madvise, AUE_MADVISE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 219 = madvise */
+ { AS(linux_getdents64_args), (sy_call_t *)linux_getdents64, AUE_GETDIRENTRIES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 220 = linux_getdents64 */
+ { AS(linux_fcntl64_args), (sy_call_t *)linux_fcntl64, AUE_FCNTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 221 = linux_fcntl64 */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 222 = */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = */
+ { 0, (sy_call_t *)linux_gettid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 224 = linux_gettid */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 225 = linux_readahead */
+ { 0, (sy_call_t *)linux_setxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 226 = linux_setxattr */
+ { 0, (sy_call_t *)linux_lsetxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 227 = linux_lsetxattr */
+ { 0, (sy_call_t *)linux_fsetxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 228 = linux_fsetxattr */
+ { 0, (sy_call_t *)linux_getxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 229 = linux_getxattr */
+ { 0, (sy_call_t *)linux_lgetxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 230 = linux_lgetxattr */
+ { 0, (sy_call_t *)linux_fgetxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 231 = linux_fgetxattr */
+ { 0, (sy_call_t *)linux_listxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 232 = linux_listxattr */
+ { 0, (sy_call_t *)linux_llistxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 233 = linux_llistxattr */
+ { 0, (sy_call_t *)linux_flistxattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 234 = linux_flistxattr */
+ { 0, (sy_call_t *)linux_removexattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 235 = linux_removexattr */
+ { 0, (sy_call_t *)linux_lremovexattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 236 = linux_lremovexattr */
+ { 0, (sy_call_t *)linux_fremovexattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 237 = linux_fremovexattr */
+ { AS(linux_tkill_args), (sy_call_t *)linux_tkill, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 238 = linux_tkill */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 239 = linux_sendfile64 */
+ { AS(linux_sys_futex_args), (sy_call_t *)linux_sys_futex, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 240 = linux_sys_futex */
+ { AS(linux_sched_setaffinity_args), (sy_call_t *)linux_sched_setaffinity, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 241 = linux_sched_setaffinity */
+ { AS(linux_sched_getaffinity_args), (sy_call_t *)linux_sched_getaffinity, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 242 = linux_sched_getaffinity */
+ { AS(linux_set_thread_area_args), (sy_call_t *)linux_set_thread_area, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 243 = linux_set_thread_area */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 244 = linux_get_thread_area */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 245 = linux_io_setup */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 246 = linux_io_destroy */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 247 = linux_io_getevents */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 248 = linux_io_submit */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 249 = linux_io_cancel */
+ { AS(linux_fadvise64_args), (sy_call_t *)linux_fadvise64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 250 = linux_fadvise64 */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 251 = */
+ { AS(linux_exit_group_args), (sy_call_t *)linux_exit_group, AUE_EXIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 252 = linux_exit_group */
+ { 0, (sy_call_t *)linux_lookup_dcookie, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 253 = linux_lookup_dcookie */
+ { 0, (sy_call_t *)linux_epoll_create, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 254 = linux_epoll_create */
+ { 0, (sy_call_t *)linux_epoll_ctl, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 255 = linux_epoll_ctl */
+ { 0, (sy_call_t *)linux_epoll_wait, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 256 = linux_epoll_wait */
+ { 0, (sy_call_t *)linux_remap_file_pages, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 257 = linux_remap_file_pages */
+ { AS(linux_set_tid_address_args), (sy_call_t *)linux_set_tid_address, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 258 = linux_set_tid_address */
+ { 0, (sy_call_t *)linux_timer_create, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 259 = linux_timer_create */
+ { 0, (sy_call_t *)linux_timer_settime, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 260 = linux_timer_settime */
+ { 0, (sy_call_t *)linux_timer_gettime, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 261 = linux_timer_gettime */
+ { 0, (sy_call_t *)linux_timer_getoverrun, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 262 = linux_timer_getoverrun */
+ { 0, (sy_call_t *)linux_timer_delete, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 263 = linux_timer_delete */
+ { AS(linux_clock_settime_args), (sy_call_t *)linux_clock_settime, AUE_CLOCK_SETTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 264 = linux_clock_settime */
+ { AS(linux_clock_gettime_args), (sy_call_t *)linux_clock_gettime, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 265 = linux_clock_gettime */
+ { AS(linux_clock_getres_args), (sy_call_t *)linux_clock_getres, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 266 = linux_clock_getres */
+ { AS(linux_clock_nanosleep_args), (sy_call_t *)linux_clock_nanosleep, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 267 = linux_clock_nanosleep */
+ { AS(linux_statfs64_args), (sy_call_t *)linux_statfs64, AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 268 = linux_statfs64 */
+ { 0, (sy_call_t *)linux_fstatfs64, AUE_FSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 269 = linux_fstatfs64 */
+ { AS(linux_tgkill_args), (sy_call_t *)linux_tgkill, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 270 = linux_tgkill */
+ { AS(linux_utimes_args), (sy_call_t *)linux_utimes, AUE_UTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 271 = linux_utimes */
+ { AS(linux_fadvise64_64_args), (sy_call_t *)linux_fadvise64_64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 272 = linux_fadvise64_64 */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 273 = vserver */
+ { 0, (sy_call_t *)linux_mbind, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 274 = linux_mbind */
+ { 0, (sy_call_t *)linux_get_mempolicy, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 275 = linux_get_mempolicy */
+ { 0, (sy_call_t *)linux_set_mempolicy, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 276 = linux_set_mempolicy */
+ { 0, (sy_call_t *)linux_mq_open, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 277 = linux_mq_open */
+ { 0, (sy_call_t *)linux_mq_unlink, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 278 = linux_mq_unlink */
+ { 0, (sy_call_t *)linux_mq_timedsend, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 279 = linux_mq_timedsend */
+ { 0, (sy_call_t *)linux_mq_timedreceive, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 280 = linux_mq_timedreceive */
+ { 0, (sy_call_t *)linux_mq_notify, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 281 = linux_mq_notify */
+ { 0, (sy_call_t *)linux_mq_getsetattr, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 282 = linux_mq_getsetattr */
+ { 0, (sy_call_t *)linux_kexec_load, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 283 = linux_kexec_load */
+ { 0, (sy_call_t *)linux_waitid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 284 = linux_waitid */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 285 = */
+ { 0, (sy_call_t *)linux_add_key, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 286 = linux_add_key */
+ { 0, (sy_call_t *)linux_request_key, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 287 = linux_request_key */
+ { 0, (sy_call_t *)linux_keyctl, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 288 = linux_keyctl */
+ { 0, (sy_call_t *)linux_ioprio_set, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 289 = linux_ioprio_set */
+ { 0, (sy_call_t *)linux_ioprio_get, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 290 = linux_ioprio_get */
+ { 0, (sy_call_t *)linux_inotify_init, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 291 = linux_inotify_init */
+ { 0, (sy_call_t *)linux_inotify_add_watch, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
+ { 0, (sy_call_t *)linux_inotify_rm_watch, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
+ { 0, (sy_call_t *)linux_migrate_pages, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 294 = linux_migrate_pages */
+ { AS(linux_openat_args), (sy_call_t *)linux_openat, AUE_OPEN_RWTC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 295 = linux_openat */
+ { AS(linux_mkdirat_args), (sy_call_t *)linux_mkdirat, AUE_MKDIRAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 296 = linux_mkdirat */
+ { AS(linux_mknodat_args), (sy_call_t *)linux_mknodat, AUE_MKNODAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 297 = linux_mknodat */
+ { AS(linux_fchownat_args), (sy_call_t *)linux_fchownat, AUE_FCHOWNAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 298 = linux_fchownat */
+ { AS(linux_futimesat_args), (sy_call_t *)linux_futimesat, AUE_FUTIMESAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 299 = linux_futimesat */
+ { AS(linux_fstatat64_args), (sy_call_t *)linux_fstatat64, AUE_FSTATAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 300 = linux_fstatat64 */
+ { AS(linux_unlinkat_args), (sy_call_t *)linux_unlinkat, AUE_UNLINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 301 = linux_unlinkat */
+ { AS(linux_renameat_args), (sy_call_t *)linux_renameat, AUE_RENAMEAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 302 = linux_renameat */
+ { AS(linux_linkat_args), (sy_call_t *)linux_linkat, AUE_LINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 303 = linux_linkat */
+ { AS(linux_symlinkat_args), (sy_call_t *)linux_symlinkat, AUE_SYMLINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 304 = linux_symlinkat */
+ { AS(linux_readlinkat_args), (sy_call_t *)linux_readlinkat, AUE_READLINKAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 305 = linux_readlinkat */
+ { AS(linux_fchmodat_args), (sy_call_t *)linux_fchmodat, AUE_FCHMODAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 306 = linux_fchmodat */
+ { AS(linux_faccessat_args), (sy_call_t *)linux_faccessat, AUE_FACCESSAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 307 = linux_faccessat */
+ { 0, (sy_call_t *)linux_pselect6, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 308 = linux_pselect6 */
+ { 0, (sy_call_t *)linux_ppoll, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 309 = linux_ppoll */
+ { 0, (sy_call_t *)linux_unshare, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 310 = linux_unshare */
+ { AS(linux_set_robust_list_args), (sy_call_t *)linux_set_robust_list, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 311 = linux_set_robust_list */
+ { AS(linux_get_robust_list_args), (sy_call_t *)linux_get_robust_list, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 312 = linux_get_robust_list */
+ { 0, (sy_call_t *)linux_splice, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 313 = linux_splice */
+ { 0, (sy_call_t *)linux_sync_file_range, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 314 = linux_sync_file_range */
+ { 0, (sy_call_t *)linux_tee, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 315 = linux_tee */
+ { 0, (sy_call_t *)linux_vmsplice, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 316 = linux_vmsplice */
+ { 0, (sy_call_t *)linux_move_pages, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 317 = linux_move_pages */
+ { 0, (sy_call_t *)linux_getcpu, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 318 = linux_getcpu */
+ { 0, (sy_call_t *)linux_epoll_pwait, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 319 = linux_epoll_pwait */
+ { 0, (sy_call_t *)linux_utimensat, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 320 = linux_utimensat */
+ { 0, (sy_call_t *)linux_signalfd, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 321 = linux_signalfd */
+ { 0, (sy_call_t *)linux_timerfd_create, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 322 = linux_timerfd_create */
+ { 0, (sy_call_t *)linux_eventfd, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 323 = linux_eventfd */
+ { 0, (sy_call_t *)linux_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 324 = linux_fallocate */
+ { 0, (sy_call_t *)linux_timerfd_settime, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 325 = linux_timerfd_settime */
+ { 0, (sy_call_t *)linux_timerfd_gettime, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 326 = linux_timerfd_gettime */
+ { 0, (sy_call_t *)linux_signalfd4, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 327 = linux_signalfd4 */
+ { 0, (sy_call_t *)linux_eventfd2, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 328 = linux_eventfd2 */
+ { 0, (sy_call_t *)linux_epoll_create1, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 329 = linux_epoll_create1 */
+ { 0, (sy_call_t *)linux_dup3, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 330 = linux_dup3 */
+ { AS(linux_pipe2_args), (sy_call_t *)linux_pipe2, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 331 = linux_pipe2 */
+ { 0, (sy_call_t *)linux_inotify_init1, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
+ { 0, (sy_call_t *)linux_preadv, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 333 = linux_preadv */
+ { 0, (sy_call_t *)linux_pwritev, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 334 = linux_pwritev */
+ { 0, (sy_call_t *)linux_rt_tsigqueueinfo, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 335 = linux_rt_tsigqueueinfo */
+ { 0, (sy_call_t *)linux_perf_event_open, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 336 = linux_perf_event_open */
+ { 0, (sy_call_t *)linux_recvmmsg, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 337 = linux_recvmmsg */
+ { 0, (sy_call_t *)linux_fanotify_init, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 338 = linux_fanotify_init */
+ { 0, (sy_call_t *)linux_fanotify_mark, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 339 = linux_fanotify_mark */
+ { 0, (sy_call_t *)linux_prlimit64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 340 = linux_prlimit64 */
+ { 0, (sy_call_t *)linux_name_to_handle_at, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 341 = linux_name_to_handle_at */
+ { 0, (sy_call_t *)linux_open_by_handle_at, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 342 = linux_open_by_handle_at */
+ { 0, (sy_call_t *)linux_clock_adjtime, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 343 = linux_clock_adjtime */
+ { 0, (sy_call_t *)linux_syncfs, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 344 = linux_syncfs */
+ { 0, (sy_call_t *)linux_sendmmsg, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 345 = linux_sendmmsg */
+ { 0, (sy_call_t *)linux_setns, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 346 = linux_setns */
+ { 0, (sy_call_t *)linux_process_vm_readv, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 347 = linux_process_vm_readv */
+ { 0, (sy_call_t *)linux_process_vm_writev, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 348 = linux_process_vm_writev */
+};
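
Each row of the table above follows the shape of FreeBSD's struct sysent: an argument-size field (AS(...) when the syscall takes arguments, 0 otherwise), the handler cast to sy_call_t, the audit event, and dispatch flags. The sketch below is a minimal, self-contained illustration of this table-driven dispatch pattern; demo_sysent, demo_syscall, and demo_getpid are hypothetical stand-ins, not the kernel's actual definitions.

#include <stdint.h>
#include <stdio.h>

typedef int (sy_call_fn)(void *args);		/* stand-in for sy_call_t */

struct demo_sysent {
	int	    sy_narg;			/* size of the argument struct */
	sy_call_fn *sy_call;			/* handler implementing the call */
};

static int
demo_getpid(void *args)
{
	(void)args;
	return (42);				/* pretend pid */
}

static struct demo_sysent demo_table[] = {
	[20] = { 0, demo_getpid },		/* 20 = getpid, as in the table above */
};

static int
demo_syscall(int sysnum, void *args)
{
	struct demo_sysent *se = &demo_table[sysnum];

	if (se->sy_call == NULL)
		return (-1);			/* a real kernel would return ENOSYS */
	return (se->sy_call(args));
}

int
main(void)
{
	printf("getpid -> %d\n", demo_syscall(20, NULL));
	return (0);
}

The real table additionally records the audit event (AUE_*) and the SY_THR_STATIC threading flag, so dispatch, auditing, and DTrace all share one generated source of truth.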
diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c
new file mode 100644
index 0000000..0b020a7
--- /dev/null
+++ b/sys/amd64/linux32/linux32_systrace_args.c
@@ -0,0 +1,6667 @@
+/*
+ * System call argument to DTrace register array conversion.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * $FreeBSD$
+ * This file is part of the DTrace syscall provider.
+ */
+
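+/*
+ * systrace_args() flattens the typed argument struct of the given syscall
+ * number into the uint64_t register array that the DTrace systrace
+ * provider exposes as arg0..argN; pointers are narrowed through intptr_t
+ * and signed values travel via the aliased iarg view of the same array.
+ */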
+static void
+systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
+{
+ int64_t *iarg = (int64_t *) uarg;
+ switch (sysnum) {
+#define nosys linux_nosys
+ /* sys_exit */
+ case 1: {
+ struct sys_exit_args *p = params;
+ iarg[0] = p->rval; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_fork */
+ case 2: {
+ *n_args = 0;
+ break;
+ }
+ /* read */
+ case 3: {
+ struct read_args *p = params;
+ iarg[0] = p->fd; /* int */
+ uarg[1] = (intptr_t) p->buf; /* char * */
+ uarg[2] = p->nbyte; /* u_int */
+ *n_args = 3;
+ break;
+ }
+ /* write */
+ case 4: {
+ struct write_args *p = params;
+ iarg[0] = p->fd; /* int */
+ uarg[1] = (intptr_t) p->buf; /* char * */
+ uarg[2] = p->nbyte; /* u_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_open */
+ case 5: {
+ struct linux_open_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->flags; /* l_int */
+ iarg[2] = p->mode; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* close */
+ case 6: {
+ struct close_args *p = params;
+ iarg[0] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_waitpid */
+ case 7: {
+ struct linux_waitpid_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ uarg[1] = (intptr_t) p->status; /* l_int * */
+ iarg[2] = p->options; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_creat */
+ case 8: {
+ struct linux_creat_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->mode; /* l_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_link */
+ case 9: {
+ struct linux_link_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->to; /* char * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_unlink */
+ case 10: {
+ struct linux_unlink_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_execve */
+ case 11: {
+ struct linux_execve_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->argp; /* uint32_t * */
+ uarg[2] = (intptr_t) p->envp; /* uint32_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_chdir */
+ case 12: {
+ struct linux_chdir_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_time */
+ case 13: {
+ struct linux_time_args *p = params;
+ uarg[0] = (intptr_t) p->tm; /* l_time_t * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_mknod */
+ case 14: {
+ struct linux_mknod_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->mode; /* l_int */
+ iarg[2] = p->dev; /* l_dev_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_chmod */
+ case 15: {
+ struct linux_chmod_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->mode; /* l_mode_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_lchown16 */
+ case 16: {
+ struct linux_lchown16_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->uid; /* l_uid16_t */
+ iarg[2] = p->gid; /* l_gid16_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_stat */
+ case 18: {
+ struct linux_stat_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->up; /* struct linux_stat * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_lseek */
+ case 19: {
+ struct linux_lseek_args *p = params;
+ iarg[0] = p->fdes; /* l_uint */
+ iarg[1] = p->off; /* l_off_t */
+ iarg[2] = p->whence; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getpid */
+ case 20: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mount */
+ case 21: {
+ struct linux_mount_args *p = params;
+ uarg[0] = (intptr_t) p->specialfile; /* char * */
+ uarg[1] = (intptr_t) p->dir; /* char * */
+ uarg[2] = (intptr_t) p->filesystemtype; /* char * */
+ iarg[3] = p->rwflag; /* l_ulong */
+ uarg[4] = (intptr_t) p->data; /* void * */
+ *n_args = 5;
+ break;
+ }
+ /* linux_oldumount */
+ case 22: {
+ struct linux_oldumount_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setuid16 */
+ case 23: {
+ struct linux_setuid16_args *p = params;
+ iarg[0] = p->uid; /* l_uid16_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_getuid16 */
+ case 24: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_stime */
+ case 25: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_ptrace */
+ case 26: {
+ struct linux_ptrace_args *p = params;
+ iarg[0] = p->req; /* l_long */
+ iarg[1] = p->pid; /* l_long */
+ iarg[2] = p->addr; /* l_long */
+ iarg[3] = p->data; /* l_long */
+ *n_args = 4;
+ break;
+ }
+ /* linux_alarm */
+ case 27: {
+ struct linux_alarm_args *p = params;
+ iarg[0] = p->secs; /* l_uint */
+ *n_args = 1;
+ break;
+ }
+ /* linux_pause */
+ case 29: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_utime */
+ case 30: {
+ struct linux_utime_args *p = params;
+ uarg[0] = (intptr_t) p->fname; /* char * */
+ uarg[1] = (intptr_t) p->times; /* struct l_utimbuf * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_access */
+ case 33: {
+ struct linux_access_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->amode; /* l_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_nice */
+ case 34: {
+ struct linux_nice_args *p = params;
+ iarg[0] = p->inc; /* l_int */
+ *n_args = 1;
+ break;
+ }
+ /* sync */
+ case 36: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_kill */
+ case 37: {
+ struct linux_kill_args *p = params;
+ iarg[0] = p->pid; /* l_int */
+ iarg[1] = p->signum; /* l_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_rename */
+ case 38: {
+ struct linux_rename_args *p = params;
+ uarg[0] = (intptr_t) p->from; /* char * */
+ uarg[1] = (intptr_t) p->to; /* char * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_mkdir */
+ case 39: {
+ struct linux_mkdir_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->mode; /* l_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_rmdir */
+ case 40: {
+ struct linux_rmdir_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* dup */
+ case 41: {
+ struct dup_args *p = params;
+ uarg[0] = p->fd; /* u_int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_pipe */
+ case 42: {
+ struct linux_pipe_args *p = params;
+ uarg[0] = (intptr_t) p->pipefds; /* l_int * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_times */
+ case 43: {
+ struct linux_times_args *p = params;
+ uarg[0] = (intptr_t) p->buf; /* struct l_times_argv * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_brk */
+ case 45: {
+ struct linux_brk_args *p = params;
+ iarg[0] = p->dsend; /* l_ulong */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setgid16 */
+ case 46: {
+ struct linux_setgid16_args *p = params;
+ iarg[0] = p->gid; /* l_gid16_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_getgid16 */
+ case 47: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_signal */
+ case 48: {
+ struct linux_signal_args *p = params;
+ iarg[0] = p->sig; /* l_int */
+ iarg[1] = p->handler; /* l_handler_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_geteuid16 */
+ case 49: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_getegid16 */
+ case 50: {
+ *n_args = 0;
+ break;
+ }
+ /* acct */
+ case 51: {
+ struct acct_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_umount */
+ case 52: {
+ struct linux_umount_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->flags; /* l_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_ioctl */
+ case 54: {
+ struct linux_ioctl_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ iarg[1] = p->cmd; /* l_uint */
+ uarg[2] = p->arg; /* uintptr_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_fcntl */
+ case 55: {
+ struct linux_fcntl_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ iarg[1] = p->cmd; /* l_uint */
+ uarg[2] = p->arg; /* uintptr_t */
+ *n_args = 3;
+ break;
+ }
+ /* setpgid */
+ case 57: {
+ struct setpgid_args *p = params;
+ iarg[0] = p->pid; /* int */
+ iarg[1] = p->pgid; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_olduname */
+ case 59: {
+ *n_args = 0;
+ break;
+ }
+ /* umask */
+ case 60: {
+ struct umask_args *p = params;
+ iarg[0] = p->newmask; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* chroot */
+ case 61: {
+ struct chroot_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_ustat */
+ case 62: {
+ struct linux_ustat_args *p = params;
+ iarg[0] = p->dev; /* l_dev_t */
+ uarg[1] = (intptr_t) p->ubuf; /* struct l_ustat * */
+ *n_args = 2;
+ break;
+ }
+ /* dup2 */
+ case 63: {
+ struct dup2_args *p = params;
+ uarg[0] = p->from; /* u_int */
+ uarg[1] = p->to; /* u_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_getppid */
+ case 64: {
+ *n_args = 0;
+ break;
+ }
+ /* getpgrp */
+ case 65: {
+ *n_args = 0;
+ break;
+ }
+ /* setsid */
+ case 66: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_sigaction */
+ case 67: {
+ struct linux_sigaction_args *p = params;
+ iarg[0] = p->sig; /* l_int */
+ uarg[1] = (intptr_t) p->nsa; /* l_osigaction_t * */
+ uarg[2] = (intptr_t) p->osa; /* l_osigaction_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_sgetmask */
+ case 68: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_ssetmask */
+ case 69: {
+ struct linux_ssetmask_args *p = params;
+ iarg[0] = p->mask; /* l_osigset_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setreuid16 */
+ case 70: {
+ struct linux_setreuid16_args *p = params;
+ iarg[0] = p->ruid; /* l_uid16_t */
+ iarg[1] = p->euid; /* l_uid16_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_setregid16 */
+ case 71: {
+ struct linux_setregid16_args *p = params;
+ iarg[0] = p->rgid; /* l_gid16_t */
+ iarg[1] = p->egid; /* l_gid16_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_sigsuspend */
+ case 72: {
+ struct linux_sigsuspend_args *p = params;
+ iarg[0] = p->hist0; /* l_int */
+ iarg[1] = p->hist1; /* l_int */
+ iarg[2] = p->mask; /* l_osigset_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_sigpending */
+ case 73: {
+ struct linux_sigpending_args *p = params;
+ uarg[0] = (intptr_t) p->mask; /* l_osigset_t * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_sethostname */
+ case 74: {
+ struct linux_sethostname_args *p = params;
+ uarg[0] = (intptr_t) p->hostname; /* char * */
+ uarg[1] = p->len; /* u_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_setrlimit */
+ case 75: {
+ struct linux_setrlimit_args *p = params;
+ iarg[0] = p->resource; /* l_uint */
+ uarg[1] = (intptr_t) p->rlim; /* struct l_rlimit * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_old_getrlimit */
+ case 76: {
+ struct linux_old_getrlimit_args *p = params;
+ iarg[0] = p->resource; /* l_uint */
+ uarg[1] = (intptr_t) p->rlim; /* struct l_rlimit * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_getrusage */
+ case 77: {
+ struct linux_getrusage_args *p = params;
+ iarg[0] = p->who; /* int */
+ uarg[1] = (intptr_t) p->rusage; /* struct l_rusage * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_gettimeofday */
+ case 78: {
+ struct linux_gettimeofday_args *p = params;
+ uarg[0] = (intptr_t) p->tp; /* struct l_timeval * */
+ uarg[1] = (intptr_t) p->tzp; /* struct timezone * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_settimeofday */
+ case 79: {
+ struct linux_settimeofday_args *p = params;
+ uarg[0] = (intptr_t) p->tp; /* struct l_timeval * */
+ uarg[1] = (intptr_t) p->tzp; /* struct timezone * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_getgroups16 */
+ case 80: {
+ struct linux_getgroups16_args *p = params;
+ iarg[0] = p->gidsetsize; /* l_uint */
+ uarg[1] = (intptr_t) p->gidset; /* l_gid16_t * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_setgroups16 */
+ case 81: {
+ struct linux_setgroups16_args *p = params;
+ iarg[0] = p->gidsetsize; /* l_uint */
+ uarg[1] = (intptr_t) p->gidset; /* l_gid16_t * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_old_select */
+ case 82: {
+ struct linux_old_select_args *p = params;
+ uarg[0] = (intptr_t) p->ptr; /* struct l_old_select_argv * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_symlink */
+ case 83: {
+ struct linux_symlink_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->to; /* char * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_lstat */
+ case 84: {
+ struct linux_lstat_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->up; /* struct linux_lstat * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_readlink */
+ case 85: {
+ struct linux_readlink_args *p = params;
+ uarg[0] = (intptr_t) p->name; /* char * */
+ uarg[1] = (intptr_t) p->buf; /* char * */
+ iarg[2] = p->count; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* swapon */
+ case 87: {
+ struct swapon_args *p = params;
+ uarg[0] = (intptr_t) p->name; /* char * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_reboot */
+ case 88: {
+ struct linux_reboot_args *p = params;
+ iarg[0] = p->magic1; /* l_int */
+ iarg[1] = p->magic2; /* l_int */
+ iarg[2] = p->cmd; /* l_uint */
+ uarg[3] = (intptr_t) p->arg; /* void * */
+ *n_args = 4;
+ break;
+ }
+ /* linux_readdir */
+ case 89: {
+ struct linux_readdir_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->dent; /* struct l_dirent * */
+ iarg[2] = p->count; /* l_uint */
+ *n_args = 3;
+ break;
+ }
+ /* linux_mmap */
+ case 90: {
+ struct linux_mmap_args *p = params;
+ uarg[0] = (intptr_t) p->ptr; /* struct l_mmap_argv * */
+ *n_args = 1;
+ break;
+ }
+ /* munmap */
+ case 91: {
+ struct munmap_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* caddr_t */
+ iarg[1] = p->len; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_truncate */
+ case 92: {
+ struct linux_truncate_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->length; /* l_ulong */
+ *n_args = 2;
+ break;
+ }
+ /* linux_ftruncate */
+ case 93: {
+ struct linux_ftruncate_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->length; /* long */
+ *n_args = 2;
+ break;
+ }
+ /* fchmod */
+ case 94: {
+ struct fchmod_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->mode; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* fchown */
+ case 95: {
+ struct fchown_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->uid; /* int */
+ iarg[2] = p->gid; /* int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getpriority */
+ case 96: {
+ struct linux_getpriority_args *p = params;
+ iarg[0] = p->which; /* int */
+ iarg[1] = p->who; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* setpriority */
+ case 97: {
+ struct setpriority_args *p = params;
+ iarg[0] = p->which; /* int */
+ iarg[1] = p->who; /* int */
+ iarg[2] = p->prio; /* int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_statfs */
+ case 99: {
+ struct linux_statfs_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->buf; /* struct l_statfs_buf * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_fstatfs */
+ case 100: {
+ struct linux_fstatfs_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->buf; /* struct l_statfs_buf * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_socketcall */
+ case 102: {
+ struct linux_socketcall_args *p = params;
+ iarg[0] = p->what; /* l_int */
+ iarg[1] = p->args; /* l_ulong */
+ *n_args = 2;
+ break;
+ }
+ /* linux_syslog */
+ case 103: {
+ struct linux_syslog_args *p = params;
+ iarg[0] = p->type; /* l_int */
+ uarg[1] = (intptr_t) p->buf; /* char * */
+ iarg[2] = p->len; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_setitimer */
+ case 104: {
+ struct linux_setitimer_args *p = params;
+ iarg[0] = p->which; /* l_int */
+ uarg[1] = (intptr_t) p->itv; /* struct l_itimerval * */
+ uarg[2] = (intptr_t) p->oitv; /* struct l_itimerval * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getitimer */
+ case 105: {
+ struct linux_getitimer_args *p = params;
+ iarg[0] = p->which; /* l_int */
+ uarg[1] = (intptr_t) p->itv; /* struct l_itimerval * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_newstat */
+ case 106: {
+ struct linux_newstat_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->buf; /* struct l_newstat * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_newlstat */
+ case 107: {
+ struct linux_newlstat_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = (intptr_t) p->buf; /* struct l_newstat * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_newfstat */
+ case 108: {
+ struct linux_newfstat_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->buf; /* struct l_newstat * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_uname */
+ case 109: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_iopl */
+ case 110: {
+ struct linux_iopl_args *p = params;
+ iarg[0] = p->level; /* l_int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_vhangup */
+ case 111: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_wait4 */
+ case 114: {
+ struct linux_wait4_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ uarg[1] = (intptr_t) p->status; /* l_uint * */
+ iarg[2] = p->options; /* l_int */
+ uarg[3] = (intptr_t) p->rusage; /* struct l_rusage * */
+ *n_args = 4;
+ break;
+ }
+ /* linux_swapoff */
+ case 115: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_sysinfo */
+ case 116: {
+ struct linux_sysinfo_args *p = params;
+ uarg[0] = (intptr_t) p->info; /* struct l_sysinfo * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_ipc */
+ case 117: {
+ struct linux_ipc_args *p = params;
+ iarg[0] = p->what; /* l_uint */
+ iarg[1] = p->arg1; /* l_int */
+ iarg[2] = p->arg2; /* l_int */
+ iarg[3] = p->arg3; /* l_int */
+ uarg[4] = (intptr_t) p->ptr; /* void * */
+ iarg[5] = p->arg5; /* l_long */
+ *n_args = 6;
+ break;
+ }
+ /* fsync */
+ case 118: {
+ struct fsync_args *p = params;
+ iarg[0] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_sigreturn */
+ case 119: {
+ struct linux_sigreturn_args *p = params;
+ uarg[0] = (intptr_t) p->sfp; /* struct l_sigframe * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_clone */
+ case 120: {
+ struct linux_clone_args *p = params;
+ iarg[0] = p->flags; /* l_int */
+ uarg[1] = (intptr_t) p->stack; /* void * */
+ uarg[2] = (intptr_t) p->parent_tidptr; /* void * */
+ uarg[3] = (intptr_t) p->tls; /* void * */
+ uarg[4] = (intptr_t) p->child_tidptr; /* void * */
+ *n_args = 5;
+ break;
+ }
+ /* linux_setdomainname */
+ case 121: {
+ struct linux_setdomainname_args *p = params;
+ uarg[0] = (intptr_t) p->name; /* char * */
+ iarg[1] = p->len; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_newuname */
+ case 122: {
+ struct linux_newuname_args *p = params;
+ uarg[0] = (intptr_t) p->buf; /* struct l_new_utsname * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_adjtimex */
+ case 124: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mprotect */
+ case 125: {
+ struct linux_mprotect_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* caddr_t */
+ iarg[1] = p->len; /* int */
+ iarg[2] = p->prot; /* int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_sigprocmask */
+ case 126: {
+ struct linux_sigprocmask_args *p = params;
+ iarg[0] = p->how; /* l_int */
+ uarg[1] = (intptr_t) p->mask; /* l_osigset_t * */
+ uarg[2] = (intptr_t) p->omask; /* l_osigset_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_create_module */
+ case 127: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_init_module */
+ case 128: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_delete_module */
+ case 129: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_get_kernel_syms */
+ case 130: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_quotactl */
+ case 131: {
+ *n_args = 0;
+ break;
+ }
+ /* getpgid */
+ case 132: {
+ struct getpgid_args *p = params;
+ iarg[0] = p->pid; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* fchdir */
+ case 133: {
+ struct fchdir_args *p = params;
+ iarg[0] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_bdflush */
+ case 134: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_sysfs */
+ case 135: {
+ struct linux_sysfs_args *p = params;
+ iarg[0] = p->option; /* l_int */
+ iarg[1] = p->arg1; /* l_ulong */
+ iarg[2] = p->arg2; /* l_ulong */
+ *n_args = 3;
+ break;
+ }
+ /* linux_personality */
+ case 136: {
+ struct linux_personality_args *p = params;
+ iarg[0] = p->per; /* l_ulong */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setfsuid16 */
+ case 138: {
+ struct linux_setfsuid16_args *p = params;
+ iarg[0] = p->uid; /* l_uid16_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setfsgid16 */
+ case 139: {
+ struct linux_setfsgid16_args *p = params;
+ iarg[0] = p->gid; /* l_gid16_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_llseek */
+ case 140: {
+ struct linux_llseek_args *p = params;
+ iarg[0] = p->fd; /* l_int */
+ iarg[1] = p->ohigh; /* l_ulong */
+ iarg[2] = p->olow; /* l_ulong */
+ uarg[3] = (intptr_t) p->res; /* l_loff_t * */
+ iarg[4] = p->whence; /* l_uint */
+ *n_args = 5;
+ break;
+ }
+ /* linux_getdents */
+ case 141: {
+ struct linux_getdents_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->dent; /* void * */
+ iarg[2] = p->count; /* l_uint */
+ *n_args = 3;
+ break;
+ }
+ /* linux_select */
+ case 142: {
+ struct linux_select_args *p = params;
+ iarg[0] = p->nfds; /* l_int */
+ uarg[1] = (intptr_t) p->readfds; /* l_fd_set * */
+ uarg[2] = (intptr_t) p->writefds; /* l_fd_set * */
+ uarg[3] = (intptr_t) p->exceptfds; /* l_fd_set * */
+ uarg[4] = (intptr_t) p->timeout; /* struct l_timeval * */
+ *n_args = 5;
+ break;
+ }
+ /* flock */
+ case 143: {
+ struct flock_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->how; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_msync */
+ case 144: {
+ struct linux_msync_args *p = params;
+ iarg[0] = p->addr; /* l_ulong */
+ iarg[1] = p->len; /* l_size_t */
+ iarg[2] = p->fl; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_readv */
+ case 145: {
+ struct linux_readv_args *p = params;
+ iarg[0] = p->fd; /* l_ulong */
+ uarg[1] = (intptr_t) p->iovp; /* struct l_iovec32 * */
+ iarg[2] = p->iovcnt; /* l_ulong */
+ *n_args = 3;
+ break;
+ }
+ /* linux_writev */
+ case 146: {
+ struct linux_writev_args *p = params;
+ iarg[0] = p->fd; /* l_ulong */
+ uarg[1] = (intptr_t) p->iovp; /* struct l_iovec32 * */
+ iarg[2] = p->iovcnt; /* l_ulong */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getsid */
+ case 147: {
+ struct linux_getsid_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_fdatasync */
+ case 148: {
+ struct linux_fdatasync_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ *n_args = 1;
+ break;
+ }
+ /* linux_sysctl */
+ case 149: {
+ struct linux_sysctl_args *p = params;
+ uarg[0] = (intptr_t) p->args; /* struct l___sysctl_args * */
+ *n_args = 1;
+ break;
+ }
+ /* mlock */
+ case 150: {
+ struct mlock_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* const void * */
+ uarg[1] = p->len; /* size_t */
+ *n_args = 2;
+ break;
+ }
+ /* munlock */
+ case 151: {
+ struct munlock_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* const void * */
+ uarg[1] = p->len; /* size_t */
+ *n_args = 2;
+ break;
+ }
+ /* mlockall */
+ case 152: {
+ struct mlockall_args *p = params;
+ iarg[0] = p->how; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* munlockall */
+ case 153: {
+ *n_args = 0;
+ break;
+ }
+ /* sched_setparam */
+ case 154: {
+ struct sched_setparam_args *p = params;
+ iarg[0] = p->pid; /* pid_t */
+ uarg[1] = (intptr_t) p->param; /* const struct sched_param * */
+ *n_args = 2;
+ break;
+ }
+ /* sched_getparam */
+ case 155: {
+ struct sched_getparam_args *p = params;
+ iarg[0] = p->pid; /* pid_t */
+ uarg[1] = (intptr_t) p->param; /* struct sched_param * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_sched_setscheduler */
+ case 156: {
+ struct linux_sched_setscheduler_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ iarg[1] = p->policy; /* l_int */
+ uarg[2] = (intptr_t) p->param; /* struct l_sched_param * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_sched_getscheduler */
+ case 157: {
+ struct linux_sched_getscheduler_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ *n_args = 1;
+ break;
+ }
+ /* sched_yield */
+ case 158: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_sched_get_priority_max */
+ case 159: {
+ struct linux_sched_get_priority_max_args *p = params;
+ iarg[0] = p->policy; /* l_int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_sched_get_priority_min */
+ case 160: {
+ struct linux_sched_get_priority_min_args *p = params;
+ iarg[0] = p->policy; /* l_int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_sched_rr_get_interval */
+ case 161: {
+ struct linux_sched_rr_get_interval_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ uarg[1] = (intptr_t) p->interval; /* struct l_timespec * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_nanosleep */
+ case 162: {
+ struct linux_nanosleep_args *p = params;
+ uarg[0] = (intptr_t) p->rqtp; /* const struct l_timespec * */
+ uarg[1] = (intptr_t) p->rmtp; /* struct l_timespec * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_mremap */
+ case 163: {
+ struct linux_mremap_args *p = params;
+ iarg[0] = p->addr; /* l_ulong */
+ iarg[1] = p->old_len; /* l_ulong */
+ iarg[2] = p->new_len; /* l_ulong */
+ iarg[3] = p->flags; /* l_ulong */
+ iarg[4] = p->new_addr; /* l_ulong */
+ *n_args = 5;
+ break;
+ }
+ /* linux_setresuid16 */
+ case 164: {
+ struct linux_setresuid16_args *p = params;
+ iarg[0] = p->ruid; /* l_uid16_t */
+ iarg[1] = p->euid; /* l_uid16_t */
+ iarg[2] = p->suid; /* l_uid16_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getresuid16 */
+ case 165: {
+ struct linux_getresuid16_args *p = params;
+ uarg[0] = (intptr_t) p->ruid; /* l_uid16_t * */
+ uarg[1] = (intptr_t) p->euid; /* l_uid16_t * */
+ uarg[2] = (intptr_t) p->suid; /* l_uid16_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_query_module */
+ case 167: {
+ *n_args = 0;
+ break;
+ }
+ /* poll */
+ case 168: {
+ struct poll_args *p = params;
+ uarg[0] = (intptr_t) p->fds; /* struct pollfd * */
+ uarg[1] = p->nfds; /* unsigned int */
+ iarg[2] = p->timeout; /* int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_nfsservctl */
+ case 169: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_setresgid16 */
+ case 170: {
+ struct linux_setresgid16_args *p = params;
+ iarg[0] = p->rgid; /* l_gid16_t */
+ iarg[1] = p->egid; /* l_gid16_t */
+ iarg[2] = p->sgid; /* l_gid16_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getresgid16 */
+ case 171: {
+ struct linux_getresgid16_args *p = params;
+ uarg[0] = (intptr_t) p->rgid; /* l_gid16_t * */
+ uarg[1] = (intptr_t) p->egid; /* l_gid16_t * */
+ uarg[2] = (intptr_t) p->sgid; /* l_gid16_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_prctl */
+ case 172: {
+ struct linux_prctl_args *p = params;
+ iarg[0] = p->option; /* l_int */
+ iarg[1] = p->arg2; /* l_int */
+ iarg[2] = p->arg3; /* l_int */
+ iarg[3] = p->arg4; /* l_int */
+ iarg[4] = p->arg5; /* l_int */
+ *n_args = 5;
+ break;
+ }
+ /* linux_rt_sigreturn */
+ case 173: {
+ struct linux_rt_sigreturn_args *p = params;
+ uarg[0] = (intptr_t) p->ucp; /* struct l_ucontext * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_rt_sigaction */
+ case 174: {
+ struct linux_rt_sigaction_args *p = params;
+ iarg[0] = p->sig; /* l_int */
+ uarg[1] = (intptr_t) p->act; /* l_sigaction_t * */
+ uarg[2] = (intptr_t) p->oact; /* l_sigaction_t * */
+ iarg[3] = p->sigsetsize; /* l_size_t */
+ *n_args = 4;
+ break;
+ }
+ /* linux_rt_sigprocmask */
+ case 175: {
+ struct linux_rt_sigprocmask_args *p = params;
+ iarg[0] = p->how; /* l_int */
+ uarg[1] = (intptr_t) p->mask; /* l_sigset_t * */
+ uarg[2] = (intptr_t) p->omask; /* l_sigset_t * */
+ iarg[3] = p->sigsetsize; /* l_size_t */
+ *n_args = 4;
+ break;
+ }
+ /* linux_rt_sigpending */
+ case 176: {
+ struct linux_rt_sigpending_args *p = params;
+ uarg[0] = (intptr_t) p->set; /* l_sigset_t * */
+ iarg[1] = p->sigsetsize; /* l_size_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_rt_sigtimedwait */
+ case 177: {
+ struct linux_rt_sigtimedwait_args *p = params;
+ uarg[0] = (intptr_t) p->mask; /* l_sigset_t * */
+ uarg[1] = (intptr_t) p->ptr; /* l_siginfo_t * */
+ uarg[2] = (intptr_t) p->timeout; /* struct l_timeval * */
+ iarg[3] = p->sigsetsize; /* l_size_t */
+ *n_args = 4;
+ break;
+ }
+ /* linux_rt_sigqueueinfo */
+ case 178: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_rt_sigsuspend */
+ case 179: {
+ struct linux_rt_sigsuspend_args *p = params;
+ uarg[0] = (intptr_t) p->newset; /* l_sigset_t * */
+ iarg[1] = p->sigsetsize; /* l_size_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_pread */
+ case 180: {
+ struct linux_pread_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->buf; /* char * */
+ iarg[2] = p->nbyte; /* l_size_t */
+ iarg[3] = p->offset; /* l_loff_t */
+ *n_args = 4;
+ break;
+ }
+ /* linux_pwrite */
+ case 181: {
+ struct linux_pwrite_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->buf; /* char * */
+ iarg[2] = p->nbyte; /* l_size_t */
+ iarg[3] = p->offset; /* l_loff_t */
+ *n_args = 4;
+ break;
+ }
+ /* linux_chown16 */
+ case 182: {
+ struct linux_chown16_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->uid; /* l_uid16_t */
+ iarg[2] = p->gid; /* l_gid16_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getcwd */
+ case 183: {
+ struct linux_getcwd_args *p = params;
+ uarg[0] = (intptr_t) p->buf; /* char * */
+ iarg[1] = p->bufsize; /* l_ulong */
+ *n_args = 2;
+ break;
+ }
+ /* linux_capget */
+ case 184: {
+ struct linux_capget_args *p = params;
+ uarg[0] = (intptr_t) p->hdrp; /* struct l_user_cap_header * */
+ uarg[1] = (intptr_t) p->datap; /* struct l_user_cap_data * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_capset */
+ case 185: {
+ struct linux_capset_args *p = params;
+ uarg[0] = (intptr_t) p->hdrp; /* struct l_user_cap_header * */
+ uarg[1] = (intptr_t) p->datap; /* struct l_user_cap_data * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_sigaltstack */
+ case 186: {
+ struct linux_sigaltstack_args *p = params;
+ uarg[0] = (intptr_t) p->uss; /* l_stack_t * */
+ uarg[1] = (intptr_t) p->uoss; /* l_stack_t * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_sendfile */
+ case 187: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_vfork */
+ case 190: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_getrlimit */
+ case 191: {
+ struct linux_getrlimit_args *p = params;
+ iarg[0] = p->resource; /* l_uint */
+ uarg[1] = (intptr_t) p->rlim; /* struct l_rlimit * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_mmap2 */
+ case 192: {
+ struct linux_mmap2_args *p = params;
+ iarg[0] = p->addr; /* l_ulong */
+ iarg[1] = p->len; /* l_ulong */
+ iarg[2] = p->prot; /* l_ulong */
+ iarg[3] = p->flags; /* l_ulong */
+ iarg[4] = p->fd; /* l_ulong */
+ iarg[5] = p->pgoff; /* l_ulong */
+ *n_args = 6;
+ break;
+ }
+ /* linux_truncate64 */
+ case 193: {
+ struct linux_truncate64_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->length; /* l_loff_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_ftruncate64 */
+ case 194: {
+ struct linux_ftruncate64_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ iarg[1] = p->length; /* l_loff_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_stat64 */
+ case 195: {
+ struct linux_stat64_args *p = params;
+ uarg[0] = (intptr_t) p->filename; /* const char * */
+ uarg[1] = (intptr_t) p->statbuf; /* struct l_stat64 * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_lstat64 */
+ case 196: {
+ struct linux_lstat64_args *p = params;
+ uarg[0] = (intptr_t) p->filename; /* const char * */
+ uarg[1] = (intptr_t) p->statbuf; /* struct l_stat64 * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_fstat64 */
+ case 197: {
+ struct linux_fstat64_args *p = params;
+ iarg[0] = p->fd; /* l_int */
+ uarg[1] = (intptr_t) p->statbuf; /* struct l_stat64 * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_lchown */
+ case 198: {
+ struct linux_lchown_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->uid; /* l_uid_t */
+ iarg[2] = p->gid; /* l_gid_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getuid */
+ case 199: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_getgid */
+ case 200: {
+ *n_args = 0;
+ break;
+ }
+ /* geteuid */
+ case 201: {
+ *n_args = 0;
+ break;
+ }
+ /* getegid */
+ case 202: {
+ *n_args = 0;
+ break;
+ }
+ /* setreuid */
+ case 203: {
+ struct setreuid_args *p = params;
+ uarg[0] = p->ruid; /* uid_t */
+ uarg[1] = p->euid; /* uid_t */
+ *n_args = 2;
+ break;
+ }
+ /* setregid */
+ case 204: {
+ struct setregid_args *p = params;
+ iarg[0] = p->rgid; /* gid_t */
+ iarg[1] = p->egid; /* gid_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_getgroups */
+ case 205: {
+ struct linux_getgroups_args *p = params;
+ iarg[0] = p->gidsetsize; /* l_int */
+ uarg[1] = (intptr_t) p->grouplist; /* l_gid_t * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_setgroups */
+ case 206: {
+ struct linux_setgroups_args *p = params;
+ iarg[0] = p->gidsetsize; /* l_int */
+ uarg[1] = (intptr_t) p->grouplist; /* l_gid_t * */
+ *n_args = 2;
+ break;
+ }
+ /* fchown */
+ case 207: {
+ *n_args = 0;
+ break;
+ }
+ /* setresuid */
+ case 208: {
+ struct setresuid_args *p = params;
+ uarg[0] = p->ruid; /* uid_t */
+ uarg[1] = p->euid; /* uid_t */
+ uarg[2] = p->suid; /* uid_t */
+ *n_args = 3;
+ break;
+ }
+ /* getresuid */
+ case 209: {
+ struct getresuid_args *p = params;
+ uarg[0] = (intptr_t) p->ruid; /* uid_t * */
+ uarg[1] = (intptr_t) p->euid; /* uid_t * */
+ uarg[2] = (intptr_t) p->suid; /* uid_t * */
+ *n_args = 3;
+ break;
+ }
+ /* setresgid */
+ case 210: {
+ struct setresgid_args *p = params;
+ iarg[0] = p->rgid; /* gid_t */
+ iarg[1] = p->egid; /* gid_t */
+ iarg[2] = p->sgid; /* gid_t */
+ *n_args = 3;
+ break;
+ }
+ /* getresgid */
+ case 211: {
+ struct getresgid_args *p = params;
+ uarg[0] = (intptr_t) p->rgid; /* gid_t * */
+ uarg[1] = (intptr_t) p->egid; /* gid_t * */
+ uarg[2] = (intptr_t) p->sgid; /* gid_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_chown */
+ case 212: {
+ struct linux_chown_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ iarg[1] = p->uid; /* l_uid_t */
+ iarg[2] = p->gid; /* l_gid_t */
+ *n_args = 3;
+ break;
+ }
+ /* setuid */
+ case 213: {
+ struct setuid_args *p = params;
+ uarg[0] = p->uid; /* uid_t */
+ *n_args = 1;
+ break;
+ }
+ /* setgid */
+ case 214: {
+ struct setgid_args *p = params;
+ iarg[0] = p->gid; /* gid_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setfsuid */
+ case 215: {
+ struct linux_setfsuid_args *p = params;
+ iarg[0] = p->uid; /* l_uid_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_setfsgid */
+ case 216: {
+ struct linux_setfsgid_args *p = params;
+ iarg[0] = p->gid; /* l_gid_t */
+ *n_args = 1;
+ break;
+ }
+ /* linux_pivot_root */
+ case 217: {
+ struct linux_pivot_root_args *p = params;
+ uarg[0] = (intptr_t) p->new_root; /* char * */
+ uarg[1] = (intptr_t) p->put_old; /* char * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_mincore */
+ case 218: {
+ struct linux_mincore_args *p = params;
+ iarg[0] = p->start; /* l_ulong */
+ iarg[1] = p->len; /* l_size_t */
+ uarg[2] = (intptr_t) p->vec; /* u_char * */
+ *n_args = 3;
+ break;
+ }
+ /* madvise */
+ case 219: {
+ struct madvise_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* void * */
+ uarg[1] = p->len; /* size_t */
+ iarg[2] = p->behav; /* int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_getdents64 */
+ case 220: {
+ struct linux_getdents64_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ uarg[1] = (intptr_t) p->dirent; /* void * */
+ iarg[2] = p->count; /* l_uint */
+ *n_args = 3;
+ break;
+ }
+ /* linux_fcntl64 */
+ case 221: {
+ struct linux_fcntl64_args *p = params;
+ iarg[0] = p->fd; /* l_uint */
+ iarg[1] = p->cmd; /* l_uint */
+ uarg[2] = p->arg; /* uintptr_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_gettid */
+ case 224: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_setxattr */
+ case 226: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_lsetxattr */
+ case 227: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_fsetxattr */
+ case 228: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_getxattr */
+ case 229: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_lgetxattr */
+ case 230: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_fgetxattr */
+ case 231: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_listxattr */
+ case 232: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_llistxattr */
+ case 233: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_flistxattr */
+ case 234: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_removexattr */
+ case 235: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_lremovexattr */
+ case 236: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_fremovexattr */
+ case 237: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_tkill */
+ case 238: {
+ struct linux_tkill_args *p = params;
+ iarg[0] = p->tid; /* int */
+ iarg[1] = p->sig; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_sys_futex */
+ case 240: {
+ struct linux_sys_futex_args *p = params;
+ uarg[0] = (intptr_t) p->uaddr; /* void * */
+ iarg[1] = p->op; /* int */
+ uarg[2] = p->val; /* uint32_t */
+ uarg[3] = (intptr_t) p->timeout; /* struct l_timespec * */
+ uarg[4] = (intptr_t) p->uaddr2; /* uint32_t * */
+ uarg[5] = p->val3; /* uint32_t */
+ *n_args = 6;
+ break;
+ }
+ /* linux_sched_setaffinity */
+ case 241: {
+ struct linux_sched_setaffinity_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ iarg[1] = p->len; /* l_uint */
+ uarg[2] = (intptr_t) p->user_mask_ptr; /* l_ulong * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_sched_getaffinity */
+ case 242: {
+ struct linux_sched_getaffinity_args *p = params;
+ iarg[0] = p->pid; /* l_pid_t */
+ iarg[1] = p->len; /* l_uint */
+ uarg[2] = (intptr_t) p->user_mask_ptr; /* l_ulong * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_set_thread_area */
+ case 243: {
+ struct linux_set_thread_area_args *p = params;
+ uarg[0] = (intptr_t) p->desc; /* struct l_user_desc * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_fadvise64 */
+ case 250: {
+ struct linux_fadvise64_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->offset; /* l_loff_t */
+ iarg[2] = p->len; /* l_size_t */
+ iarg[3] = p->advice; /* int */
+ *n_args = 4;
+ break;
+ }
+ /* linux_exit_group */
+ case 252: {
+ struct linux_exit_group_args *p = params;
+ iarg[0] = p->error_code; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* linux_lookup_dcookie */
+ case 253: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_epoll_create */
+ case 254: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_epoll_ctl */
+ case 255: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_epoll_wait */
+ case 256: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_remap_file_pages */
+ case 257: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_set_tid_address */
+ case 258: {
+ struct linux_set_tid_address_args *p = params;
+ uarg[0] = (intptr_t) p->tidptr; /* int * */
+ *n_args = 1;
+ break;
+ }
+ /* linux_timer_create */
+ case 259: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timer_settime */
+ case 260: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timer_gettime */
+ case 261: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timer_getoverrun */
+ case 262: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timer_delete */
+ case 263: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_clock_settime */
+ case 264: {
+ struct linux_clock_settime_args *p = params;
+ iarg[0] = p->which; /* clockid_t */
+ uarg[1] = (intptr_t) p->tp; /* struct l_timespec * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_clock_gettime */
+ case 265: {
+ struct linux_clock_gettime_args *p = params;
+ iarg[0] = p->which; /* clockid_t */
+ uarg[1] = (intptr_t) p->tp; /* struct l_timespec * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_clock_getres */
+ case 266: {
+ struct linux_clock_getres_args *p = params;
+ iarg[0] = p->which; /* clockid_t */
+ uarg[1] = (intptr_t) p->tp; /* struct l_timespec * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_clock_nanosleep */
+ case 267: {
+ struct linux_clock_nanosleep_args *p = params;
+ iarg[0] = p->which; /* clockid_t */
+ iarg[1] = p->flags; /* int */
+ uarg[2] = (intptr_t) p->rqtp; /* struct l_timespec * */
+ uarg[3] = (intptr_t) p->rmtp; /* struct l_timespec * */
+ *n_args = 4;
+ break;
+ }
+ /* linux_statfs64 */
+ case 268: {
+ struct linux_statfs64_args *p = params;
+ uarg[0] = (intptr_t) p->path; /* char * */
+ uarg[1] = p->bufsize; /* size_t */
+ uarg[2] = (intptr_t) p->buf; /* struct l_statfs64_buf * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_fstatfs64 */
+ case 269: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_tgkill */
+ case 270: {
+ struct linux_tgkill_args *p = params;
+ iarg[0] = p->tgid; /* int */
+ iarg[1] = p->pid; /* int */
+ iarg[2] = p->sig; /* int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_utimes */
+ case 271: {
+ struct linux_utimes_args *p = params;
+ uarg[0] = (intptr_t) p->fname; /* char * */
+ uarg[1] = (intptr_t) p->tptr; /* struct l_timeval * */
+ *n_args = 2;
+ break;
+ }
+ /* linux_fadvise64_64 */
+ case 272: {
+ struct linux_fadvise64_64_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->offset; /* l_loff_t */
+ iarg[2] = p->len; /* l_loff_t */
+ iarg[3] = p->advice; /* int */
+ *n_args = 4;
+ break;
+ }
+ /* linux_mbind */
+ case 274: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_get_mempolicy */
+ case 275: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_set_mempolicy */
+ case 276: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mq_open */
+ case 277: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mq_unlink */
+ case 278: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mq_timedsend */
+ case 279: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mq_timedreceive */
+ case 280: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mq_notify */
+ case 281: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_mq_getsetattr */
+ case 282: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_kexec_load */
+ case 283: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_waitid */
+ case 284: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_add_key */
+ case 286: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_request_key */
+ case 287: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_keyctl */
+ case 288: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_ioprio_set */
+ case 289: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_ioprio_get */
+ case 290: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_inotify_init */
+ case 291: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_inotify_add_watch */
+ case 292: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_inotify_rm_watch */
+ case 293: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_migrate_pages */
+ case 294: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_openat */
+ case 295: {
+ struct linux_openat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->filename; /* const char * */
+ iarg[2] = p->flags; /* l_int */
+ iarg[3] = p->mode; /* l_int */
+ *n_args = 4;
+ break;
+ }
+ /* linux_mkdirat */
+ case 296: {
+ struct linux_mkdirat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->pathname; /* const char * */
+ iarg[2] = p->mode; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_mknodat */
+ case 297: {
+ struct linux_mknodat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->filename; /* const char * */
+ iarg[2] = p->mode; /* l_int */
+ iarg[3] = p->dev; /* l_uint */
+ *n_args = 4;
+ break;
+ }
+ /* linux_fchownat */
+ case 298: {
+ struct linux_fchownat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->filename; /* const char * */
+ iarg[2] = p->uid; /* l_uid16_t */
+ iarg[3] = p->gid; /* l_gid16_t */
+ iarg[4] = p->flag; /* l_int */
+ *n_args = 5;
+ break;
+ }
+ /* linux_futimesat */
+ case 299: {
+ struct linux_futimesat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->filename; /* char * */
+ uarg[2] = (intptr_t) p->utimes; /* struct l_timeval * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_fstatat64 */
+ case 300: {
+ struct linux_fstatat64_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->pathname; /* char * */
+ uarg[2] = (intptr_t) p->statbuf; /* struct l_stat64 * */
+ iarg[3] = p->flag; /* l_int */
+ *n_args = 4;
+ break;
+ }
+ /* linux_unlinkat */
+ case 301: {
+ struct linux_unlinkat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->pathname; /* const char * */
+ iarg[2] = p->flag; /* l_int */
+ *n_args = 3;
+ break;
+ }
+ /* linux_renameat */
+ case 302: {
+ struct linux_renameat_args *p = params;
+ iarg[0] = p->olddfd; /* l_int */
+ uarg[1] = (intptr_t) p->oldname; /* const char * */
+ iarg[2] = p->newdfd; /* l_int */
+ uarg[3] = (intptr_t) p->newname; /* const char * */
+ *n_args = 4;
+ break;
+ }
+ /* linux_linkat */
+ case 303: {
+ struct linux_linkat_args *p = params;
+ iarg[0] = p->olddfd; /* l_int */
+ uarg[1] = (intptr_t) p->oldname; /* const char * */
+ iarg[2] = p->newdfd; /* l_int */
+ uarg[3] = (intptr_t) p->newname; /* const char * */
+ iarg[4] = p->flag; /* l_int */
+ *n_args = 5;
+ break;
+ }
+ /* linux_symlinkat */
+ case 304: {
+ struct linux_symlinkat_args *p = params;
+ uarg[0] = (intptr_t) p->oldname; /* const char * */
+ iarg[1] = p->newdfd; /* l_int */
+ uarg[2] = (intptr_t) p->newname; /* const char * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_readlinkat */
+ case 305: {
+ struct linux_readlinkat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->path; /* const char * */
+ uarg[2] = (intptr_t) p->buf; /* char * */
+ iarg[3] = p->bufsiz; /* l_int */
+ *n_args = 4;
+ break;
+ }
+ /* linux_fchmodat */
+ case 306: {
+ struct linux_fchmodat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->filename; /* const char * */
+ iarg[2] = p->mode; /* l_mode_t */
+ *n_args = 3;
+ break;
+ }
+ /* linux_faccessat */
+ case 307: {
+ struct linux_faccessat_args *p = params;
+ iarg[0] = p->dfd; /* l_int */
+ uarg[1] = (intptr_t) p->filename; /* const char * */
+ iarg[2] = p->amode; /* l_int */
+ iarg[3] = p->flag; /* int */
+ *n_args = 4;
+ break;
+ }
+ /* linux_pselect6 */
+ case 308: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_ppoll */
+ case 309: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_unshare */
+ case 310: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_set_robust_list */
+ case 311: {
+ struct linux_set_robust_list_args *p = params;
+ uarg[0] = (intptr_t) p->head; /* struct linux_robust_list_head * */
+ iarg[1] = p->len; /* l_size_t */
+ *n_args = 2;
+ break;
+ }
+ /* linux_get_robust_list */
+ case 312: {
+ struct linux_get_robust_list_args *p = params;
+ iarg[0] = p->pid; /* l_int */
+ uarg[1] = (intptr_t) p->head; /* struct linux_robust_list_head * */
+ uarg[2] = (intptr_t) p->len; /* l_size_t * */
+ *n_args = 3;
+ break;
+ }
+ /* linux_splice */
+ case 313: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_sync_file_range */
+ case 314: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_tee */
+ case 315: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_vmsplice */
+ case 316: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_move_pages */
+ case 317: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_getcpu */
+ case 318: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_epoll_pwait */
+ case 319: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_utimensat */
+ case 320: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_signalfd */
+ case 321: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timerfd_create */
+ case 322: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_eventfd */
+ case 323: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_fallocate */
+ case 324: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timerfd_settime */
+ case 325: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_timerfd_gettime */
+ case 326: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_signalfd4 */
+ case 327: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_eventfd2 */
+ case 328: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_epoll_create1 */
+ case 329: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_dup3 */
+ case 330: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_pipe2 */
+ case 331: {
+ struct linux_pipe2_args *p = params;
+ uarg[0] = (intptr_t) p->pipefds; /* l_int * */
+ iarg[1] = p->flags; /* l_int */
+ *n_args = 2;
+ break;
+ }
+ /* linux_inotify_init1 */
+ case 332: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_preadv */
+ case 333: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_pwritev */
+ case 334: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_rt_tsigqueueinfo */
+ case 335: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_perf_event_open */
+ case 336: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_recvmmsg */
+ case 337: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_fanotify_init */
+ case 338: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_fanotify_mark */
+ case 339: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_prlimit64 */
+ case 340: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_name_to_handle_at */
+ case 341: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_open_by_handle_at */
+ case 342: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_clock_adjtime */
+ case 343: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_syncfs */
+ case 344: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_sendmmsg */
+ case 345: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_setns */
+ case 346: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_process_vm_readv */
+ case 347: {
+ *n_args = 0;
+ break;
+ }
+ /* linux_process_vm_writev */
+ case 348: {
+ *n_args = 0;
+ break;
+ }
+ default:
+ *n_args = 0;
+ break;
+ };
+}
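+
+/*
+ * systrace_entry_setargdesc() looks up the C type name of argument ndx of
+ * syscall sysnum and copies it into the desc buffer (at most descsz bytes);
+ * for out-of-range indexes p stays NULL and no description is produced.
+ */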
+static void
+systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
+{
+ const char *p = NULL;
+ switch (sysnum) {
+#define nosys linux_nosys
+ /* sys_exit */
+ case 1:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fork */
+ case 2:
+ break;
+ /* read */
+ case 3:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "u_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* write */
+ case 4:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "u_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_open */
+ case 5:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* close */
+ case 6:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_waitpid */
+ case 7:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ case 1:
+ p = "l_int *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_creat */
+ case 8:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_link */
+ case 9:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_unlink */
+ case 10:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_execve */
+ case 11:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "uint32_t *";
+ break;
+ case 2:
+ p = "uint32_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_chdir */
+ case 12:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_time */
+ case 13:
+ switch(ndx) {
+ case 0:
+ p = "l_time_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mknod */
+ case 14:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "l_dev_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_chmod */
+ case 15:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_mode_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_lchown16 */
+ case 16:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_uid16_t";
+ break;
+ case 2:
+ p = "l_gid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_stat */
+ case 18:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct linux_stat *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_lseek */
+ case 19:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_off_t";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getpid */
+ case 20:
+ break;
+ /* linux_mount */
+ case 21:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "char *";
+ break;
+ case 3:
+ p = "l_ulong";
+ break;
+ case 4:
+ p = "void *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_oldumount */
+ case 22:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setuid16 */
+ case 23:
+ switch(ndx) {
+ case 0:
+ p = "l_uid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getuid16 */
+ case 24:
+ break;
+ /* linux_stime */
+ case 25:
+ break;
+ /* linux_ptrace */
+ case 26:
+ switch(ndx) {
+ case 0:
+ p = "l_long";
+ break;
+ case 1:
+ p = "l_long";
+ break;
+ case 2:
+ p = "l_long";
+ break;
+ case 3:
+ p = "l_long";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_alarm */
+ case 27:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_pause */
+ case 29:
+ break;
+ /* linux_utime */
+ case 30:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct l_utimbuf *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_access */
+ case 33:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_nice */
+ case 34:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* sync */
+ case 36:
+ break;
+ /* linux_kill */
+ case 37:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rename */
+ case 38:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mkdir */
+ case 39:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rmdir */
+ case 40:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* dup */
+ case 41:
+ switch(ndx) {
+ case 0:
+ p = "u_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_pipe */
+ case 42:
+ switch(ndx) {
+ case 0:
+ p = "l_int *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_times */
+ case 43:
+ switch(ndx) {
+ case 0:
+ p = "struct l_times_argv *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_brk */
+ case 45:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setgid16 */
+ case 46:
+ switch(ndx) {
+ case 0:
+ p = "l_gid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getgid16 */
+ case 47:
+ break;
+ /* linux_signal */
+ case 48:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_handler_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_geteuid16 */
+ case 49:
+ break;
+ /* linux_getegid16 */
+ case 50:
+ break;
+ /* acct */
+ case 51:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_umount */
+ case 52:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_ioctl */
+ case 54:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_uint";
+ break;
+ case 2:
+ p = "uintptr_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fcntl */
+ case 55:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_uint";
+ break;
+ case 2:
+ p = "uintptr_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setpgid */
+ case 57:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_olduname */
+ case 59:
+ break;
+ /* umask */
+ case 60:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* chroot */
+ case 61:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_ustat */
+ case 62:
+ switch(ndx) {
+ case 0:
+ p = "l_dev_t";
+ break;
+ case 1:
+ p = "struct l_ustat *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* dup2 */
+ case 63:
+ switch(ndx) {
+ case 0:
+ p = "u_int";
+ break;
+ case 1:
+ p = "u_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getppid */
+ case 64:
+ break;
+ /* getpgrp */
+ case 65:
+ break;
+ /* setsid */
+ case 66:
+ break;
+ /* linux_sigaction */
+ case 67:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_osigaction_t *";
+ break;
+ case 2:
+ p = "l_osigaction_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sgetmask */
+ case 68:
+ break;
+ /* linux_ssetmask */
+ case 69:
+ switch(ndx) {
+ case 0:
+ p = "l_osigset_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setreuid16 */
+ case 70:
+ switch(ndx) {
+ case 0:
+ p = "l_uid16_t";
+ break;
+ case 1:
+ p = "l_uid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setregid16 */
+ case 71:
+ switch(ndx) {
+ case 0:
+ p = "l_gid16_t";
+ break;
+ case 1:
+ p = "l_gid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sigsuspend */
+ case 72:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "l_osigset_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sigpending */
+ case 73:
+ switch(ndx) {
+ case 0:
+ p = "l_osigset_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sethostname */
+ case 74:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "u_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setrlimit */
+ case 75:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "struct l_rlimit *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_old_getrlimit */
+ case 76:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "struct l_rlimit *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getrusage */
+ case 77:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "struct l_rusage *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_gettimeofday */
+ case 78:
+ switch(ndx) {
+ case 0:
+ p = "struct l_timeval *";
+ break;
+ case 1:
+ p = "struct timezone *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_settimeofday */
+ case 79:
+ switch(ndx) {
+ case 0:
+ p = "struct l_timeval *";
+ break;
+ case 1:
+ p = "struct timezone *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getgroups16 */
+ case 80:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_gid16_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setgroups16 */
+ case 81:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_gid16_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_old_select */
+ case 82:
+ switch(ndx) {
+ case 0:
+ p = "struct l_old_select_argv *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_symlink */
+ case 83:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_lstat */
+ case 84:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct linux_lstat *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_readlink */
+ case 85:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* swapon */
+ case 87:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_reboot */
+ case 88:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "l_uint";
+ break;
+ case 3:
+ p = "void *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_readdir */
+ case 89:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "struct l_dirent *";
+ break;
+ case 2:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mmap */
+ case 90:
+ switch(ndx) {
+ case 0:
+ p = "struct l_mmap_argv *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* munmap */
+ case 91:
+ switch(ndx) {
+ case 0:
+ p = "caddr_t";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_truncate */
+ case 92:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_ftruncate */
+ case 93:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "long";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* fchmod */
+ case 94:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* fchown */
+ case 95:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getpriority */
+ case 96:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setpriority */
+ case 97:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_statfs */
+ case 99:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct l_statfs_buf *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fstatfs */
+ case 100:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "struct l_statfs_buf *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_socketcall */
+ case 102:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_syslog */
+ case 103:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setitimer */
+ case 104:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "struct l_itimerval *";
+ break;
+ case 2:
+ p = "struct l_itimerval *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getitimer */
+ case 105:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "struct l_itimerval *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_newstat */
+ case 106:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct l_newstat *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_newlstat */
+ case 107:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct l_newstat *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_newfstat */
+ case 108:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "struct l_newstat *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_uname */
+ case 109:
+ break;
+ /* linux_iopl */
+ case 110:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_vhangup */
+ case 111:
+ break;
+ /* linux_wait4 */
+ case 114:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ case 1:
+ p = "l_uint *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "struct l_rusage *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_swapoff */
+ case 115:
+ break;
+ /* linux_sysinfo */
+ case 116:
+ switch(ndx) {
+ case 0:
+ p = "struct l_sysinfo *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_ipc */
+ case 117:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "l_int";
+ break;
+ case 4:
+ p = "void *";
+ break;
+ case 5:
+ p = "l_long";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* fsync */
+ case 118:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sigreturn */
+ case 119:
+ switch(ndx) {
+ case 0:
+ p = "struct l_sigframe *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_clone */
+ case 120:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "void *";
+ break;
+ case 2:
+ p = "void *";
+ break;
+ case 3:
+ p = "void *";
+ break;
+ case 4:
+ p = "void *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setdomainname */
+ case 121:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_newuname */
+ case 122:
+ switch(ndx) {
+ case 0:
+ p = "struct l_new_utsname *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_adjtimex */
+ case 124:
+ break;
+ /* linux_mprotect */
+ case 125:
+ switch(ndx) {
+ case 0:
+ p = "caddr_t";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sigprocmask */
+ case 126:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_osigset_t *";
+ break;
+ case 2:
+ p = "l_osigset_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_create_module */
+ case 127:
+ break;
+ /* linux_init_module */
+ case 128:
+ break;
+ /* linux_delete_module */
+ case 129:
+ break;
+ /* linux_get_kernel_syms */
+ case 130:
+ break;
+ /* linux_quotactl */
+ case 131:
+ break;
+ /* getpgid */
+ case 132:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* fchdir */
+ case 133:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_bdflush */
+ case 134:
+ break;
+ /* linux_sysfs */
+ case 135:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ case 2:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_personality */
+ case 136:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setfsuid16 */
+ case 138:
+ switch(ndx) {
+ case 0:
+ p = "l_uid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setfsgid16 */
+ case 139:
+ switch(ndx) {
+ case 0:
+ p = "l_gid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_llseek */
+ case 140:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ case 2:
+ p = "l_ulong";
+ break;
+ case 3:
+ p = "l_loff_t *";
+ break;
+ case 4:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getdents */
+ case 141:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "void *";
+ break;
+ case 2:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_select */
+ case 142:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_fd_set *";
+ break;
+ case 2:
+ p = "l_fd_set *";
+ break;
+ case 3:
+ p = "l_fd_set *";
+ break;
+ case 4:
+ p = "struct l_timeval *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* flock */
+ case 143:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_msync */
+ case 144:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ case 1:
+ p = "l_size_t";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_readv */
+ case 145:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ case 1:
+ p = "struct l_iovec32 *";
+ break;
+ case 2:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_writev */
+ case 146:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ case 1:
+ p = "struct l_iovec32 *";
+ break;
+ case 2:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getsid */
+ case 147:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fdatasync */
+ case 148:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sysctl */
+ case 149:
+ switch(ndx) {
+ case 0:
+ p = "struct l___sysctl_args *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* mlock */
+ case 150:
+ switch(ndx) {
+ case 0:
+ p = "const void *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* munlock */
+ case 151:
+ switch(ndx) {
+ case 0:
+ p = "const void *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* mlockall */
+ case 152:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* munlockall */
+ case 153:
+ break;
+ /* sched_setparam */
+ case 154:
+ switch(ndx) {
+ case 0:
+ p = "pid_t";
+ break;
+ case 1:
+ p = "const struct sched_param *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* sched_getparam */
+ case 155:
+ switch(ndx) {
+ case 0:
+ p = "pid_t";
+ break;
+ case 1:
+ p = "struct sched_param *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sched_setscheduler */
+ case 156:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "struct l_sched_param *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sched_getscheduler */
+ case 157:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* sched_yield */
+ case 158:
+ break;
+ /* linux_sched_get_priority_max */
+ case 159:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sched_get_priority_min */
+ case 160:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sched_rr_get_interval */
+ case 161:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ case 1:
+ p = "struct l_timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_nanosleep */
+ case 162:
+ switch(ndx) {
+ case 0:
+ p = "const struct l_timespec *";
+ break;
+ case 1:
+ p = "struct l_timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mremap */
+ case 163:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ case 2:
+ p = "l_ulong";
+ break;
+ case 3:
+ p = "l_ulong";
+ break;
+ case 4:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setresuid16 */
+ case 164:
+ switch(ndx) {
+ case 0:
+ p = "l_uid16_t";
+ break;
+ case 1:
+ p = "l_uid16_t";
+ break;
+ case 2:
+ p = "l_uid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getresuid16 */
+ case 165:
+ switch(ndx) {
+ case 0:
+ p = "l_uid16_t *";
+ break;
+ case 1:
+ p = "l_uid16_t *";
+ break;
+ case 2:
+ p = "l_uid16_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_query_module */
+ case 167:
+ break;
+ /* poll */
+ case 168:
+ switch(ndx) {
+ case 0:
+ p = "struct pollfd *";
+ break;
+ case 1:
+ p = "unsigned int";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_nfsservctl */
+ case 169:
+ break;
+ /* linux_setresgid16 */
+ case 170:
+ switch(ndx) {
+ case 0:
+ p = "l_gid16_t";
+ break;
+ case 1:
+ p = "l_gid16_t";
+ break;
+ case 2:
+ p = "l_gid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getresgid16 */
+ case 171:
+ switch(ndx) {
+ case 0:
+ p = "l_gid16_t *";
+ break;
+ case 1:
+ p = "l_gid16_t *";
+ break;
+ case 2:
+ p = "l_gid16_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_prctl */
+ case 172:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "l_int";
+ break;
+ case 4:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rt_sigreturn */
+ case 173:
+ switch(ndx) {
+ case 0:
+ p = "struct l_ucontext *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rt_sigaction */
+ case 174:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_sigaction_t *";
+ break;
+ case 2:
+ p = "l_sigaction_t *";
+ break;
+ case 3:
+ p = "l_size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rt_sigprocmask */
+ case 175:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_sigset_t *";
+ break;
+ case 2:
+ p = "l_sigset_t *";
+ break;
+ case 3:
+ p = "l_size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rt_sigpending */
+ case 176:
+ switch(ndx) {
+ case 0:
+ p = "l_sigset_t *";
+ break;
+ case 1:
+ p = "l_size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rt_sigtimedwait */
+ case 177:
+ switch(ndx) {
+ case 0:
+ p = "l_sigset_t *";
+ break;
+ case 1:
+ p = "l_siginfo_t *";
+ break;
+ case 2:
+ p = "struct l_timeval *";
+ break;
+ case 3:
+ p = "l_size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_rt_sigqueueinfo */
+ case 178:
+ break;
+ /* linux_rt_sigsuspend */
+ case 179:
+ switch(ndx) {
+ case 0:
+ p = "l_sigset_t *";
+ break;
+ case 1:
+ p = "l_size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_pread */
+ case 180:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "l_size_t";
+ break;
+ case 3:
+ p = "l_loff_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_pwrite */
+ case 181:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "l_size_t";
+ break;
+ case 3:
+ p = "l_loff_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_chown16 */
+ case 182:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_uid16_t";
+ break;
+ case 2:
+ p = "l_gid16_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getcwd */
+ case 183:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_capget */
+ case 184:
+ switch(ndx) {
+ case 0:
+ p = "struct l_user_cap_header *";
+ break;
+ case 1:
+ p = "struct l_user_cap_data *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_capset */
+ case 185:
+ switch(ndx) {
+ case 0:
+ p = "struct l_user_cap_header *";
+ break;
+ case 1:
+ p = "struct l_user_cap_data *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sigaltstack */
+ case 186:
+ switch(ndx) {
+ case 0:
+ p = "l_stack_t *";
+ break;
+ case 1:
+ p = "l_stack_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sendfile */
+ case 187:
+ break;
+ /* linux_vfork */
+ case 190:
+ break;
+ /* linux_getrlimit */
+ case 191:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "struct l_rlimit *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mmap2 */
+ case 192:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ case 1:
+ p = "l_ulong";
+ break;
+ case 2:
+ p = "l_ulong";
+ break;
+ case 3:
+ p = "l_ulong";
+ break;
+ case 4:
+ p = "l_ulong";
+ break;
+ case 5:
+ p = "l_ulong";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_truncate64 */
+ case 193:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_loff_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_ftruncate64 */
+ case 194:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_loff_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_stat64 */
+ case 195:
+ switch(ndx) {
+ case 0:
+ p = "const char *";
+ break;
+ case 1:
+ p = "struct l_stat64 *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_lstat64 */
+ case 196:
+ switch(ndx) {
+ case 0:
+ p = "const char *";
+ break;
+ case 1:
+ p = "struct l_stat64 *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fstat64 */
+ case 197:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "struct l_stat64 *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_lchown */
+ case 198:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_uid_t";
+ break;
+ case 2:
+ p = "l_gid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getuid */
+ case 199:
+ break;
+ /* linux_getgid */
+ case 200:
+ break;
+ /* geteuid */
+ case 201:
+ break;
+ /* getegid */
+ case 202:
+ break;
+ /* setreuid */
+ case 203:
+ switch(ndx) {
+ case 0:
+ p = "uid_t";
+ break;
+ case 1:
+ p = "uid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setregid */
+ case 204:
+ switch(ndx) {
+ case 0:
+ p = "gid_t";
+ break;
+ case 1:
+ p = "gid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getgroups */
+ case 205:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_gid_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setgroups */
+ case 206:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "l_gid_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* fchown */
+ case 207:
+ break;
+ /* setresuid */
+ case 208:
+ switch(ndx) {
+ case 0:
+ p = "uid_t";
+ break;
+ case 1:
+ p = "uid_t";
+ break;
+ case 2:
+ p = "uid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* getresuid */
+ case 209:
+ switch(ndx) {
+ case 0:
+ p = "uid_t *";
+ break;
+ case 1:
+ p = "uid_t *";
+ break;
+ case 2:
+ p = "uid_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setresgid */
+ case 210:
+ switch(ndx) {
+ case 0:
+ p = "gid_t";
+ break;
+ case 1:
+ p = "gid_t";
+ break;
+ case 2:
+ p = "gid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* getresgid */
+ case 211:
+ switch(ndx) {
+ case 0:
+ p = "gid_t *";
+ break;
+ case 1:
+ p = "gid_t *";
+ break;
+ case 2:
+ p = "gid_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_chown */
+ case 212:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "l_uid_t";
+ break;
+ case 2:
+ p = "l_gid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setuid */
+ case 213:
+ switch(ndx) {
+ case 0:
+ p = "uid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* setgid */
+ case 214:
+ switch(ndx) {
+ case 0:
+ p = "gid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setfsuid */
+ case 215:
+ switch(ndx) {
+ case 0:
+ p = "l_uid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_setfsgid */
+ case 216:
+ switch(ndx) {
+ case 0:
+ p = "l_gid_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_pivot_root */
+ case 217:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mincore */
+ case 218:
+ switch(ndx) {
+ case 0:
+ p = "l_ulong";
+ break;
+ case 1:
+ p = "l_size_t";
+ break;
+ case 2:
+ p = "u_char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* madvise */
+ case 219:
+ switch(ndx) {
+ case 0:
+ p = "void *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_getdents64 */
+ case 220:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "void *";
+ break;
+ case 2:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fcntl64 */
+ case 221:
+ switch(ndx) {
+ case 0:
+ p = "l_uint";
+ break;
+ case 1:
+ p = "l_uint";
+ break;
+ case 2:
+ p = "uintptr_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_gettid */
+ case 224:
+ break;
+ /* linux_setxattr */
+ case 226:
+ break;
+ /* linux_lsetxattr */
+ case 227:
+ break;
+ /* linux_fsetxattr */
+ case 228:
+ break;
+ /* linux_getxattr */
+ case 229:
+ break;
+ /* linux_lgetxattr */
+ case 230:
+ break;
+ /* linux_fgetxattr */
+ case 231:
+ break;
+ /* linux_listxattr */
+ case 232:
+ break;
+ /* linux_llistxattr */
+ case 233:
+ break;
+ /* linux_flistxattr */
+ case 234:
+ break;
+ /* linux_removexattr */
+ case 235:
+ break;
+ /* linux_lremovexattr */
+ case 236:
+ break;
+ /* linux_fremovexattr */
+ case 237:
+ break;
+ /* linux_tkill */
+ case 238:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sys_futex */
+ case 240:
+ switch(ndx) {
+ case 0:
+ p = "void *";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ case 3:
+ p = "struct l_timespec *";
+ break;
+ case 4:
+ p = "uint32_t *";
+ break;
+ case 5:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sched_setaffinity */
+ case 241:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ case 1:
+ p = "l_uint";
+ break;
+ case 2:
+ p = "l_ulong *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_sched_getaffinity */
+ case 242:
+ switch(ndx) {
+ case 0:
+ p = "l_pid_t";
+ break;
+ case 1:
+ p = "l_uint";
+ break;
+ case 2:
+ p = "l_ulong *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_set_thread_area */
+ case 243:
+ switch(ndx) {
+ case 0:
+ p = "struct l_user_desc *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fadvise64 */
+ case 250:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "l_loff_t";
+ break;
+ case 2:
+ p = "l_size_t";
+ break;
+ case 3:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_exit_group */
+ case 252:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_lookup_dcookie */
+ case 253:
+ break;
+ /* linux_epoll_create */
+ case 254:
+ break;
+ /* linux_epoll_ctl */
+ case 255:
+ break;
+ /* linux_epoll_wait */
+ case 256:
+ break;
+ /* linux_remap_file_pages */
+ case 257:
+ break;
+ /* linux_set_tid_address */
+ case 258:
+ switch(ndx) {
+ case 0:
+ p = "int *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_timer_create */
+ case 259:
+ break;
+ /* linux_timer_settime */
+ case 260:
+ break;
+ /* linux_timer_gettime */
+ case 261:
+ break;
+ /* linux_timer_getoverrun */
+ case 262:
+ break;
+ /* linux_timer_delete */
+ case 263:
+ break;
+ /* linux_clock_settime */
+ case 264:
+ switch(ndx) {
+ case 0:
+ p = "clockid_t";
+ break;
+ case 1:
+ p = "struct l_timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_clock_gettime */
+ case 265:
+ switch(ndx) {
+ case 0:
+ p = "clockid_t";
+ break;
+ case 1:
+ p = "struct l_timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_clock_getres */
+ case 266:
+ switch(ndx) {
+ case 0:
+ p = "clockid_t";
+ break;
+ case 1:
+ p = "struct l_timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_clock_nanosleep */
+ case 267:
+ switch(ndx) {
+ case 0:
+ p = "clockid_t";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "struct l_timespec *";
+ break;
+ case 3:
+ p = "struct l_timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_statfs64 */
+ case 268:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ case 2:
+ p = "struct l_statfs64_buf *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fstatfs64 */
+ case 269:
+ break;
+ /* linux_tgkill */
+ case 270:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_utimes */
+ case 271:
+ switch(ndx) {
+ case 0:
+ p = "char *";
+ break;
+ case 1:
+ p = "struct l_timeval *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fadvise64_64 */
+ case 272:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "l_loff_t";
+ break;
+ case 2:
+ p = "l_loff_t";
+ break;
+ case 3:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mbind */
+ case 274:
+ break;
+ /* linux_get_mempolicy */
+ case 275:
+ break;
+ /* linux_set_mempolicy */
+ case 276:
+ break;
+ /* linux_mq_open */
+ case 277:
+ break;
+ /* linux_mq_unlink */
+ case 278:
+ break;
+ /* linux_mq_timedsend */
+ case 279:
+ break;
+ /* linux_mq_timedreceive */
+ case 280:
+ break;
+ /* linux_mq_notify */
+ case 281:
+ break;
+ /* linux_mq_getsetattr */
+ case 282:
+ break;
+ /* linux_kexec_load */
+ case 283:
+ break;
+ /* linux_waitid */
+ case 284:
+ break;
+ /* linux_add_key */
+ case 286:
+ break;
+ /* linux_request_key */
+ case 287:
+ break;
+ /* linux_keyctl */
+ case 288:
+ break;
+ /* linux_ioprio_set */
+ case 289:
+ break;
+ /* linux_ioprio_get */
+ case 290:
+ break;
+ /* linux_inotify_init */
+ case 291:
+ break;
+ /* linux_inotify_add_watch */
+ case 292:
+ break;
+ /* linux_inotify_rm_watch */
+ case 293:
+ break;
+ /* linux_migrate_pages */
+ case 294:
+ break;
+ /* linux_openat */
+ case 295:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mkdirat */
+ case 296:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_mknodat */
+ case 297:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "l_uint";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fchownat */
+ case 298:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_uid16_t";
+ break;
+ case 3:
+ p = "l_gid16_t";
+ break;
+ case 4:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_futimesat */
+ case 299:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "struct l_timeval *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fstatat64 */
+ case 300:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "struct l_stat64 *";
+ break;
+ case 3:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_unlinkat */
+ case 301:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_renameat */
+ case 302:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "const char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_linkat */
+ case 303:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "const char *";
+ break;
+ case 4:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_symlinkat */
+ case 304:
+ switch(ndx) {
+ case 0:
+ p = "const char *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ case 2:
+ p = "const char *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_readlinkat */
+ case 305:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "char *";
+ break;
+ case 3:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_fchmodat */
+ case 306:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_mode_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_faccessat */
+ case 307:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "const char *";
+ break;
+ case 2:
+ p = "l_int";
+ break;
+ case 3:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_pselect6 */
+ case 308:
+ break;
+ /* linux_ppoll */
+ case 309:
+ break;
+ /* linux_unshare */
+ case 310:
+ break;
+ /* linux_set_robust_list */
+ case 311:
+ switch(ndx) {
+ case 0:
+ p = "struct linux_robust_list_head *";
+ break;
+ case 1:
+ p = "l_size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_get_robust_list */
+ case 312:
+ switch(ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "struct linux_robust_list_head *";
+ break;
+ case 2:
+ p = "l_size_t *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_splice */
+ case 313:
+ break;
+ /* linux_sync_file_range */
+ case 314:
+ break;
+ /* linux_tee */
+ case 315:
+ break;
+ /* linux_vmsplice */
+ case 316:
+ break;
+ /* linux_move_pages */
+ case 317:
+ break;
+ /* linux_getcpu */
+ case 318:
+ break;
+ /* linux_epoll_pwait */
+ case 319:
+ break;
+ /* linux_utimensat */
+ case 320:
+ break;
+ /* linux_signalfd */
+ case 321:
+ break;
+ /* linux_timerfd_create */
+ case 322:
+ break;
+ /* linux_eventfd */
+ case 323:
+ break;
+ /* linux_fallocate */
+ case 324:
+ break;
+ /* linux_timerfd_settime */
+ case 325:
+ break;
+ /* linux_timerfd_gettime */
+ case 326:
+ break;
+ /* linux_signalfd4 */
+ case 327:
+ break;
+ /* linux_eventfd2 */
+ case 328:
+ break;
+ /* linux_epoll_create1 */
+ case 329:
+ break;
+ /* linux_dup3 */
+ case 330:
+ break;
+ /* linux_pipe2 */
+ case 331:
+ switch(ndx) {
+ case 0:
+ p = "l_int *";
+ break;
+ case 1:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* linux_inotify_init1 */
+ case 332:
+ break;
+ /* linux_preadv */
+ case 333:
+ break;
+ /* linux_pwritev */
+ case 334:
+ break;
+ /* linux_rt_tsigqueueinfo */
+ case 335:
+ break;
+ /* linux_perf_event_open */
+ case 336:
+ break;
+ /* linux_recvmmsg */
+ case 337:
+ break;
+ /* linux_fanotify_init */
+ case 338:
+ break;
+ /* linux_fanotify_mark */
+ case 339:
+ break;
+ /* linux_prlimit64 */
+ case 340:
+ break;
+ /* linux_name_to_handle_at */
+ case 341:
+ break;
+ /* linux_open_by_handle_at */
+ case 342:
+ break;
+ /* linux_clock_adjtime */
+ case 343:
+ break;
+ /* linux_syncfs */
+ case 344:
+ break;
+ /* linux_sendmmsg */
+ case 345:
+ break;
+ /* linux_setns */
+ case 346:
+ break;
+ /* linux_process_vm_readv */
+ case 347:
+ break;
+ /* linux_process_vm_writev */
+ case 348:
+ break;
+ default:
+ break;
+ };
+ if (p != NULL)
+ strlcpy(desc, p, descsz);
+}
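
The switch that closes above is generated from the linux32 syscalls.master table and, judging by its sibling below, is presumably named systrace_entry_setargdesc(); it maps a syscall number and argument index to a type string for the DTrace systrace provider. A minimal calling sketch, grounded in the case 37 (linux_kill) entry above (the function name and signature are inferred from the return-side variant that follows):

	char desc[64];

	/* Ask for the type of linux_kill(2)'s second argument (ndx 1). */
	systrace_entry_setargdesc(37, 1, desc, sizeof(desc));
	/* desc now holds "l_int"; unknown sysnum/ndx pairs leave desc unchanged,
	 * because p stays NULL and strlcpy() is never reached. */
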
+static void
+systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
+{
+ const char *p = NULL;
+ switch (sysnum) {
+#define nosys linux_nosys
+ /* sys_exit */
+ case 1:
+ if (ndx == 0 || ndx == 1)
+ p = "void";
+ break;
+ /* linux_fork */
+ case 2:
+ /* read */
+ case 3:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* write */
+ case 4:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_open */
+ case 5:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* close */
+ case 6:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_waitpid */
+ case 7:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_creat */
+ case 8:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_link */
+ case 9:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_unlink */
+ case 10:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_execve */
+ case 11:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_chdir */
+ case 12:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_time */
+ case 13:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mknod */
+ case 14:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_chmod */
+ case 15:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_lchown16 */
+ case 16:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_stat */
+ case 18:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_lseek */
+ case 19:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getpid */
+ case 20:
+ /* linux_mount */
+ case 21:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_oldumount */
+ case 22:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setuid16 */
+ case 23:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getuid16 */
+ case 24:
+ /* linux_stime */
+ case 25:
+ /* linux_ptrace */
+ case 26:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_alarm */
+ case 27:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_pause */
+ case 29:
+ /* linux_utime */
+ case 30:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_access */
+ case 33:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_nice */
+ case 34:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* sync */
+ case 36:
+ /* linux_kill */
+ case 37:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rename */
+ case 38:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mkdir */
+ case 39:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rmdir */
+ case 40:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* dup */
+ case 41:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_pipe */
+ case 42:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_times */
+ case 43:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_brk */
+ case 45:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setgid16 */
+ case 46:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getgid16 */
+ case 47:
+ /* linux_signal */
+ case 48:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_geteuid16 */
+ case 49:
+ /* linux_getegid16 */
+ case 50:
+ /* acct */
+ case 51:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_umount */
+ case 52:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_ioctl */
+ case 54:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fcntl */
+ case 55:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setpgid */
+ case 57:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_olduname */
+ case 59:
+ /* umask */
+ case 60:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* chroot */
+ case 61:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_ustat */
+ case 62:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* dup2 */
+ case 63:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getppid */
+ case 64:
+ /* getpgrp */
+ case 65:
+ /* setsid */
+ case 66:
+ /* linux_sigaction */
+ case 67:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sgetmask */
+ case 68:
+ /* linux_ssetmask */
+ case 69:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setreuid16 */
+ case 70:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setregid16 */
+ case 71:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sigsuspend */
+ case 72:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sigpending */
+ case 73:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sethostname */
+ case 74:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setrlimit */
+ case 75:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_old_getrlimit */
+ case 76:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getrusage */
+ case 77:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_gettimeofday */
+ case 78:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_settimeofday */
+ case 79:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getgroups16 */
+ case 80:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setgroups16 */
+ case 81:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_old_select */
+ case 82:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_symlink */
+ case 83:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_lstat */
+ case 84:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_readlink */
+ case 85:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* swapon */
+ case 87:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_reboot */
+ case 88:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_readdir */
+ case 89:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mmap */
+ case 90:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* munmap */
+ case 91:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_truncate */
+ case 92:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_ftruncate */
+ case 93:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* fchmod */
+ case 94:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* fchown */
+ case 95:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getpriority */
+ case 96:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setpriority */
+ case 97:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_statfs */
+ case 99:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fstatfs */
+ case 100:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_socketcall */
+ case 102:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_syslog */
+ case 103:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setitimer */
+ case 104:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getitimer */
+ case 105:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_newstat */
+ case 106:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_newlstat */
+ case 107:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_newfstat */
+ case 108:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_uname */
+ case 109:
+ /* linux_iopl */
+ case 110:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_vhangup */
+ case 111:
+ /* linux_wait4 */
+ case 114:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_swapoff */
+ case 115:
+ /* linux_sysinfo */
+ case 116:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_ipc */
+ case 117:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* fsync */
+ case 118:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sigreturn */
+ case 119:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_clone */
+ case 120:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setdomainname */
+ case 121:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_newuname */
+ case 122:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_adjtimex */
+ case 124:
+ /* linux_mprotect */
+ case 125:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sigprocmask */
+ case 126:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_create_module */
+ case 127:
+ /* linux_init_module */
+ case 128:
+ /* linux_delete_module */
+ case 129:
+ /* linux_get_kernel_syms */
+ case 130:
+ /* linux_quotactl */
+ case 131:
+ /* getpgid */
+ case 132:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* fchdir */
+ case 133:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_bdflush */
+ case 134:
+ /* linux_sysfs */
+ case 135:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_personality */
+ case 136:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setfsuid16 */
+ case 138:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setfsgid16 */
+ case 139:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_llseek */
+ case 140:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getdents */
+ case 141:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_select */
+ case 142:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* flock */
+ case 143:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_msync */
+ case 144:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_readv */
+ case 145:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_writev */
+ case 146:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getsid */
+ case 147:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fdatasync */
+ case 148:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sysctl */
+ case 149:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* mlock */
+ case 150:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* munlock */
+ case 151:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* mlockall */
+ case 152:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* munlockall */
+ case 153:
+ /* sched_setparam */
+ case 154:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* sched_getparam */
+ case 155:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sched_setscheduler */
+ case 156:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sched_getscheduler */
+ case 157:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* sched_yield */
+ case 158:
+ /* linux_sched_get_priority_max */
+ case 159:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sched_get_priority_min */
+ case 160:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sched_rr_get_interval */
+ case 161:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_nanosleep */
+ case 162:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mremap */
+ case 163:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setresuid16 */
+ case 164:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getresuid16 */
+ case 165:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_query_module */
+ case 167:
+ /* poll */
+ case 168:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_nfsservctl */
+ case 169:
+ /* linux_setresgid16 */
+ case 170:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getresgid16 */
+ case 171:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_prctl */
+ case 172:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rt_sigreturn */
+ case 173:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rt_sigaction */
+ case 174:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rt_sigprocmask */
+ case 175:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rt_sigpending */
+ case 176:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rt_sigtimedwait */
+ case 177:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_rt_sigqueueinfo */
+ case 178:
+ /* linux_rt_sigsuspend */
+ case 179:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_pread */
+ case 180:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_pwrite */
+ case 181:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_chown16 */
+ case 182:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getcwd */
+ case 183:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_capget */
+ case 184:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_capset */
+ case 185:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sigaltstack */
+ case 186:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sendfile */
+ case 187:
+ /* linux_vfork */
+ case 190:
+ /* linux_getrlimit */
+ case 191:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mmap2 */
+ case 192:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_truncate64 */
+ case 193:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_ftruncate64 */
+ case 194:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_stat64 */
+ case 195:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_lstat64 */
+ case 196:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fstat64 */
+ case 197:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_lchown */
+ case 198:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getuid */
+ case 199:
+ /* linux_getgid */
+ case 200:
+ /* geteuid */
+ case 201:
+ /* getegid */
+ case 202:
+ /* setreuid */
+ case 203:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setregid */
+ case 204:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getgroups */
+ case 205:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setgroups */
+ case 206:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* fchown */
+ case 207:
+ /* setresuid */
+ case 208:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* getresuid */
+ case 209:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setresgid */
+ case 210:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* getresgid */
+ case 211:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_chown */
+ case 212:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setuid */
+ case 213:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* setgid */
+ case 214:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setfsuid */
+ case 215:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_setfsgid */
+ case 216:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_pivot_root */
+ case 217:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mincore */
+ case 218:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* madvise */
+ case 219:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_getdents64 */
+ case 220:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fcntl64 */
+ case 221:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_gettid */
+ case 224:
+ /* linux_setxattr */
+ case 226:
+ /* linux_lsetxattr */
+ case 227:
+ /* linux_fsetxattr */
+ case 228:
+ /* linux_getxattr */
+ case 229:
+ /* linux_lgetxattr */
+ case 230:
+ /* linux_fgetxattr */
+ case 231:
+ /* linux_listxattr */
+ case 232:
+ /* linux_llistxattr */
+ case 233:
+ /* linux_flistxattr */
+ case 234:
+ /* linux_removexattr */
+ case 235:
+ /* linux_lremovexattr */
+ case 236:
+ /* linux_fremovexattr */
+ case 237:
+ /* linux_tkill */
+ case 238:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sys_futex */
+ case 240:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sched_setaffinity */
+ case 241:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_sched_getaffinity */
+ case 242:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_set_thread_area */
+ case 243:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fadvise64 */
+ case 250:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_exit_group */
+ case 252:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_lookup_dcookie */
+ case 253:
+ /* linux_epoll_create */
+ case 254:
+ /* linux_epoll_ctl */
+ case 255:
+ /* linux_epoll_wait */
+ case 256:
+ /* linux_remap_file_pages */
+ case 257:
+ /* linux_set_tid_address */
+ case 258:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_timer_create */
+ case 259:
+ /* linux_timer_settime */
+ case 260:
+ /* linux_timer_gettime */
+ case 261:
+ /* linux_timer_getoverrun */
+ case 262:
+ /* linux_timer_delete */
+ case 263:
+ /* linux_clock_settime */
+ case 264:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_clock_gettime */
+ case 265:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_clock_getres */
+ case 266:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_clock_nanosleep */
+ case 267:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_statfs64 */
+ case 268:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fstatfs64 */
+ case 269:
+ /* linux_tgkill */
+ case 270:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_utimes */
+ case 271:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fadvise64_64 */
+ case 272:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mbind */
+ case 274:
+ /* linux_get_mempolicy */
+ case 275:
+ /* linux_set_mempolicy */
+ case 276:
+ /* linux_mq_open */
+ case 277:
+ /* linux_mq_unlink */
+ case 278:
+ /* linux_mq_timedsend */
+ case 279:
+ /* linux_mq_timedreceive */
+ case 280:
+ /* linux_mq_notify */
+ case 281:
+ /* linux_mq_getsetattr */
+ case 282:
+ /* linux_kexec_load */
+ case 283:
+ /* linux_waitid */
+ case 284:
+ /* linux_add_key */
+ case 286:
+ /* linux_request_key */
+ case 287:
+ /* linux_keyctl */
+ case 288:
+ /* linux_ioprio_set */
+ case 289:
+ /* linux_ioprio_get */
+ case 290:
+ /* linux_inotify_init */
+ case 291:
+ /* linux_inotify_add_watch */
+ case 292:
+ /* linux_inotify_rm_watch */
+ case 293:
+ /* linux_migrate_pages */
+ case 294:
+ /* linux_openat */
+ case 295:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mkdirat */
+ case 296:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_mknodat */
+ case 297:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fchownat */
+ case 298:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_futimesat */
+ case 299:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fstatat64 */
+ case 300:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_unlinkat */
+ case 301:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_renameat */
+ case 302:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_linkat */
+ case 303:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_symlinkat */
+ case 304:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_readlinkat */
+ case 305:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_fchmodat */
+ case 306:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_faccessat */
+ case 307:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_pselect6 */
+ case 308:
+ /* linux_ppoll */
+ case 309:
+ /* linux_unshare */
+ case 310:
+ /* linux_set_robust_list */
+ case 311:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_get_robust_list */
+ case 312:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_splice */
+ case 313:
+ /* linux_sync_file_range */
+ case 314:
+ /* linux_tee */
+ case 315:
+ /* linux_vmsplice */
+ case 316:
+ /* linux_move_pages */
+ case 317:
+ /* linux_getcpu */
+ case 318:
+ /* linux_epoll_pwait */
+ case 319:
+ /* linux_utimensat */
+ case 320:
+ /* linux_signalfd */
+ case 321:
+ /* linux_timerfd_create */
+ case 322:
+ /* linux_eventfd */
+ case 323:
+ /* linux_fallocate */
+ case 324:
+ /* linux_timerfd_settime */
+ case 325:
+ /* linux_timerfd_gettime */
+ case 326:
+ /* linux_signalfd4 */
+ case 327:
+ /* linux_eventfd2 */
+ case 328:
+ /* linux_epoll_create1 */
+ case 329:
+ /* linux_dup3 */
+ case 330:
+ /* linux_pipe2 */
+ case 331:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* linux_inotify_init1 */
+ case 332:
+ /* linux_preadv */
+ case 333:
+ /* linux_pwritev */
+ case 334:
+ /* linux_rt_tsigqueueinfo */
+ case 335:
+ /* linux_perf_event_open */
+ case 336:
+ /* linux_recvmmsg */
+ case 337:
+ /* linux_fanotify_init */
+ case 338:
+ /* linux_fanotify_mark */
+ case 339:
+ /* linux_prlimit64 */
+ case 340:
+ /* linux_name_to_handle_at */
+ case 341:
+ /* linux_open_by_handle_at */
+ case 342:
+ /* linux_clock_adjtime */
+ case 343:
+ /* linux_syncfs */
+ case 344:
+ /* linux_sendmmsg */
+ case 345:
+ /* linux_setns */
+ case 346:
+ /* linux_process_vm_readv */
+ case 347:
+ /* linux_process_vm_writev */
+ case 348:
+ default:
+ break;
+ };
+ if (p != NULL)
+ strlcpy(desc, p, descsz);
+}
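
The return-side descriptor follows the same shape: for nearly every syscall, indices 0 and 1 (the return slot and its alternate) are described as "int", and syscalls without a matching case fall through with desc untouched. A matching sketch using the same linux_kill entry:

	char desc[64];

	systrace_return_setargdesc(37 /* linux_kill */, 0, desc, sizeof(desc));
	/* desc now holds "int". */
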
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
new file mode 100644
index 0000000..42500da
--- /dev/null
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -0,0 +1,1205 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 1998-1999 Andrew Gallatin
+ * Copyright (c) 1994-1996 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_compat.h"
+
+#ifndef COMPAT_FREEBSD32
+#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
+#endif
+
+#define __ELF_WORD_SIZE 32
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/exec.h>
+#include <sys/fcntl.h>
+#include <sys/imgact.h>
+#include <sys/imgact_elf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/sysctl.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/vnode.h>
+#include <sys/eventhandler.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+
+#include <amd64/linux32/linux.h>
+#include <amd64/linux32/linux32_proto.h>
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_futex.h>
+#include <compat/linux/linux_ioctl.h>
+#include <compat/linux/linux_mib.h>
+#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_signal.h>
+#include <compat/linux/linux_util.h>
+
+MODULE_VERSION(linux, 1);
+
+MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
+
+#define AUXARGS_ENTRY_32(pos, id, val) \
+ do { \
+ suword32(pos++, id); \
+ suword32(pos++, val); \
+ } while (0)
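
A usage sketch for the aux-vector macro above, assuming pos is an Elf32_Addr * cursor into the new process's user stack (AT_PAGESZ is the standard ELF auxiliary tag; its use here is purely illustrative):

	/* Emit one 32-bit aux entry and advance the cursor twice. */
	AUXARGS_ENTRY_32(pos, AT_PAGESZ, PAGE_SIZE);
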
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define SHELLMAGIC 0x2123 /* #! */
+#else
+#define SHELLMAGIC 0x2321
+#endif
+
+/*
+ * Allow the sendsig functions to use the ldebug() facility
+ * even though they are not syscalls themselves. Map them
+ * to syscall 0. This is slightly less bogus than using
+ * ldebug(sigreturn).
+ */
+#define LINUX_SYS_linux_rt_sendsig 0
+#define LINUX_SYS_linux_sendsig 0
+
+const char *linux_platform = "i686";
+static int linux_szplatform;
+extern char linux_sigcode[];
+extern int linux_szsigcode;
+
+extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
+
+SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
+SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
+
+static int elf_linux_fixup(register_t **stack_base,
+ struct image_params *iparams);
+static register_t *linux_copyout_strings(struct image_params *imgp);
+static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
+static void exec_linux_setregs(struct thread *td,
+ struct image_params *imgp, u_long stack);
+static void linux32_fixlimit(struct rlimit *rl, int which);
+static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
+
+static eventhandler_tag linux_exit_tag;
+static eventhandler_tag linux_exec_tag;
+
+/*
+ * Linux syscalls return negative errnos; FreeBSD uses positive
+ * errnos, so map between the two here.
+ * Reference:
+ *   FreeBSD: src/sys/sys/errno.h
+ *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
+ *            linux-2.6.17.8/include/asm-generic/errno.h
+ */
+static int bsd_to_linux_errno[ELAST + 1] = {
+ -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
+ -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
+ -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
+ -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
+ -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
+ -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
+ -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
+ -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
+ -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
+ -72, -67, -71
+};
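+
+/*
+ * The table is indexed by the FreeBSD errno and holds the negated
+ * Linux value; for example, FreeBSD EAGAIN (35) maps to entry -11,
+ * the negative of Linux EAGAIN (11).
+ */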
+
+int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
+ LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
+ LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
+ LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
+ LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
+ LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
+ LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
+ LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
+ 0, LINUX_SIGUSR1, LINUX_SIGUSR2
+};
+
+int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
+ SIGHUP, SIGINT, SIGQUIT, SIGILL,
+ SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
+ SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
+ SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
+ SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
+ SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
+ SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
+ SIGIO, SIGURG, SIGSYS
+};
+
+#define LINUX_T_UNKNOWN 255
+static int _bsd_to_linux_trapcode[] = {
+ LINUX_T_UNKNOWN, /* 0 */
+ 6, /* 1 T_PRIVINFLT */
+ LINUX_T_UNKNOWN, /* 2 */
+ 3, /* 3 T_BPTFLT */
+ LINUX_T_UNKNOWN, /* 4 */
+ LINUX_T_UNKNOWN, /* 5 */
+ 16, /* 6 T_ARITHTRAP */
+ 254, /* 7 T_ASTFLT */
+ LINUX_T_UNKNOWN, /* 8 */
+ 13, /* 9 T_PROTFLT */
+ 1, /* 10 T_TRCTRAP */
+ LINUX_T_UNKNOWN, /* 11 */
+ 14, /* 12 T_PAGEFLT */
+ LINUX_T_UNKNOWN, /* 13 */
+ 17, /* 14 T_ALIGNFLT */
+ LINUX_T_UNKNOWN, /* 15 */
+ LINUX_T_UNKNOWN, /* 16 */
+ LINUX_T_UNKNOWN, /* 17 */
+ 0, /* 18 T_DIVIDE */
+ 2, /* 19 T_NMI */
+ 4, /* 20 T_OFLOW */
+ 5, /* 21 T_BOUND */
+ 7, /* 22 T_DNA */
+ 8, /* 23 T_DOUBLEFLT */
+ 9, /* 24 T_FPOPFLT */
+ 10, /* 25 T_TSSFLT */
+ 11, /* 26 T_SEGNPFLT */
+ 12, /* 27 T_STKFLT */
+ 18, /* 28 T_MCHK */
+ 19, /* 29 T_XMMFLT */
+ 15 /* 30 T_RESERVED */
+};
+#define bsd_to_linux_trapcode(code) \
+	((code) < sizeof(_bsd_to_linux_trapcode) / \
+	    sizeof(*_bsd_to_linux_trapcode) ? \
+	    _bsd_to_linux_trapcode[(code)] : \
+	    LINUX_T_UNKNOWN)
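+
+/*
+ * Out-of-range trap codes fall back to LINUX_T_UNKNOWN; in-range
+ * codes go through the table above, e.g. FreeBSD T_PAGEFLT (12)
+ * becomes Linux trap 14 (the page-fault vector).
+ */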
+
+struct linux32_ps_strings {
+ u_int32_t ps_argvstr; /* first of 0 or more argument strings */
+ u_int ps_nargvstr; /* the number of argument strings */
+ u_int32_t ps_envstr; /* first of 0 or more environment strings */
+ u_int ps_nenvstr; /* the number of environment strings */
+};
+
+/*
+ * If FreeBSD & Linux have a difference of opinion about what a trap
+ * means, deal with it here.
+ *
+ * MPSAFE
+ */
+static int
+translate_traps(int signal, int trap_code)
+{
+	if (signal != SIGBUS)
+		return (signal);
+	switch (trap_code) {
+	case T_PROTFLT:
+	case T_TSSFLT:
+	case T_DOUBLEFLT:
+	case T_PAGEFLT:
+		return (SIGSEGV);
+	default:
+		return (signal);
+	}
+}
+
+static int
+elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
+{
+ Elf32_Auxargs *args;
+ Elf32_Addr *base;
+ Elf32_Addr *pos, *uplatform;
+ struct linux32_ps_strings *arginfo;
+
+ arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
+ uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
+
+ KASSERT(curthread->td_proc == imgp->proc,
+ ("unsafe elf_linux_fixup(), should be curproc"));
+ base = (Elf32_Addr *)*stack_base;
+ args = (Elf32_Auxargs *)imgp->auxargs;
+ pos = base + (imgp->args->argc + imgp->args->envc + 2);
+
+ AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
+
+	/*
+	 * Do not export AT_CLKTCK when emulating Linux kernels prior to
+	 * 2.4.0, as it first appeared in 2.4.0-rc7.  When exported,
+	 * AT_CLKTCK is returned by sysconf(_SC_CLK_TCK); glibc falls back
+	 * to the hard-coded CLK_TCK value when the aux entry is not
+	 * present.
+	 * Also see the linux_times() implementation.
+	 */
+ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
+ AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
+ AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
+ AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
+ AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
+ AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
+ AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
+ AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
+ AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
+ AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
+ AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
+ AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
+ AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
+ AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
+ AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
+ if (args->execfd != -1)
+ AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
+ AUXARGS_ENTRY_32(pos, AT_NULL, 0);
+
+ free(imgp->auxargs, M_TEMP);
+ imgp->auxargs = NULL;
+
+ base--;
+ suword32(base, (uint32_t)imgp->args->argc);
+ *stack_base = (register_t *)base;
+	return (0);
+}
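+
+/*
+ * After elf_linux_fixup() the new 32-bit stack, from the final stack
+ * pointer upwards, has the layout Linux binaries expect:
+ *
+ *	argc
+ *	argv[0..argc-1], NULL
+ *	envp[0..envc-1], NULL
+ *	auxv {id, value} pairs, terminated by AT_NULL
+ */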
+
+extern unsigned long linux_sznonrtsigcode;
+
+static void
+linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ struct sigacts *psp;
+ struct trapframe *regs;
+ struct l_rt_sigframe *fp, frame;
+ int oonstack;
+ int sig;
+ int code;
+
+ sig = ksi->ksi_signo;
+ code = ksi->ksi_code;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_rsp);
+
+#ifdef DEBUG
+ if (ldebug(rt_sendsig))
+ printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
+ catcher, sig, (void*)mask, code);
+#endif
+ /*
+ * Allocate space for the signal handler context.
+ */
+ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
+ } else
+ fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
+ mtx_unlock(&psp->ps_mtx);
+
+ /*
+ * Build the argument list for the signal handler.
+ */
+ if (p->p_sysent->sv_sigtbl)
+ if (sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ bzero(&frame, sizeof(frame));
+
+ frame.sf_handler = PTROUT(catcher);
+ frame.sf_sig = sig;
+ frame.sf_siginfo = PTROUT(&fp->sf_si);
+ frame.sf_ucontext = PTROUT(&fp->sf_sc);
+
+ /* Fill in POSIX parts */
+ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
+
+ /*
+ * Build the signal context to be used by sigreturn.
+ */
+ frame.sf_sc.uc_flags = 0; /* XXX ??? */
+ frame.sf_sc.uc_link = 0; /* XXX ??? */
+
+ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
+ frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
+ frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
+ ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
+ PROC_UNLOCK(p);
+
+ bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
+
+ frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
+ frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
+ frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
+ frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
+ frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
+ frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
+ frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
+ frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
+ frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
+ frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
+ frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
+ frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
+ frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
+ frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
+ frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
+ frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
+ frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
+ frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
+ frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
+ frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
+
+#ifdef DEBUG
+ if (ldebug(rt_sendsig))
+ printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
+ frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
+ td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
+#endif
+
+ if (copyout(&frame, fp, sizeof(frame)) != 0) {
+ /*
+ * Process has trashed its stack; give it an illegal
+ * instruction to halt it in its tracks.
+ */
+#ifdef DEBUG
+ if (ldebug(rt_sendsig))
+ printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
+ fp, oonstack);
+#endif
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ /*
+ * Build context to run handler in.
+ */
+ regs->tf_rsp = PTROUT(fp);
+ regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
+ regs->tf_rflags &= ~(PSL_T | PSL_D);
+ regs->tf_cs = _ucode32sel;
+ regs->tf_ss = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+
+/*
+ * Send an interrupt to the process.
+ *
+ * The stack is set up so that the signal trampoline (sigcode) calls
+ * the handler, followed by a call to the sigreturn routine below.
+ * After sigreturn resets the signal mask, the stack, and the frame
+ * pointer, it returns to the user-specified pc and psl.
+ */
+static void
+linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
+{
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ struct sigacts *psp;
+ struct trapframe *regs;
+ struct l_sigframe *fp, frame;
+ l_sigset_t lmask;
+ int oonstack, i;
+ int sig, code;
+
+ sig = ksi->ksi_signo;
+ code = ksi->ksi_code;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ psp = p->p_sigacts;
+ mtx_assert(&psp->ps_mtx, MA_OWNED);
+ if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+ /* Signal handler installed with SA_SIGINFO. */
+ linux_rt_sendsig(catcher, ksi, mask);
+ return;
+ }
+
+ regs = td->td_frame;
+ oonstack = sigonstack(regs->tf_rsp);
+
+#ifdef DEBUG
+ if (ldebug(sendsig))
+ printf(ARGS(sendsig, "%p, %d, %p, %u"),
+ catcher, sig, (void*)mask, code);
+#endif
+
+ /*
+ * Allocate space for the signal handler context.
+ */
+ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
+ SIGISMEMBER(psp->ps_sigonstack, sig)) {
+ fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
+ td->td_sigstk.ss_size - sizeof(struct l_sigframe));
+ } else
+ fp = (struct l_sigframe *)regs->tf_rsp - 1;
+ mtx_unlock(&psp->ps_mtx);
+ PROC_UNLOCK(p);
+
+ /*
+ * Build the argument list for the signal handler.
+ */
+ if (p->p_sysent->sv_sigtbl)
+ if (sig <= p->p_sysent->sv_sigsize)
+ sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+
+ bzero(&frame, sizeof(frame));
+
+ frame.sf_handler = PTROUT(catcher);
+ frame.sf_sig = sig;
+
+ bsd_to_linux_sigset(mask, &lmask);
+
+ /*
+ * Build the signal context to be used by sigreturn.
+ */
+ frame.sf_sc.sc_mask = lmask.__bits[0];
+ frame.sf_sc.sc_gs = regs->tf_gs;
+ frame.sf_sc.sc_fs = regs->tf_fs;
+ frame.sf_sc.sc_es = regs->tf_es;
+ frame.sf_sc.sc_ds = regs->tf_ds;
+ frame.sf_sc.sc_edi = regs->tf_rdi;
+ frame.sf_sc.sc_esi = regs->tf_rsi;
+ frame.sf_sc.sc_ebp = regs->tf_rbp;
+ frame.sf_sc.sc_ebx = regs->tf_rbx;
+ frame.sf_sc.sc_edx = regs->tf_rdx;
+ frame.sf_sc.sc_ecx = regs->tf_rcx;
+ frame.sf_sc.sc_eax = regs->tf_rax;
+ frame.sf_sc.sc_eip = regs->tf_rip;
+ frame.sf_sc.sc_cs = regs->tf_cs;
+ frame.sf_sc.sc_eflags = regs->tf_rflags;
+ frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
+ frame.sf_sc.sc_ss = regs->tf_ss;
+ frame.sf_sc.sc_err = regs->tf_err;
+ frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
+ frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
+
+ for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
+ frame.sf_extramask[i] = lmask.__bits[i+1];
+
+ if (copyout(&frame, fp, sizeof(frame)) != 0) {
+ /*
+ * Process has trashed its stack; give it an illegal
+ * instruction to halt it in its tracks.
+ */
+ PROC_LOCK(p);
+ sigexit(td, SIGILL);
+ }
+
+ /*
+ * Build context to run handler in.
+ */
+ regs->tf_rsp = PTROUT(fp);
+ regs->tf_rip = p->p_sysent->sv_sigcode_base;
+ regs->tf_rflags &= ~(PSL_T | PSL_D);
+ regs->tf_cs = _ucode32sel;
+ regs->tf_ss = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+ PROC_LOCK(p);
+ mtx_lock(&psp->ps_mtx);
+}
+
+/*
+ * System call to clean up state after a signal has been taken.
+ * Reset the signal mask and stack state from the context left by
+ * sendsig (above).  Return to the previous pc and psl as specified
+ * by that context.  Check carefully to make sure that the user has
+ * not modified the psl to gain improper privileges or to cause a
+ * machine fault.
+ */
+int
+linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
+{
+ struct l_sigframe frame;
+ struct trapframe *regs;
+ sigset_t bmask;
+ l_sigset_t lmask;
+ int eflags, i;
+ ksiginfo_t ksi;
+
+ regs = td->td_frame;
+
+#ifdef DEBUG
+ if (ldebug(sigreturn))
+ printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
+#endif
+ /*
+ * The trampoline code hands us the sigframe.
+ * It is unsafe to keep track of it ourselves, in the event that a
+ * program jumps out of a signal handler.
+ */
+ if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
+ return (EFAULT);
+
+ /*
+ * Check for security violations.
+ */
+#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+ eflags = frame.sf_sc.sc_eflags;
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF. The
+ * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
+ * sometimes set it there too. tf_eflags is kept in the signal
+ * context during signal handling and there is no other place
+ * to remember it, so the PSL_RF bit may be corrupted by the
+ * signal handler without us knowing. Corruption of the PSL_RF
+ * bit at worst causes one more or one less debugger trap, so
+ * allowing it is fairly harmless.
+ */
+ if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
+		return (EINVAL);
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
+ if (!CS_SECURE(frame.sf_sc.sc_cs)) {
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_rip;
+ trapsignal(td, &ksi);
+		return (EINVAL);
+ }
+
+ lmask.__bits[0] = frame.sf_sc.sc_mask;
+ for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
+ lmask.__bits[i+1] = frame.sf_extramask[i];
+ linux_to_bsd_sigset(&lmask, &bmask);
+ kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
+
+ /*
+ * Restore signal context.
+ */
+ regs->tf_rdi = frame.sf_sc.sc_edi;
+ regs->tf_rsi = frame.sf_sc.sc_esi;
+ regs->tf_rbp = frame.sf_sc.sc_ebp;
+ regs->tf_rbx = frame.sf_sc.sc_ebx;
+ regs->tf_rdx = frame.sf_sc.sc_edx;
+ regs->tf_rcx = frame.sf_sc.sc_ecx;
+ regs->tf_rax = frame.sf_sc.sc_eax;
+ regs->tf_rip = frame.sf_sc.sc_eip;
+ regs->tf_cs = frame.sf_sc.sc_cs;
+ regs->tf_ds = frame.sf_sc.sc_ds;
+ regs->tf_es = frame.sf_sc.sc_es;
+ regs->tf_fs = frame.sf_sc.sc_fs;
+ regs->tf_gs = frame.sf_sc.sc_gs;
+ regs->tf_rflags = eflags;
+ regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
+ regs->tf_ss = frame.sf_sc.sc_ss;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+
+ return (EJUSTRETURN);
+}
+
+/*
+ * System call to clean up state after a signal has been taken.
+ * Reset the signal mask and stack state from the context left by
+ * rt_sendsig (above).  Return to the previous pc and psl as
+ * specified by that context.  Check carefully to make sure that the
+ * user has not modified the psl to gain improper privileges or to
+ * cause a machine fault.
+ */
+int
+linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
+{
+ struct l_ucontext uc;
+ struct l_sigcontext *context;
+ sigset_t bmask;
+ l_stack_t *lss;
+ stack_t ss;
+ struct trapframe *regs;
+ int eflags;
+ ksiginfo_t ksi;
+
+ regs = td->td_frame;
+
+#ifdef DEBUG
+ if (ldebug(rt_sigreturn))
+ printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
+#endif
+ /*
+ * The trampoline code hands us the ucontext.
+ * It is unsafe to keep track of it ourselves, in the event that a
+ * program jumps out of a signal handler.
+ */
+ if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
+ return (EFAULT);
+
+ context = &uc.uc_mcontext;
+
+ /*
+ * Check for security violations.
+ */
+#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+ eflags = context->sc_eflags;
+ /*
+ * XXX do allow users to change the privileged flag PSL_RF. The
+ * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
+ * sometimes set it there too. tf_eflags is kept in the signal
+ * context during signal handling and there is no other place
+ * to remember it, so the PSL_RF bit may be corrupted by the
+ * signal handler without us knowing. Corruption of the PSL_RF
+ * bit at worst causes one more or one less debugger trap, so
+ * allowing it is fairly harmless.
+ */
+ if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
+		return (EINVAL);
+
+ /*
+ * Don't allow users to load a valid privileged %cs. Let the
+ * hardware check for invalid selectors, excess privilege in
+ * other selectors, invalid %eip's and invalid %esp's.
+ */
+#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
+ if (!CS_SECURE(context->sc_cs)) {
+ ksiginfo_init_trap(&ksi);
+ ksi.ksi_signo = SIGBUS;
+ ksi.ksi_code = BUS_OBJERR;
+ ksi.ksi_trapno = T_PROTFLT;
+ ksi.ksi_addr = (void *)regs->tf_rip;
+ trapsignal(td, &ksi);
+		return (EINVAL);
+ }
+
+ linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
+ kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
+
+ /*
+ * Restore signal context
+ */
+ regs->tf_gs = context->sc_gs;
+ regs->tf_fs = context->sc_fs;
+ regs->tf_es = context->sc_es;
+ regs->tf_ds = context->sc_ds;
+ regs->tf_rdi = context->sc_edi;
+ regs->tf_rsi = context->sc_esi;
+ regs->tf_rbp = context->sc_ebp;
+ regs->tf_rbx = context->sc_ebx;
+ regs->tf_rdx = context->sc_edx;
+ regs->tf_rcx = context->sc_ecx;
+ regs->tf_rax = context->sc_eax;
+ regs->tf_rip = context->sc_eip;
+ regs->tf_cs = context->sc_cs;
+ regs->tf_rflags = eflags;
+ regs->tf_rsp = context->sc_esp_at_signal;
+ regs->tf_ss = context->sc_ss;
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
+
+	/*
+	 * Call sigaltstack() and ignore the result.
+	 */
+ lss = &uc.uc_stack;
+ ss.ss_sp = PTRIN(lss->ss_sp);
+ ss.ss_size = lss->ss_size;
+ ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
+
+#ifdef DEBUG
+ if (ldebug(rt_sigreturn))
+ printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
+ ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
+#endif
+ (void)kern_sigaltstack(td, &ss, NULL);
+
+ return (EJUSTRETURN);
+}
+
+static int
+linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+{
+ struct proc *p;
+ struct trapframe *frame;
+
+ p = td->td_proc;
+ frame = td->td_frame;
+
+ sa->args[0] = frame->tf_rbx;
+ sa->args[1] = frame->tf_rcx;
+ sa->args[2] = frame->tf_rdx;
+ sa->args[3] = frame->tf_rsi;
+ sa->args[4] = frame->tf_rdi;
+ sa->args[5] = frame->tf_rbp; /* Unconfirmed */
+ sa->code = frame->tf_rax;
+
+ if (sa->code >= p->p_sysent->sv_size)
+ sa->callp = &p->p_sysent->sv_table[0];
+ else
+ sa->callp = &p->p_sysent->sv_table[sa->code];
+ sa->narg = sa->callp->sy_narg;
+
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame->tf_rdx;
+
+ return (0);
+}
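+
+/*
+ * This mirrors the i386 Linux syscall convention: the syscall number
+ * arrives in %eax and up to six arguments in %ebx, %ecx, %edx, %esi,
+ * %edi and %ebp (the sixth is marked unconfirmed above); only the
+ * low 32 bits of the 64-bit trapframe registers are meaningful.
+ */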
+
+/*
+ * If a linux binary is exec'ing something, try this image activator
+ * first. We override standard shell script execution in order to
+ * be able to modify the interpreter path. We only do this if a linux
+ * binary is doing the exec, so we do not create an EXEC module for it.
+ */
+static int exec_linux_imgact_try(struct image_params *iparams);
+
+static int
+exec_linux_imgact_try(struct image_params *imgp)
+{
+ const char *head = (const char *)imgp->image_header;
+ char *rpath;
+ int error = -1;
+
+ /*
+ * The interpreter for shell scripts run from a linux binary needs
+ * to be located in /compat/linux if possible in order to recursively
+ * maintain linux path emulation.
+ */
+ if (((const short *)head)[0] == SHELLMAGIC) {
+		/*
+		 * Run our normal shell image activator.  If it succeeds,
+		 * attempt to use the alternate path for the interpreter.
+		 * If an alternate path is found, use our stringspace to
+		 * store it.
+		 */
+ if ((error = exec_shell_imgact(imgp)) == 0) {
+ linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
+ imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
+ AT_FDCWD);
+ if (rpath != NULL)
+ imgp->args->fname_buf =
+ imgp->interpreter_name = rpath;
+ }
+ }
+ return (error);
+}
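+
+/*
+ * For example, a script starting with "#!/bin/sh" exec'ed by a Linux
+ * binary should have its interpreter rewritten by
+ * linux_emul_convpath() to /compat/linux/bin/sh, provided that path
+ * exists; otherwise the native interpreter is used.
+ */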
+
+/*
+ * Clear registers on exec
+ * XXX copied from ia32_signal.c.
+ */
+static void
+exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
+{
+ struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+
+ critical_enter();
+ wrmsr(MSR_FSBASE, 0);
+ wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
+ pcb->pcb_fsbase = 0;
+ pcb->pcb_gsbase = 0;
+ critical_exit();
+ pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
+
+ bzero((char *)regs, sizeof(struct trapframe));
+ regs->tf_rip = imgp->entry_addr;
+ regs->tf_rsp = stack;
+ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
+ regs->tf_gs = _ugssel;
+ regs->tf_fs = _ufssel;
+ regs->tf_es = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_ss = _udatasel;
+ regs->tf_flags = TF_HASSEGS;
+ regs->tf_cs = _ucode32sel;
+ regs->tf_rbx = imgp->ps_strings;
+
+ fpstate_drop(td);
+
+ /* Do full restore on return so that we can change to a different %cs */
+ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
+ clear_pcb_flags(pcb, PCB_GS32BIT);
+ td->td_retval[1] = 0;
+}
+
+/*
+ * XXX copied from ia32_sysvec.c.
+ */
+static register_t *
+linux_copyout_strings(struct image_params *imgp)
+{
+ int argc, envc;
+ u_int32_t *vectp;
+ char *stringp, *destp;
+ u_int32_t *stack_base;
+ struct linux32_ps_strings *arginfo;
+
+ /*
+ * Calculate string base and vector table pointers.
+ * Also deal with signal trampoline code for this exec type.
+ */
+ arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
+ destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
+ roundup((ARG_MAX - imgp->args->stringspace),
+ sizeof(char *));
+
+ /*
+ * Install LINUX_PLATFORM
+ */
+ copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
+ linux_szplatform);
+
+ /*
+ * If we have a valid auxargs ptr, prepare some room
+ * on the stack.
+ */
+ if (imgp->auxargs) {
+		/*
+		 * Reserve 'LINUX_AT_COUNT * 2' words for the ELF auxargs
+		 * data; this default is kept for backward compatibility.
+		 */
+ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
+ (LINUX_AT_COUNT * 2);
+		/*
+		 * The '+ 2' is for the null pointers at the end of each
+		 * of the arg and env vector sets, and imgp->auxarg_size
+		 * is room for the arguments of the runtime loader.
+		 */
+ vectp = (u_int32_t *) (destp - (imgp->args->argc +
+ imgp->args->envc + 2 + imgp->auxarg_size) *
+ sizeof(u_int32_t));
+
+ } else
+		/*
+		 * The '+ 2' is for the null pointers at the end of each
+		 * of the arg and env vector sets.
+		 */
+ vectp = (u_int32_t *)(destp - (imgp->args->argc +
+ imgp->args->envc + 2) * sizeof(u_int32_t));
+
+ /*
+ * vectp also becomes our initial stack base
+ */
+ stack_base = vectp;
+
+ stringp = imgp->args->begin_argv;
+ argc = imgp->args->argc;
+ envc = imgp->args->envc;
+ /*
+ * Copy out strings - arguments and environment.
+ */
+ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
+
+ /*
+ * Fill in "ps_strings" struct for ps, w, etc.
+ */
+ suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
+ suword32(&arginfo->ps_nargvstr, argc);
+
+ /*
+ * Fill in argument portion of vector table.
+ */
+ for (; argc > 0; --argc) {
+ suword32(vectp++, (uint32_t)(intptr_t)destp);
+ while (*stringp++ != 0)
+ destp++;
+ destp++;
+ }
+
+ /* a null vector table pointer separates the argp's from the envp's */
+ suword32(vectp++, 0);
+
+ suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
+ suword32(&arginfo->ps_nenvstr, envc);
+
+ /*
+ * Fill in environment portion of vector table.
+ */
+ for (; envc > 0; --envc) {
+ suword32(vectp++, (uint32_t)(intptr_t)destp);
+ while (*stringp++ != 0)
+ destp++;
+ destp++;
+ }
+
+ /* end of vector table is a null pointer */
+ suword32(vectp, 0);
+
+ return ((register_t *)stack_base);
+}
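+
+/*
+ * Resulting layout at the top of the new user stack, from high to
+ * low addresses:
+ *
+ *	linux32_ps_strings structure	(at LINUX32_PS_STRINGS)
+ *	linux_platform string		("i686")
+ *	SPARE_USRSPACE gap
+ *	argument and environment strings
+ *	argv/envp/auxargs vector table	(the returned stack base)
+ */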
+
+static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
+ "32-bit Linux emulation");
+
+static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
+SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
+ &linux32_maxdsiz, 0, "");
+static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
+SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
+ &linux32_maxssiz, 0, "");
+static u_long linux32_maxvmem = LINUX32_MAXVMEM;
+SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
+ &linux32_maxvmem, 0, "");
+
+static void
+linux32_fixlimit(struct rlimit *rl, int which)
+{
+
+ switch (which) {
+ case RLIMIT_DATA:
+ if (linux32_maxdsiz != 0) {
+ if (rl->rlim_cur > linux32_maxdsiz)
+ rl->rlim_cur = linux32_maxdsiz;
+ if (rl->rlim_max > linux32_maxdsiz)
+ rl->rlim_max = linux32_maxdsiz;
+ }
+ break;
+ case RLIMIT_STACK:
+ if (linux32_maxssiz != 0) {
+ if (rl->rlim_cur > linux32_maxssiz)
+ rl->rlim_cur = linux32_maxssiz;
+ if (rl->rlim_max > linux32_maxssiz)
+ rl->rlim_max = linux32_maxssiz;
+ }
+ break;
+ case RLIMIT_VMEM:
+ if (linux32_maxvmem != 0) {
+ if (rl->rlim_cur > linux32_maxvmem)
+ rl->rlim_cur = linux32_maxvmem;
+ if (rl->rlim_max > linux32_maxvmem)
+ rl->rlim_max = linux32_maxvmem;
+ }
+ break;
+ }
+}
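+
+/*
+ * The clamps above are runtime-tunable, e.g.
+ * "sysctl compat.linux32.maxdsiz=536870912" adjusts the data size
+ * limit; setting a knob to 0 disables that clamp entirely.
+ */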
+
+struct sysentvec elf_linux_sysvec = {
+ .sv_size = LINUX_SYS_MAXSYSCALL,
+ .sv_table = linux_sysent,
+ .sv_mask = 0,
+ .sv_sigsize = LINUX_SIGTBLSZ,
+ .sv_sigtbl = bsd_to_linux_signal,
+ .sv_errsize = ELAST + 1,
+ .sv_errtbl = bsd_to_linux_errno,
+ .sv_transtrap = translate_traps,
+ .sv_fixup = elf_linux_fixup,
+ .sv_sendsig = linux_sendsig,
+ .sv_sigcode = linux_sigcode,
+ .sv_szsigcode = &linux_szsigcode,
+ .sv_prepsyscall = NULL,
+ .sv_name = "Linux ELF32",
+ .sv_coredump = elf32_coredump,
+ .sv_imgact_try = exec_linux_imgact_try,
+ .sv_minsigstksz = LINUX_MINSIGSTKSZ,
+ .sv_pagesize = PAGE_SIZE,
+ .sv_minuser = VM_MIN_ADDRESS,
+ .sv_maxuser = LINUX32_MAXUSER,
+ .sv_usrstack = LINUX32_USRSTACK,
+ .sv_psstrings = LINUX32_PS_STRINGS,
+ .sv_stackprot = VM_PROT_ALL,
+ .sv_copyout_strings = linux_copyout_strings,
+ .sv_setregs = exec_linux_setregs,
+ .sv_fixlimit = linux32_fixlimit,
+ .sv_maxssiz = &linux32_maxssiz,
+ .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
+ .sv_set_syscall_retval = cpu_set_syscall_retval,
+ .sv_fetch_syscall_args = linux32_fetch_syscall_args,
+ .sv_syscallnames = NULL,
+ .sv_shared_page_base = LINUX32_SHAREDPAGE,
+ .sv_shared_page_len = PAGE_SIZE,
+ .sv_schedtail = linux_schedtail,
+};
+INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
+
+static char GNU_ABI_VENDOR[] = "GNU";
+static int GNULINUX_ABI_DESC = 0;
+
+static boolean_t
+linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
+{
+ const Elf32_Word *desc;
+ uintptr_t p;
+
+ p = (uintptr_t)(note + 1);
+ p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
+
+ desc = (const Elf32_Word *)p;
+ if (desc[0] != GNULINUX_ABI_DESC)
+ return (FALSE);
+
+	/*
+	 * For Linux we encode osrel as follows (see linux_mib.c):
+	 * VVVMMMIII (version, major, minor).
+	 */
+ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
+
+ return (TRUE);
+}
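+
+/*
+ * For example, a GNU ABI note for Linux 2.6.18 carries descriptor
+ * words {0, 2, 6, 18}, which encode to an osrel of
+ * 2 * 1000000 + 6 * 1000 + 18 = 2006018.
+ */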
+
+static Elf_Brandnote linux32_brandnote = {
+ .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
+ .hdr.n_descsz = 16, /* XXX at least 16 */
+ .hdr.n_type = 1,
+ .vendor = GNU_ABI_VENDOR,
+ .flags = BN_TRANSLATE_OSREL,
+ .trans_osrel = linux32_trans_osrel
+};
+
+static Elf32_Brandinfo linux_brand = {
+ .brand = ELFOSABI_LINUX,
+ .machine = EM_386,
+ .compat_3_brand = "Linux",
+ .emul_path = "/compat/linux",
+ .interp_path = "/lib/ld-linux.so.1",
+ .sysvec = &elf_linux_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &linux32_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+};
+
+static Elf32_Brandinfo linux_glibc2brand = {
+ .brand = ELFOSABI_LINUX,
+ .machine = EM_386,
+ .compat_3_brand = "Linux",
+ .emul_path = "/compat/linux",
+ .interp_path = "/lib/ld-linux.so.2",
+ .sysvec = &elf_linux_sysvec,
+ .interp_newpath = NULL,
+ .brand_note = &linux32_brandnote,
+ .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
+};
+
+Elf32_Brandinfo *linux_brandlist[] = {
+ &linux_brand,
+ &linux_glibc2brand,
+ NULL
+};
+
+static int
+linux_elf_modevent(module_t mod, int type, void *data)
+{
+ Elf32_Brandinfo **brandinfo;
+ int error;
+ struct linux_ioctl_handler **lihp;
+ struct linux_device_handler **ldhp;
+
+ error = 0;
+
+	switch (type) {
+ case MOD_LOAD:
+ for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
+ ++brandinfo)
+ if (elf32_insert_brand_entry(*brandinfo) < 0)
+ error = EINVAL;
+ if (error == 0) {
+ SET_FOREACH(lihp, linux_ioctl_handler_set)
+ linux_ioctl_register_handler(*lihp);
+ SET_FOREACH(ldhp, linux_device_handler_set)
+ linux_device_register_handler(*ldhp);
+ mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
+ sx_init(&emul_shared_lock, "emuldata->shared lock");
+ LIST_INIT(&futex_list);
+ mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
+ linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
+ linux_proc_exit, NULL, 1000);
+ linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
+ linux_proc_exec, NULL, 1000);
+ linux_szplatform = roundup(strlen(linux_platform) + 1,
+ sizeof(char *));
+ linux_osd_jail_register();
+ stclohz = (stathz ? stathz : hz);
+ if (bootverbose)
+ printf("Linux ELF exec handler installed\n");
+ } else
+ printf("cannot insert Linux ELF brand handler\n");
+ break;
+ case MOD_UNLOAD:
+ for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
+ ++brandinfo)
+ if (elf32_brand_inuse(*brandinfo))
+ error = EBUSY;
+ if (error == 0) {
+ for (brandinfo = &linux_brandlist[0];
+ *brandinfo != NULL; ++brandinfo)
+ if (elf32_remove_brand_entry(*brandinfo) < 0)
+ error = EINVAL;
+ }
+ if (error == 0) {
+ SET_FOREACH(lihp, linux_ioctl_handler_set)
+ linux_ioctl_unregister_handler(*lihp);
+ SET_FOREACH(ldhp, linux_device_handler_set)
+ linux_device_unregister_handler(*ldhp);
+ mtx_destroy(&emul_lock);
+ sx_destroy(&emul_shared_lock);
+ mtx_destroy(&futex_mtx);
+ EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
+ EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
+ linux_osd_jail_deregister();
+ if (bootverbose)
+ printf("Linux ELF exec handler removed\n");
+ } else
+ printf("Could not deinstall ELF interpreter entry\n");
+ break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (error);
+}
+
+static moduledata_t linux_elf_mod = {
+ "linuxelf",
+ linux_elf_modevent,
+ 0
+};
+
+DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/amd64/linux32/syscalls.conf b/sys/amd64/linux32/syscalls.conf
new file mode 100644
index 0000000..dc0ad82
--- /dev/null
+++ b/sys/amd64/linux32/syscalls.conf
@@ -0,0 +1,11 @@
+# $FreeBSD$
+sysnames="linux32_syscalls.c"
+sysproto="linux32_proto.h"
+sysproto_h=_LINUX_SYSPROTO_H_
+syshdr="linux32_syscall.h"
+syssw="linux32_sysent.c"
+sysmk="/dev/null"
+syscallprefix="LINUX_SYS_"
+switchname="linux_sysent"
+namesname="linux_syscallnames"
+systrace="linux32_systrace_args.c"
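+#
+# These variables are consumed by sys/kern/makesyscalls.sh, which
+# generates the named files from syscalls.master; sysmk points at
+# /dev/null because no syscall.mk file is wanted here.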
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
new file mode 100644
index 0000000..c3a10af
--- /dev/null
+++ b/sys/amd64/linux32/syscalls.master
@@ -0,0 +1,561 @@
+ $FreeBSD$
+
+; @(#)syscalls.master 8.1 (Berkeley) 7/19/93
+; System call name/number master file (or rather, slave, from LINUX).
+; Processed to create linux_sysent.c, linux_proto.h and linux_syscall.h.
+
+; Columns: number audit type nargs name alt{name,tag,rtyp}/comments
+; number system call number, must be in order
+; audit the audit event associated with the system call
+; A value of AUE_NULL means no auditing, but it also means that
+; there is no audit event for the call at this time. For the
+; case where the event exists, but we don't want auditing, the
+; event should be #defined to AUE_NULL in audit_kevents.h.
+; type one of STD, OBSOL, UNIMPL
+;	name		pseudo-prototype of syscall routine
+; If one of the following alts is different, then all appear:
+; altname name of system call if different
+; alttag name of args struct tag if different from [o]`name'"_args"
+; altrtyp return type if not int (bogus - syscalls always return int)
+; for UNIMPL/OBSOL, name continues with comments
+
+; types:
+; STD always included
+; OBSOL obsolete, not included in system, only specifies name
+; UNIMPL not implemented, placeholder only
+
+#include "opt_compat.h"
+#include <sys/param.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <compat/linux/linux_sysproto.h>
+#include <amd64/linux32/linux.h>
+#include <amd64/linux32/linux32_proto.h>
+
+; Isn't pretty, but there seems to be no other way to trap nosys
+#define nosys linux_nosys
+
+; #ifdef's, etc. may be included, and are copied to the output files.
+
+0 AUE_NULL UNIMPL setup
+1 AUE_EXIT NOPROTO { void sys_exit(int rval); } exit \
+ sys_exit_args void
+2 AUE_FORK STD { int linux_fork(void); }
+3 AUE_NULL NOPROTO { int read(int fd, char *buf, \
+ u_int nbyte); }
+4 AUE_NULL NOPROTO { int write(int fd, char *buf, \
+ u_int nbyte); }
+5 AUE_OPEN_RWTC STD { int linux_open(char *path, l_int flags, \
+ l_int mode); }
+6 AUE_CLOSE NOPROTO { int close(int fd); }
+7 AUE_WAIT4 STD { int linux_waitpid(l_pid_t pid, \
+ l_int *status, l_int options); }
+8 AUE_CREAT STD { int linux_creat(char *path, \
+ l_int mode); }
+9 AUE_LINK STD { int linux_link(char *path, char *to); }
+10 AUE_UNLINK STD { int linux_unlink(char *path); }
+11 AUE_EXECVE STD { int linux_execve(char *path, uint32_t *argp, \
+ uint32_t *envp); }
+12 AUE_CHDIR STD { int linux_chdir(char *path); }
+13 AUE_NULL STD { int linux_time(l_time_t *tm); }
+14 AUE_MKNOD STD { int linux_mknod(char *path, l_int mode, \
+ l_dev_t dev); }
+15 AUE_CHMOD STD { int linux_chmod(char *path, \
+ l_mode_t mode); }
+16 AUE_LCHOWN STD { int linux_lchown16(char *path, \
+ l_uid16_t uid, l_gid16_t gid); }
+17 AUE_NULL UNIMPL break
+18 AUE_STAT STD { int linux_stat(char *path, \
+ struct linux_stat *up); }
+19 AUE_LSEEK STD { int linux_lseek(l_uint fdes, l_off_t off, \
+ l_int whence); }
+20 AUE_GETPID STD { int linux_getpid(void); }
+21 AUE_MOUNT STD { int linux_mount(char *specialfile, \
+ char *dir, char *filesystemtype, \
+ l_ulong rwflag, void *data); }
+22 AUE_UMOUNT STD { int linux_oldumount(char *path); }
+23 AUE_SETUID STD { int linux_setuid16(l_uid16_t uid); }
+24 AUE_GETUID STD { int linux_getuid16(void); }
+25 AUE_SETTIMEOFDAY STD { int linux_stime(void); }
+26 AUE_PTRACE STD { int linux_ptrace(l_long req, l_long pid, \
+ l_long addr, l_long data); }
+27 AUE_NULL STD { int linux_alarm(l_uint secs); }
+28 AUE_FSTAT UNIMPL fstat
+29 AUE_NULL STD { int linux_pause(void); }
+30 AUE_UTIME STD { int linux_utime(char *fname, \
+ struct l_utimbuf *times); }
+31 AUE_NULL UNIMPL stty
+32 AUE_NULL UNIMPL gtty
+33 AUE_ACCESS STD { int linux_access(char *path, l_int amode); }
+34 AUE_NICE STD { int linux_nice(l_int inc); }
+35 AUE_NULL UNIMPL ftime
+36 AUE_SYNC NOPROTO { int sync(void); }
+37 AUE_KILL STD { int linux_kill(l_int pid, l_int signum); }
+38 AUE_RENAME STD { int linux_rename(char *from, char *to); }
+39 AUE_MKDIR STD { int linux_mkdir(char *path, l_int mode); }
+40 AUE_RMDIR STD { int linux_rmdir(char *path); }
+41 AUE_DUP NOPROTO { int dup(u_int fd); }
+42 AUE_PIPE STD { int linux_pipe(l_int *pipefds); }
+43 AUE_NULL STD { int linux_times(struct l_times_argv *buf); }
+44 AUE_NULL UNIMPL prof
+45 AUE_NULL STD { int linux_brk(l_ulong dsend); }
+46 AUE_SETGID STD { int linux_setgid16(l_gid16_t gid); }
+47 AUE_GETGID STD { int linux_getgid16(void); }
+48 AUE_NULL STD { int linux_signal(l_int sig, \
+ l_handler_t handler); }
+49 AUE_GETEUID STD { int linux_geteuid16(void); }
+50 AUE_GETEGID STD { int linux_getegid16(void); }
+51 AUE_ACCT NOPROTO { int acct(char *path); }
+52 AUE_UMOUNT STD { int linux_umount(char *path, l_int flags); }
+53 AUE_NULL UNIMPL lock
+54 AUE_IOCTL STD { int linux_ioctl(l_uint fd, l_uint cmd, \
+ uintptr_t arg); }
+55 AUE_FCNTL STD { int linux_fcntl(l_uint fd, l_uint cmd, \
+ uintptr_t arg); }
+56 AUE_NULL UNIMPL mpx
+57 AUE_SETPGRP NOPROTO { int setpgid(int pid, int pgid); }
+58 AUE_NULL UNIMPL ulimit
+59 AUE_NULL STD { int linux_olduname(void); }
+60 AUE_UMASK NOPROTO { int umask(int newmask); }
+61 AUE_CHROOT NOPROTO { int chroot(char *path); }
+62 AUE_NULL STD { int linux_ustat(l_dev_t dev, \
+ struct l_ustat *ubuf); }
+63 AUE_DUP2 NOPROTO { int dup2(u_int from, u_int to); }
+64 AUE_GETPPID STD { int linux_getppid(void); }
+65 AUE_GETPGRP NOPROTO { int getpgrp(void); }
+66 AUE_SETSID NOPROTO { int setsid(void); }
+67 AUE_NULL STD { int linux_sigaction(l_int sig, \
+ l_osigaction_t *nsa, \
+ l_osigaction_t *osa); }
+68 AUE_NULL STD { int linux_sgetmask(void); }
+69 AUE_NULL STD { int linux_ssetmask(l_osigset_t mask); }
+70 AUE_SETREUID STD { int linux_setreuid16(l_uid16_t ruid, \
+ l_uid16_t euid); }
+71 AUE_SETREGID STD { int linux_setregid16(l_gid16_t rgid, \
+ l_gid16_t egid); }
+72 AUE_NULL STD { int linux_sigsuspend(l_int hist0, \
+ l_int hist1, l_osigset_t mask); }
+73 AUE_NULL STD { int linux_sigpending(l_osigset_t *mask); }
+74 AUE_SYSCTL STD { int linux_sethostname(char *hostname, \
+ u_int len); }
+75 AUE_SETRLIMIT STD { int linux_setrlimit(l_uint resource, \
+ struct l_rlimit *rlim); }
+76 AUE_GETRLIMIT STD { int linux_old_getrlimit(l_uint resource, \
+ struct l_rlimit *rlim); }
+77 AUE_GETRUSAGE STD { int linux_getrusage(int who, \
+ struct l_rusage *rusage); }
+78 AUE_NULL STD { int linux_gettimeofday( \
+ struct l_timeval *tp, \
+ struct timezone *tzp); }
+79 AUE_SETTIMEOFDAY STD { int linux_settimeofday( \
+ struct l_timeval *tp, \
+ struct timezone *tzp); }
+80 AUE_GETGROUPS STD { int linux_getgroups16(l_uint gidsetsize, \
+ l_gid16_t *gidset); }
+81 AUE_SETGROUPS STD { int linux_setgroups16(l_uint gidsetsize, \
+ l_gid16_t *gidset); }
+82 AUE_SELECT STD { int linux_old_select( \
+ struct l_old_select_argv *ptr); }
+83 AUE_SYMLINK STD { int linux_symlink(char *path, char *to); }
+; 84: oldlstat
+84 AUE_LSTAT STD { int linux_lstat(char *path, struct linux_lstat *up); }
+85 AUE_READLINK STD { int linux_readlink(char *name, char *buf, \
+ l_int count); }
+86 AUE_USELIB UNIMPL linux_uselib
+87 AUE_SWAPON NOPROTO { int swapon(char *name); }
+88 AUE_REBOOT STD { int linux_reboot(l_int magic1, \
+ l_int magic2, l_uint cmd, void *arg); }
+; 89: old_readdir
+89 AUE_GETDIRENTRIES STD { int linux_readdir(l_uint fd, \
+ struct l_dirent *dent, l_uint count); }
+; 90: old_mmap
+90 AUE_MMAP STD { int linux_mmap(struct l_mmap_argv *ptr); }
+91 AUE_MUNMAP NOPROTO { int munmap(caddr_t addr, int len); }
+92 AUE_TRUNCATE STD { int linux_truncate(char *path, \
+ l_ulong length); }
+93 AUE_FTRUNCATE STD { int linux_ftruncate(int fd, long length); }
+94 AUE_FCHMOD NOPROTO { int fchmod(int fd, int mode); }
+95 AUE_FCHOWN NOPROTO { int fchown(int fd, int uid, int gid); }
+96 AUE_GETPRIORITY STD { int linux_getpriority(int which, int who); }
+97 AUE_SETPRIORITY NOPROTO { int setpriority(int which, int who, \
+ int prio); }
+98 AUE_PROFILE UNIMPL profil
+99 AUE_STATFS STD { int linux_statfs(char *path, \
+ struct l_statfs_buf *buf); }
+100 AUE_FSTATFS STD { int linux_fstatfs(l_uint fd, \
+ struct l_statfs_buf *buf); }
+101 AUE_NULL UNIMPL ioperm
+102 AUE_NULL STD { int linux_socketcall(l_int what, \
+ l_ulong args); }
+103 AUE_NULL STD { int linux_syslog(l_int type, char *buf, \
+ l_int len); }
+104 AUE_SETITIMER STD { int linux_setitimer(l_int which, \
+ struct l_itimerval *itv, \
+ struct l_itimerval *oitv); }
+105 AUE_GETITIMER STD { int linux_getitimer(l_int which, \
+ struct l_itimerval *itv); }
+106 AUE_STAT STD { int linux_newstat(char *path, \
+ struct l_newstat *buf); }
+107 AUE_LSTAT STD { int linux_newlstat(char *path, \
+ struct l_newstat *buf); }
+108 AUE_FSTAT STD { int linux_newfstat(l_uint fd, \
+ struct l_newstat *buf); }
+; 109: olduname
+109 AUE_NULL STD { int linux_uname(void); }
+110 AUE_NULL STD { int linux_iopl(l_int level); }
+111 AUE_NULL STD { int linux_vhangup(void); }
+112 AUE_NULL UNIMPL idle
+113 AUE_NULL UNIMPL vm86old
+114 AUE_WAIT4 STD { int linux_wait4(l_pid_t pid, \
+ l_uint *status, l_int options, \
+ struct l_rusage *rusage); }
+115 AUE_SWAPOFF STD { int linux_swapoff(void); }
+116 AUE_NULL STD { int linux_sysinfo(struct l_sysinfo *info); }
+117 AUE_NULL STD { int linux_ipc(l_uint what, l_int arg1, \
+ l_int arg2, l_int arg3, void *ptr, \
+ l_long arg5); }
+118 AUE_FSYNC NOPROTO { int fsync(int fd); }
+119 AUE_SIGRETURN STD { int linux_sigreturn( \
+ struct l_sigframe *sfp); }
+120 AUE_RFORK STD { int linux_clone(l_int flags, void *stack, \
+				    void *parent_tidptr, void *tls, void *child_tidptr); }
+121 AUE_SYSCTL STD { int linux_setdomainname(char *name, \
+ int len); }
+122 AUE_NULL STD { int linux_newuname( \
+ struct l_new_utsname *buf); }
+123 AUE_NULL UNIMPL modify_ldt
+124 AUE_ADJTIME STD { int linux_adjtimex(void); }
+125 AUE_MPROTECT STD { int linux_mprotect(caddr_t addr, int len, \
+ int prot); }
+126 AUE_SIGPROCMASK STD { int linux_sigprocmask(l_int how, \
+ l_osigset_t *mask, l_osigset_t *omask); }
+127 AUE_NULL STD { int linux_create_module(void); }
+128 AUE_NULL STD { int linux_init_module(void); }
+129 AUE_NULL STD { int linux_delete_module(void); }
+130 AUE_NULL STD { int linux_get_kernel_syms(void); }
+131 AUE_QUOTACTL STD { int linux_quotactl(void); }
+132 AUE_GETPGID NOPROTO { int getpgid(int pid); }
+133 AUE_FCHDIR NOPROTO { int fchdir(int fd); }
+134 AUE_BDFLUSH STD { int linux_bdflush(void); }
+135 AUE_NULL STD { int linux_sysfs(l_int option, \
+ l_ulong arg1, l_ulong arg2); }
+136 AUE_PERSONALITY STD { int linux_personality(l_ulong per); }
+137 AUE_NULL UNIMPL afs_syscall
+138 AUE_SETFSUID STD { int linux_setfsuid16(l_uid16_t uid); }
+139 AUE_SETFSGID STD { int linux_setfsgid16(l_gid16_t gid); }
+140 AUE_LSEEK STD { int linux_llseek(l_int fd, l_ulong ohigh, \
+ l_ulong olow, l_loff_t *res, \
+ l_uint whence); }
+141 AUE_GETDIRENTRIES STD { int linux_getdents(l_uint fd, void *dent, \
+ l_uint count); }
+; 142: newselect
+142 AUE_SELECT STD { int linux_select(l_int nfds, \
+ l_fd_set *readfds, l_fd_set *writefds, \
+ l_fd_set *exceptfds, \
+ struct l_timeval *timeout); }
+143 AUE_FLOCK NOPROTO { int flock(int fd, int how); }
+144 AUE_MSYNC STD { int linux_msync(l_ulong addr, \
+ l_size_t len, l_int fl); }
+145 AUE_READV STD { int linux_readv(l_ulong fd, struct l_iovec32 *iovp, \
+ l_ulong iovcnt); }
+146 AUE_WRITEV STD { int linux_writev(l_ulong fd, struct l_iovec32 *iovp, \
+ l_ulong iovcnt); }
+147 AUE_GETSID STD { int linux_getsid(l_pid_t pid); }
+148 AUE_NULL STD { int linux_fdatasync(l_uint fd); }
+149 AUE_SYSCTL STD { int linux_sysctl( \
+ struct l___sysctl_args *args); }
+150 AUE_MLOCK NOPROTO { int mlock(const void *addr, size_t len); }
+151 AUE_MUNLOCK NOPROTO { int munlock(const void *addr, size_t len); }
+152 AUE_MLOCKALL NOPROTO { int mlockall(int how); }
+153 AUE_MUNLOCKALL NOPROTO { int munlockall(void); }
+154 AUE_SCHED_SETPARAM NOPROTO { int sched_setparam(pid_t pid, \
+ const struct sched_param *param); }
+155 AUE_SCHED_GETPARAM NOPROTO { int sched_getparam(pid_t pid, \
+ struct sched_param *param); }
+156 AUE_SCHED_SETSCHEDULER STD { int linux_sched_setscheduler( \
+ l_pid_t pid, l_int policy, \
+ struct l_sched_param *param); }
+157 AUE_SCHED_GETSCHEDULER STD { int linux_sched_getscheduler( \
+ l_pid_t pid); }
+158 AUE_NULL NOPROTO { int sched_yield(void); }
+159 AUE_SCHED_GET_PRIORITY_MAX STD { int linux_sched_get_priority_max( \
+ l_int policy); }
+160 AUE_SCHED_GET_PRIORITY_MIN STD { int linux_sched_get_priority_min( \
+ l_int policy); }
+161 AUE_SCHED_RR_GET_INTERVAL STD { int linux_sched_rr_get_interval(l_pid_t pid, \
+ struct l_timespec *interval); }
+162 AUE_NULL STD { int linux_nanosleep( \
+ const struct l_timespec *rqtp, \
+ struct l_timespec *rmtp); }
+163 AUE_NULL STD { int linux_mremap(l_ulong addr, \
+ l_ulong old_len, l_ulong new_len, \
+ l_ulong flags, l_ulong new_addr); }
+164 AUE_SETRESUID STD { int linux_setresuid16(l_uid16_t ruid, \
+ l_uid16_t euid, l_uid16_t suid); }
+165 AUE_GETRESUID STD { int linux_getresuid16(l_uid16_t *ruid, \
+ l_uid16_t *euid, l_uid16_t *suid); }
+166 AUE_NULL UNIMPL vm86
+167 AUE_NULL STD { int linux_query_module(void); }
+168 AUE_POLL NOPROTO { int poll(struct pollfd *fds, \
+ unsigned int nfds, int timeout); }
+169 AUE_NULL STD { int linux_nfsservctl(void); }
+170 AUE_SETRESGID STD { int linux_setresgid16(l_gid16_t rgid, \
+ l_gid16_t egid, l_gid16_t sgid); }
+171 AUE_GETRESGID STD { int linux_getresgid16(l_gid16_t *rgid, \
+ l_gid16_t *egid, l_gid16_t *sgid); }
+172 AUE_PRCTL STD { int linux_prctl(l_int option, l_int arg2, l_int arg3, \
+ l_int arg4, l_int arg5); }
+173 AUE_NULL STD { int linux_rt_sigreturn( \
+ struct l_ucontext *ucp); }
+174 AUE_NULL STD { int linux_rt_sigaction(l_int sig, \
+ l_sigaction_t *act, l_sigaction_t *oact, \
+ l_size_t sigsetsize); }
+175 AUE_NULL STD { int linux_rt_sigprocmask(l_int how, \
+ l_sigset_t *mask, l_sigset_t *omask, \
+ l_size_t sigsetsize); }
+176 AUE_NULL STD { int linux_rt_sigpending(l_sigset_t *set, \
+ l_size_t sigsetsize); }
+177 AUE_NULL STD { int linux_rt_sigtimedwait(l_sigset_t *mask, \
+ l_siginfo_t *ptr, \
+ struct l_timeval *timeout, \
+ l_size_t sigsetsize); }
+178 AUE_NULL STD { int linux_rt_sigqueueinfo(void); }
+179 AUE_NULL STD { int linux_rt_sigsuspend( \
+ l_sigset_t *newset, \
+ l_size_t sigsetsize); }
+180 AUE_PREAD STD { int linux_pread(l_uint fd, char *buf, \
+ l_size_t nbyte, l_loff_t offset); }
+181 AUE_PWRITE STD { int linux_pwrite(l_uint fd, char *buf, \
+ l_size_t nbyte, l_loff_t offset); }
+182 AUE_CHOWN STD { int linux_chown16(char *path, \
+ l_uid16_t uid, l_gid16_t gid); }
+183 AUE_GETCWD STD { int linux_getcwd(char *buf, \
+ l_ulong bufsize); }
+184 AUE_CAPGET STD { int linux_capget(struct l_user_cap_header *hdrp, \
+ struct l_user_cap_data *datap); }
+185 AUE_CAPSET STD { int linux_capset(struct l_user_cap_header *hdrp, \
+ struct l_user_cap_data *datap); }
+186 AUE_NULL STD { int linux_sigaltstack(l_stack_t *uss, \
+ l_stack_t *uoss); }
+187 AUE_SENDFILE STD { int linux_sendfile(void); }
+188 AUE_GETPMSG UNIMPL getpmsg
+189 AUE_PUTPMSG UNIMPL putpmsg
+190 AUE_VFORK STD { int linux_vfork(void); }
+; 191: ugetrlimit
+191 AUE_GETRLIMIT STD { int linux_getrlimit(l_uint resource, \
+ struct l_rlimit *rlim); }
+192 AUE_MMAP STD { int linux_mmap2(l_ulong addr, l_ulong len, \
+ l_ulong prot, l_ulong flags, l_ulong fd, \
+ l_ulong pgoff); }
+193 AUE_TRUNCATE STD { int linux_truncate64(char *path, \
+ l_loff_t length); }
+194 AUE_FTRUNCATE STD { int linux_ftruncate64(l_uint fd, \
+ l_loff_t length); }
+195 AUE_STAT STD { int linux_stat64(const char *filename, \
+ struct l_stat64 *statbuf); }
+196 AUE_LSTAT STD { int linux_lstat64(const char *filename, \
+ struct l_stat64 *statbuf); }
+197 AUE_FSTAT STD { int linux_fstat64(l_int fd, \
+ struct l_stat64 *statbuf); }
+198 AUE_LCHOWN STD { int linux_lchown(char *path, l_uid_t uid, \
+ l_gid_t gid); }
+199 AUE_GETUID STD { int linux_getuid(void); }
+200 AUE_GETGID STD { int linux_getgid(void); }
+201 AUE_GETEUID NOPROTO { int geteuid(void); }
+202 AUE_GETEGID NOPROTO { int getegid(void); }
+203 AUE_SETREUID NOPROTO { int setreuid(uid_t ruid, uid_t euid); }
+204 AUE_SETREGID NOPROTO { int setregid(gid_t rgid, gid_t egid); }
+205 AUE_GETGROUPS STD { int linux_getgroups(l_int gidsetsize, \
+ l_gid_t *grouplist); }
+206 AUE_SETGROUPS STD { int linux_setgroups(l_int gidsetsize, \
+ l_gid_t *grouplist); }
+207 AUE_FCHOWN NODEF fchown fchown fchown_args int
+208 AUE_SETRESUID NOPROTO { int setresuid(uid_t ruid, uid_t euid, \
+ uid_t suid); }
+209 AUE_GETRESUID NOPROTO { int getresuid(uid_t *ruid, uid_t *euid, \
+ uid_t *suid); }
+210 AUE_SETRESGID NOPROTO { int setresgid(gid_t rgid, gid_t egid, \
+ gid_t sgid); }
+211 AUE_GETRESGID NOPROTO { int getresgid(gid_t *rgid, gid_t *egid, \
+ gid_t *sgid); }
+212 AUE_CHOWN STD { int linux_chown(char *path, l_uid_t uid, \
+ l_gid_t gid); }
+213 AUE_SETUID NOPROTO { int setuid(uid_t uid); }
+214 AUE_SETGID NOPROTO { int setgid(gid_t gid); }
+215 AUE_SETFSUID STD { int linux_setfsuid(l_uid_t uid); }
+216 AUE_SETFSGID STD { int linux_setfsgid(l_gid_t gid); }
+217 AUE_PIVOT_ROOT STD { int linux_pivot_root(char *new_root, \
+ char *put_old); }
+218 AUE_MINCORE STD { int linux_mincore(l_ulong start, \
+ l_size_t len, u_char *vec); }
+219 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \
+ int behav); }
+220 AUE_GETDIRENTRIES STD { int linux_getdents64(l_uint fd, \
+ void *dirent, l_uint count); }
+221 AUE_FCNTL STD { int linux_fcntl64(l_uint fd, l_uint cmd, \
+ uintptr_t arg); }
+222 AUE_NULL UNIMPL
+223 AUE_NULL UNIMPL
+224 AUE_NULL STD { long linux_gettid(void); }
+225 AUE_NULL UNIMPL linux_readahead
+226 AUE_NULL STD { int linux_setxattr(void); }
+227 AUE_NULL STD { int linux_lsetxattr(void); }
+228 AUE_NULL STD { int linux_fsetxattr(void); }
+229 AUE_NULL STD { int linux_getxattr(void); }
+230 AUE_NULL STD { int linux_lgetxattr(void); }
+231 AUE_NULL STD { int linux_fgetxattr(void); }
+232 AUE_NULL STD { int linux_listxattr(void); }
+233 AUE_NULL STD { int linux_llistxattr(void); }
+234 AUE_NULL STD { int linux_flistxattr(void); }
+235 AUE_NULL STD { int linux_removexattr(void); }
+236 AUE_NULL STD { int linux_lremovexattr(void); }
+237 AUE_NULL STD { int linux_fremovexattr(void); }
+238 AUE_NULL STD { int linux_tkill(int tid, int sig); }
+239 AUE_SENDFILE UNIMPL linux_sendfile64
+240 AUE_NULL STD { int linux_sys_futex(void *uaddr, int op, uint32_t val, \
+ struct l_timespec *timeout, uint32_t *uaddr2, uint32_t val3); }
+241 AUE_NULL STD { int linux_sched_setaffinity(l_pid_t pid, l_uint len, \
+ l_ulong *user_mask_ptr); }
+242 AUE_NULL STD { int linux_sched_getaffinity(l_pid_t pid, l_uint len, \
+ l_ulong *user_mask_ptr); }
+243 AUE_NULL STD { int linux_set_thread_area(struct l_user_desc *desc); }
+244 AUE_NULL UNIMPL linux_get_thread_area
+245 AUE_NULL UNIMPL linux_io_setup
+246 AUE_NULL UNIMPL linux_io_destroy
+247 AUE_NULL UNIMPL linux_io_getevents
+248 AUE_NULL UNIMPL linux_io_submit
+249 AUE_NULL UNIMPL linux_io_cancel
+250 AUE_NULL STD { int linux_fadvise64(int fd, l_loff_t offset, \
+ l_size_t len, int advice); }
+251 AUE_NULL UNIMPL
+252 AUE_EXIT STD { int linux_exit_group(int error_code); }
+253 AUE_NULL STD { int linux_lookup_dcookie(void); }
+254 AUE_NULL STD { int linux_epoll_create(void); }
+255 AUE_NULL STD { int linux_epoll_ctl(void); }
+256 AUE_NULL STD { int linux_epoll_wait(void); }
+257 AUE_NULL STD { int linux_remap_file_pages(void); }
+258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
+259 AUE_NULL STD { int linux_timer_create(void); }
+260 AUE_NULL STD { int linux_timer_settime(void); }
+261 AUE_NULL STD { int linux_timer_gettime(void); }
+262 AUE_NULL STD { int linux_timer_getoverrun(void); }
+263 AUE_NULL STD { int linux_timer_delete(void); }
+264 AUE_CLOCK_SETTIME STD { int linux_clock_settime(clockid_t which, struct l_timespec *tp); }
+265 AUE_NULL STD { int linux_clock_gettime(clockid_t which, struct l_timespec *tp); }
+266 AUE_NULL STD { int linux_clock_getres(clockid_t which, struct l_timespec *tp); }
+267 AUE_NULL STD { int linux_clock_nanosleep(clockid_t which, int flags, \
+ struct l_timespec *rqtp, struct l_timespec *rmtp); }
+268 AUE_STATFS STD { int linux_statfs64(char *path, size_t bufsize, struct l_statfs64_buf *buf); }
+269 AUE_FSTATFS STD { int linux_fstatfs64(void); }
+270 AUE_NULL STD { int linux_tgkill(int tgid, int pid, int sig); }
+271 AUE_UTIMES STD { int linux_utimes(char *fname, \
+ struct l_timeval *tptr); }
+272 AUE_NULL STD { int linux_fadvise64_64(int fd, \
+ l_loff_t offset, l_loff_t len, \
+ int advice); }
+273 AUE_NULL UNIMPL vserver
+274 AUE_NULL STD { int linux_mbind(void); }
+275 AUE_NULL STD { int linux_get_mempolicy(void); }
+276 AUE_NULL STD { int linux_set_mempolicy(void); }
+; linux 2.6.6:
+277 AUE_NULL STD { int linux_mq_open(void); }
+278 AUE_NULL STD { int linux_mq_unlink(void); }
+279 AUE_NULL STD { int linux_mq_timedsend(void); }
+280 AUE_NULL STD { int linux_mq_timedreceive(void); }
+281 AUE_NULL STD { int linux_mq_notify(void); }
+282 AUE_NULL STD { int linux_mq_getsetattr(void); }
+283 AUE_NULL STD { int linux_kexec_load(void); }
+284 AUE_NULL STD { int linux_waitid(void); }
+285 AUE_NULL UNIMPL
+; linux 2.6.11:
+286 AUE_NULL STD { int linux_add_key(void); }
+287 AUE_NULL STD { int linux_request_key(void); }
+288 AUE_NULL STD { int linux_keyctl(void); }
+; linux 2.6.13:
+289 AUE_NULL STD { int linux_ioprio_set(void); }
+290 AUE_NULL STD { int linux_ioprio_get(void); }
+291 AUE_NULL STD { int linux_inotify_init(void); }
+292 AUE_NULL STD { int linux_inotify_add_watch(void); }
+293 AUE_NULL STD { int linux_inotify_rm_watch(void); }
+; linux 2.6.16:
+294 AUE_NULL STD { int linux_migrate_pages(void); }
+295 AUE_OPEN_RWTC STD { int linux_openat(l_int dfd, const char *filename, \
+ l_int flags, l_int mode); }
+296 AUE_MKDIRAT STD { int linux_mkdirat(l_int dfd, const char *pathname, \
+ l_int mode); }
+297 AUE_MKNODAT STD { int linux_mknodat(l_int dfd, const char *filename, \
+ l_int mode, l_uint dev); }
+298 AUE_FCHOWNAT STD { int linux_fchownat(l_int dfd, const char *filename, \
+ l_uid16_t uid, l_gid16_t gid, l_int flag); }
+299 AUE_FUTIMESAT STD { int linux_futimesat(l_int dfd, char *filename, \
+ struct l_timeval *utimes); }
+300 AUE_FSTATAT STD { int linux_fstatat64(l_int dfd, char *pathname, \
+ struct l_stat64 *statbuf, l_int flag); }
+301 AUE_UNLINKAT STD { int linux_unlinkat(l_int dfd, const char *pathname, \
+ l_int flag); }
+302 AUE_RENAMEAT STD { int linux_renameat(l_int olddfd, const char *oldname, \
+ l_int newdfd, const char *newname); }
+303 AUE_LINKAT STD { int linux_linkat(l_int olddfd, const char *oldname, \
+ l_int newdfd, const char *newname, l_int flag); }
+304 AUE_SYMLINKAT STD { int linux_symlinkat(const char *oldname, l_int newdfd, \
+ const char *newname); }
+305 AUE_READLINKAT STD { int linux_readlinkat(l_int dfd, const char *path, \
+ char *buf, l_int bufsiz); }
+306 AUE_FCHMODAT STD { int linux_fchmodat(l_int dfd, const char *filename, \
+ l_mode_t mode); }
+307 AUE_FACCESSAT STD { int linux_faccessat(l_int dfd, const char *filename, l_int amode, int flag); }
+308 AUE_NULL STD { int linux_pselect6(void); }
+309 AUE_NULL STD { int linux_ppoll(void); }
+310 AUE_NULL STD { int linux_unshare(void); }
+; linux 2.6.17:
+311 AUE_NULL STD { int linux_set_robust_list(struct linux_robust_list_head *head, \
+ l_size_t len); }
+312 AUE_NULL STD { int linux_get_robust_list(l_int pid, struct linux_robust_list_head *head, \
+ l_size_t *len); }
+313 AUE_NULL STD { int linux_splice(void); }
+314 AUE_NULL STD { int linux_sync_file_range(void); }
+315 AUE_NULL STD { int linux_tee(void); }
+316 AUE_NULL STD { int linux_vmsplice(void); }
+; linux 2.6.18:
+317 AUE_NULL STD { int linux_move_pages(void); }
+; linux 2.6.19:
+318 AUE_NULL STD { int linux_getcpu(void); }
+319 AUE_NULL STD { int linux_epoll_pwait(void); }
+; linux 2.6.22:
+320 AUE_NULL STD { int linux_utimensat(void); }
+321 AUE_NULL STD { int linux_signalfd(void); }
+322 AUE_NULL STD { int linux_timerfd_create(void); }
+323 AUE_NULL STD { int linux_eventfd(void); }
+; linux 2.6.23:
+324 AUE_NULL STD { int linux_fallocate(void); }
+; linux 2.6.25:
+325 AUE_NULL STD { int linux_timerfd_settime(void); }
+326 AUE_NULL STD { int linux_timerfd_gettime(void); }
+; linux 2.6.27:
+327 AUE_NULL STD { int linux_signalfd4(void); }
+328 AUE_NULL STD { int linux_eventfd2(void); }
+329 AUE_NULL STD { int linux_epoll_create1(void); }
+330 AUE_NULL STD { int linux_dup3(void); }
+331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
+332 AUE_NULL STD { int linux_inotify_init1(void); }
+; linux 2.6.30:
+333 AUE_NULL STD { int linux_preadv(void); }
+334 AUE_NULL STD { int linux_pwritev(void); }
+; linux 2.6.31:
+335	AUE_NULL	STD	{ int linux_rt_tgsigqueueinfo(void); }
+336 AUE_NULL STD { int linux_perf_event_open(void); }
+; linux 2.6.33:
+337 AUE_NULL STD { int linux_recvmmsg(void); }
+338 AUE_NULL STD { int linux_fanotify_init(void); }
+339 AUE_NULL STD { int linux_fanotify_mark(void); }
+; linux 2.6.36:
+340 AUE_NULL STD { int linux_prlimit64(void); }
+; later:
+341 AUE_NULL STD { int linux_name_to_handle_at(void); }
+342 AUE_NULL STD { int linux_open_by_handle_at(void); }
+343 AUE_NULL STD { int linux_clock_adjtime(void); }
+344 AUE_NULL STD { int linux_syncfs(void); }
+345 AUE_NULL STD { int linux_sendmmsg(void); }
+346 AUE_NULL STD { int linux_setns(void); }
+347 AUE_NULL STD { int linux_process_vm_readv(void); }
+348 AUE_NULL STD { int linux_process_vm_writev(void); }
diff --git a/sys/amd64/pci/pci_cfgreg.c b/sys/amd64/pci/pci_cfgreg.c
new file mode 100644
index 0000000..90d9087
--- /dev/null
+++ b/sys/amd64/pci/pci_cfgreg.c
@@ -0,0 +1,370 @@
+/*-
+ * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
+ * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
+ * Copyright (c) 2000, BSDi
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pci_cfgreg.h>
+
+enum {
+ CFGMECH_NONE = 0,
+ CFGMECH_1,
+ CFGMECH_PCIE,
+};
+
+static uint32_t pci_docfgregread(int bus, int slot, int func, int reg,
+ int bytes);
+static int pciereg_cfgread(int bus, unsigned slot, unsigned func,
+ unsigned reg, unsigned bytes);
+static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func,
+ unsigned reg, int data, unsigned bytes);
+static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
+static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
+
+SYSCTL_DECL(_hw_pci);
+
+static int cfgmech;
+static vm_offset_t pcie_base;
+static int pcie_minbus, pcie_maxbus;
+static uint32_t pcie_badslots;
+static struct mtx pcicfg_mtx;
+static int mcfg_enable = 1;
+TUNABLE_INT("hw.pci.mcfg", &mcfg_enable);
+SYSCTL_INT(_hw_pci, OID_AUTO, mcfg, CTLFLAG_RDTUN, &mcfg_enable, 0,
+ "Enable support for PCI-e memory mapped config access");
+
+/*
+ * Initialise access to PCI configuration space
+ */
+int
+pci_cfgregopen(void)
+{
+ static int once = 0;
+ uint64_t pciebar;
+ uint16_t did, vid;
+
+ if (!once) {
+ mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN);
+ once = 1;
+ }
+
+ if (cfgmech != CFGMECH_NONE)
+ return (1);
+ cfgmech = CFGMECH_1;
+
+ /*
+ * Grope around in the PCI config space to see if this is a
+ * chipset that is capable of doing memory-mapped config cycles.
+ * This also implies that it can do PCIe extended config cycles.
+ */
+
+ /* Check for supported chipsets */
+ vid = pci_cfgregread(0, 0, 0, PCIR_VENDOR, 2);
+ did = pci_cfgregread(0, 0, 0, PCIR_DEVICE, 2);
+ switch (vid) {
+ case 0x8086:
+ switch (did) {
+ case 0x3590:
+ case 0x3592:
+ /* Intel 7520 or 7320 */
+ pciebar = pci_cfgregread(0, 0, 0, 0xce, 2) << 16;
+ pcie_cfgregopen(pciebar, 0, 255);
+ break;
+ case 0x2580:
+ case 0x2584:
+ case 0x2590:
+ /* Intel 915, 925, or 915GM */
+ pciebar = pci_cfgregread(0, 0, 0, 0x48, 4);
+ pcie_cfgregopen(pciebar, 0, 255);
+ break;
+ }
+ }
+
+ return (1);
+}
+
+static uint32_t
+pci_docfgregread(int bus, int slot, int func, int reg, int bytes)
+{
+
+ if (cfgmech == CFGMECH_PCIE &&
+ (bus >= pcie_minbus && bus <= pcie_maxbus) &&
+ (bus != 0 || !(1 << slot & pcie_badslots)))
+ return (pciereg_cfgread(bus, slot, func, reg, bytes));
+ else
+ return (pcireg_cfgread(bus, slot, func, reg, bytes));
+}
+
+/*
+ * Read configuration space register
+ */
+u_int32_t
+pci_cfgregread(int bus, int slot, int func, int reg, int bytes)
+{
+ uint32_t line;
+
+ /*
+ * Some BIOS writers seem to want to ignore the spec and put
+ * 0 in the intline rather than 255 to indicate none. Some use
+ * numbers in the range 128-254 to indicate something strange and
+ * apparently undocumented anywhere. Assume these are completely bogus
+ * and map them to 255, which the rest of the PCI code recognizes as
+ * an invalid IRQ.
+ */
+ if (reg == PCIR_INTLINE && bytes == 1) {
+ line = pci_docfgregread(bus, slot, func, PCIR_INTLINE, 1);
+ if (line == 0 || line >= 128)
+ line = PCI_INVALID_IRQ;
+ return (line);
+ }
+ return (pci_docfgregread(bus, slot, func, reg, bytes));
+}
+
+/*
+ * Write configuration space register
+ */
+void
+pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes)
+{
+
+ if (cfgmech == CFGMECH_PCIE &&
+ (bus >= pcie_minbus && bus <= pcie_maxbus) &&
+ (bus != 0 || !(1 << slot & pcie_badslots)))
+ pciereg_cfgwrite(bus, slot, func, reg, data, bytes);
+ else
+ pcireg_cfgwrite(bus, slot, func, reg, data, bytes);
+}
+
+/*
+ * Configuration space access using direct register operations
+ */
+
+/* enable configuration space accesses and return data port address */
+static int
+pci_cfgenable(unsigned bus, unsigned slot, unsigned func, int reg, int bytes)
+{
+ int dataport = 0;
+
+ if (bus <= PCI_BUSMAX && slot <= PCI_SLOTMAX && func <= PCI_FUNCMAX &&
+ (unsigned)reg <= PCI_REGMAX && bytes != 3 &&
+ (unsigned)bytes <= 4 && (reg & (bytes - 1)) == 0) {
+ outl(CONF1_ADDR_PORT, (1 << 31) | (bus << 16) | (slot << 11)
+ | (func << 8) | (reg & ~0x03));
+ dataport = CONF1_DATA_PORT + (reg & 0x03);
+ }
+ return (dataport);
+}
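+
+/*
+ * Worked example (illustrative, not part of the interface): for bus 0,
+ * slot 31, func 0, reg 0x04, pci_cfgenable() writes
+ *	(1 << 31) | (31 << 11) | 0x04 = 0x8000f804
+ * to CONF1_ADDR_PORT (0xcf8) and returns CONF1_DATA_PORT + 0 (0xcfc)
+ * as the data port.
+ */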
+
+/* disable configuration space accesses */
+static void
+pci_cfgdisable(void)
+{
+
+ /*
+ * Do nothing. Writing a 0 to the address port can apparently
+ * confuse some bridges and cause spurious access failures.
+ */
+}
+
+static int
+pcireg_cfgread(int bus, int slot, int func, int reg, int bytes)
+{
+ int data = -1;
+ int port;
+
+ mtx_lock_spin(&pcicfg_mtx);
+ port = pci_cfgenable(bus, slot, func, reg, bytes);
+ if (port != 0) {
+ switch (bytes) {
+ case 1:
+ data = inb(port);
+ break;
+ case 2:
+ data = inw(port);
+ break;
+ case 4:
+ data = inl(port);
+ break;
+ }
+ pci_cfgdisable();
+ }
+ mtx_unlock_spin(&pcicfg_mtx);
+ return (data);
+}
+
+static void
+pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
+{
+ int port;
+
+ mtx_lock_spin(&pcicfg_mtx);
+ port = pci_cfgenable(bus, slot, func, reg, bytes);
+ if (port != 0) {
+ switch (bytes) {
+ case 1:
+ outb(port, data);
+ break;
+ case 2:
+ outw(port, data);
+ break;
+ case 4:
+ outl(port, data);
+ break;
+ }
+ pci_cfgdisable();
+ }
+ mtx_unlock_spin(&pcicfg_mtx);
+}
+
+int
+pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
+{
+ uint32_t val1, val2;
+ int slot;
+
+ if (!mcfg_enable)
+ return (0);
+
+ if (minbus != 0)
+ return (0);
+
+ if (bootverbose)
+ printf("PCIe: Memory Mapped configuration base @ 0x%lx\n",
+ base);
+
+ /* XXX: We should make sure this really fits into the direct map. */
+ pcie_base = (vm_offset_t)pmap_mapdev(base, (maxbus + 1) << 20);
+ pcie_minbus = minbus;
+ pcie_maxbus = maxbus;
+ cfgmech = CFGMECH_PCIE;
+
+ /*
+ * On some AMD systems, some of the devices on bus 0 are
+ * inaccessible using memory-mapped PCI config access. Walk
+ * bus 0 looking for such devices. For these devices, we will
+ * fall back to using type 1 config access instead.
+ */
+ if (pci_cfgregopen() != 0) {
+ for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
+ val1 = pcireg_cfgread(0, slot, 0, 0, 4);
+ if (val1 == 0xffffffff)
+ continue;
+
+ val2 = pciereg_cfgread(0, slot, 0, 0, 4);
+ if (val2 != val1)
+ pcie_badslots |= (1 << slot);
+ }
+ }
+
+ return (1);
+}
+
+#define PCIE_VADDR(base, reg, bus, slot, func) \
+ ((base) + \
+ ((((bus) & 0xff) << 20) | \
+ (((slot) & 0x1f) << 15) | \
+ (((func) & 0x7) << 12) | \
+ ((reg) & 0xfff)))
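+
+/*
+ * Worked example (illustrative): for bus 3, slot 2, func 1, reg 0x100,
+ * the offset into the ECAM window is
+ *	(3 << 20) | (2 << 15) | (1 << 12) | 0x100 = 0x311100,
+ * so the access hits pcie_base + 0x311100.
+ */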
+
+/*
+ * AMD BIOS and Kernel Developer's Guides for CPU families starting with 10h
+ * have a requirement that all accesses to the memory mapped PCI configuration
+ * space are done using the AX class of registers.
+ * Since other vendors do not currently have any contradicting requirements,
+ * the AMD access pattern is applied universally.
+ */
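+
+/*
+ * The "a" register constraints in the inline assembly below are what
+ * produce that access pattern: they pin the data operand to
+ * %eax/%ax/%al depending on the access width.
+ */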
+
+static int
+pciereg_cfgread(int bus, unsigned slot, unsigned func, unsigned reg,
+ unsigned bytes)
+{
+ vm_offset_t va;
+ int data = -1;
+
+ if (bus < pcie_minbus || bus > pcie_maxbus || slot > PCI_SLOTMAX ||
+ func > PCI_FUNCMAX || reg > PCIE_REGMAX)
+ return (-1);
+
+ va = PCIE_VADDR(pcie_base, reg, bus, slot, func);
+
+ switch (bytes) {
+ case 4:
+ __asm("movl %1, %0" : "=a" (data)
+ : "m" (*(volatile uint32_t *)va));
+ break;
+ case 2:
+ __asm("movzwl %1, %0" : "=a" (data)
+ : "m" (*(volatile uint16_t *)va));
+ break;
+ case 1:
+ __asm("movzbl %1, %0" : "=a" (data)
+ : "m" (*(volatile uint8_t *)va));
+ break;
+ }
+
+ return (data);
+}
+
+static void
+pciereg_cfgwrite(int bus, unsigned slot, unsigned func, unsigned reg, int data,
+ unsigned bytes)
+{
+ vm_offset_t va;
+
+ if (bus < pcie_minbus || bus > pcie_maxbus || slot > PCI_SLOTMAX ||
+ func > PCI_FUNCMAX || reg > PCIE_REGMAX)
+ return;
+
+ va = PCIE_VADDR(pcie_base, reg, bus, slot, func);
+
+ switch (bytes) {
+ case 4:
+ __asm("movl %1, %0" : "=m" (*(volatile uint32_t *)va)
+ : "a" (data));
+ break;
+ case 2:
+ __asm("movw %1, %0" : "=m" (*(volatile uint16_t *)va)
+ : "a" ((uint16_t)data));
+ break;
+ case 1:
+ __asm("movb %1, %0" : "=m" (*(volatile uint8_t *)va)
+ : "a" ((uint8_t)data));
+ break;
+ }
+}
diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c
new file mode 100644
index 0000000..dc071d3
--- /dev/null
+++ b/sys/amd64/vmm/amd/amdv.c
@@ -0,0 +1,265 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/smp.h>
+
+#include <machine/vmm.h>
+#include "io/iommu.h"
+
+static int
+amdv_init(void)
+{
+
+ printf("amdv_init: not implemented\n");
+ return (ENXIO);
+}
+
+static int
+amdv_cleanup(void)
+{
+
+ printf("amdv_cleanup: not implemented\n");
+ return (ENXIO);
+}
+
+static void *
+amdv_vminit(struct vm *vm)
+{
+
+ printf("amdv_vminit: not implemented\n");
+ return (NULL);
+}
+
+static int
+amdv_vmrun(void *arg, int vcpu, register_t rip)
+{
+
+ printf("amdv_vmrun: not implemented\n");
+ return (ENXIO);
+}
+
+static void
+amdv_vmcleanup(void *arg)
+{
+
+ printf("amdv_vmcleanup: not implemented\n");
+}
+
+static int
+amdv_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
+ vm_memattr_t attr, int prot, boolean_t spok)
+{
+
+ printf("amdv_vmmmap_set: not implemented\n");
+ return (EINVAL);
+}
+
+static vm_paddr_t
+amdv_vmmmap_get(void *arg, vm_paddr_t gpa)
+{
+
+ printf("amdv_vmmmap_get: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
+{
+
+ printf("amdv_getreg: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
+{
+
+ printf("amdv_setreg: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
+{
+
+ printf("amdv_get_desc: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
+{
+
+ printf("amdv_get_desc: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_inject_event(void *vmi, int vcpu, int type, int vector,
+ uint32_t error_code, int error_code_valid)
+{
+
+ printf("amdv_inject_event: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_getcap(void *arg, int vcpu, int type, int *retval)
+{
+
+ printf("amdv_getcap: not implemented\n");
+ return (EINVAL);
+}
+
+static int
+amdv_setcap(void *arg, int vcpu, int type, int val)
+{
+
+ printf("amdv_setcap: not implemented\n");
+ return (EINVAL);
+}
+
+struct vmm_ops vmm_ops_amd = {
+ amdv_init,
+ amdv_cleanup,
+ amdv_vminit,
+ amdv_vmrun,
+ amdv_vmcleanup,
+ amdv_vmmmap_set,
+ amdv_vmmmap_get,
+ amdv_getreg,
+ amdv_setreg,
+ amdv_getdesc,
+ amdv_setdesc,
+ amdv_inject_event,
+ amdv_getcap,
+ amdv_setcap
+};
+
+static int
+amd_iommu_init(void)
+{
+
+ printf("amd_iommu_init: not implemented\n");
+ return (ENXIO);
+}
+
+static void
+amd_iommu_cleanup(void)
+{
+
+ printf("amd_iommu_cleanup: not implemented\n");
+}
+
+static void
+amd_iommu_enable(void)
+{
+
+ printf("amd_iommu_enable: not implemented\n");
+}
+
+static void
+amd_iommu_disable(void)
+{
+
+ printf("amd_iommu_disable: not implemented\n");
+}
+
+static void *
+amd_iommu_create_domain(vm_paddr_t maxaddr)
+{
+
+ printf("amd_iommu_create_domain: not implemented\n");
+ return (NULL);
+}
+
+static void
+amd_iommu_destroy_domain(void *domain)
+{
+
+ printf("amd_iommu_destroy_domain: not implemented\n");
+}
+
+static uint64_t
+amd_iommu_create_mapping(void *domain, vm_paddr_t gpa, vm_paddr_t hpa,
+ uint64_t len)
+{
+
+ printf("amd_iommu_create_mapping: not implemented\n");
+ return (0);
+}
+
+static uint64_t
+amd_iommu_remove_mapping(void *domain, vm_paddr_t gpa, uint64_t len)
+{
+
+ printf("amd_iommu_remove_mapping: not implemented\n");
+ return (0);
+}
+
+static void
+amd_iommu_add_device(void *domain, int bus, int slot, int func)
+{
+
+ printf("amd_iommu_add_device: not implemented\n");
+}
+
+static void
+amd_iommu_remove_device(void *domain, int bus, int slot, int func)
+{
+
+ printf("amd_iommu_remove_device: not implemented\n");
+}
+
+static void
+amd_iommu_invalidate_tlb(void *domain)
+{
+
+ printf("amd_iommu_invalidate_tlb: not implemented\n");
+}
+
+struct iommu_ops iommu_ops_amd = {
+ amd_iommu_init,
+ amd_iommu_cleanup,
+ amd_iommu_enable,
+ amd_iommu_disable,
+ amd_iommu_create_domain,
+ amd_iommu_destroy_domain,
+ amd_iommu_create_mapping,
+ amd_iommu_remove_mapping,
+ amd_iommu_add_device,
+ amd_iommu_remove_device,
+ amd_iommu_invalidate_tlb,
+};
diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c
new file mode 100644
index 0000000..4f91601
--- /dev/null
+++ b/sys/amd64/vmm/intel/ept.c
@@ -0,0 +1,392 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/param.h>
+#include <machine/cpufunc.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+
+#include <machine/vmm.h>
+#include "vmx_cpufunc.h"
+#include "vmx_msr.h"
+#include "vmx.h"
+#include "ept.h"
+
+#define EPT_PWL4(cap) ((cap) & (1UL << 6))
+#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14))
+#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */
+#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */
+#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))
+#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20))
+
+#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL
+#define INVVPID_ALL_TYPES_SUPPORTED(cap) \
+ (((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)
+
+#define INVEPT_ALL_TYPES_MASK 0x6000000UL
+#define INVEPT_ALL_TYPES_SUPPORTED(cap) \
+ (((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)
+
+#define EPT_PG_RD (1 << 0)
+#define EPT_PG_WR (1 << 1)
+#define EPT_PG_EX (1 << 2)
+#define EPT_PG_MEMORY_TYPE(x) ((x) << 3)
+#define EPT_PG_IGNORE_PAT (1 << 6)
+#define EPT_PG_SUPERPAGE (1 << 7)
+
+#define EPT_ADDR_MASK ((uint64_t)-1 << 12)
+
+MALLOC_DECLARE(M_VMX);
+
+static uint64_t page_sizes_mask;
+
+int
+ept_init(void)
+{
+ int page_shift;
+ uint64_t cap;
+
+ cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
+
+ /*
+ * Verify that:
+ * - page walk length is 4 steps
+ * - extended page tables can be laid out in write-back memory
+ * - invvpid instruction with all possible types is supported
+ * - invept instruction with all possible types is supported
+ */
+ if (!EPT_PWL4(cap) ||
+ !EPT_MEMORY_TYPE_WB(cap) ||
+ !INVVPID_SUPPORTED(cap) ||
+ !INVVPID_ALL_TYPES_SUPPORTED(cap) ||
+ !INVEPT_SUPPORTED(cap) ||
+ !INVEPT_ALL_TYPES_SUPPORTED(cap))
+ return (EINVAL);
+
+ /* Set bits in 'page_sizes_mask' for each valid page size */
+ page_shift = PAGE_SHIFT;
+ page_sizes_mask = 1UL << page_shift; /* 4KB page */
+
+ page_shift += 9;
+ if (EPT_PDE_SUPERPAGE(cap))
+ page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */
+
+ page_shift += 9;
+ if (EPT_PDPTE_SUPERPAGE(cap))
+ page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */
+
+ return (0);
+}
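+
+/*
+ * Illustrative example: on a processor advertising both superpage sizes,
+ * page_sizes_mask ends up as (1UL << 12) | (1UL << 21) | (1UL << 30),
+ * i.e. 4KB, 2MB and 1GB mappings are all usable.
+ */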
+
+#if 0
+static void
+ept_dump(uint64_t *ptp, int nlevels)
+{
+ int i, t, tabs;
+ uint64_t *ptpnext, ptpval;
+
+ if (--nlevels < 0)
+ return;
+
+ tabs = 3 - nlevels;
+ for (t = 0; t < tabs; t++)
+ printf("\t");
+ printf("PTP = %p\n", ptp);
+
+ for (i = 0; i < 512; i++) {
+ ptpval = ptp[i];
+
+ if (ptpval == 0)
+ continue;
+
+ for (t = 0; t < tabs; t++)
+ printf("\t");
+ printf("%3d 0x%016lx\n", i, ptpval);
+
+ if (nlevels != 0 && (ptpval & EPT_PG_SUPERPAGE) == 0) {
+ ptpnext = (uint64_t *)
+ PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
+ ept_dump(ptpnext, nlevels);
+ }
+ }
+}
+#endif
+
+static size_t
+ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
+ vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
+{
+ int spshift, ptpshift, ptpindex, nlevels;
+
+ /*
+	 * Compute the size of the mapping that we can accommodate.
+ *
+ * This is based on three factors:
+ * - super page sizes supported by the processor
+ * - alignment of the region starting at 'gpa' and 'hpa'
+ * - length of the region 'len'
+ */
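+	/*
+	 * Illustrative example: with 'spok' set and 2MB superpages
+	 * supported, gpa = hpa = 0x200000 and length = 0x400000 settle
+	 * on spshift = 21, so the caller's loop installs the range as
+	 * two 2MB leaf mappings.
+	 */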
+ spshift = PAGE_SHIFT;
+ if (spok)
+ spshift += (EPT_PWLEVELS - 1) * 9;
+ while (spshift >= PAGE_SHIFT) {
+ uint64_t spsize = 1UL << spshift;
+ if ((page_sizes_mask & spsize) != 0 &&
+ (gpa & (spsize - 1)) == 0 &&
+ (hpa & (spsize - 1)) == 0 &&
+ length >= spsize) {
+ break;
+ }
+ spshift -= 9;
+ }
+
+ if (spshift < PAGE_SHIFT) {
+ panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
+ "length 0x%016lx, page_sizes_mask 0x%016lx",
+ gpa, hpa, length, page_sizes_mask);
+ }
+
+ nlevels = EPT_PWLEVELS;
+ while (--nlevels >= 0) {
+ ptpshift = PAGE_SHIFT + nlevels * 9;
+ ptpindex = (gpa >> ptpshift) & 0x1FF;
+
+ /* We have reached the leaf mapping */
+ if (spshift >= ptpshift)
+ break;
+
+ /*
+ * We are working on a non-leaf page table page.
+ *
+ * Create the next level page table page if necessary and point
+ * to it from the current page table.
+ */
+ if (ptp[ptpindex] == 0) {
+ void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
+ ptp[ptpindex] = vtophys(nlp);
+ ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
+ }
+
+ /* Work our way down to the next level page table page */
+ ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
+ }
+
+ if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
+ panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
+ "mismatch\n", gpa, ptpshift);
+ }
+
+ if (prot != VM_PROT_NONE) {
+ /* Do the mapping */
+ ptp[ptpindex] = hpa;
+
+ /* Apply the access controls */
+ if (prot & VM_PROT_READ)
+ ptp[ptpindex] |= EPT_PG_RD;
+ if (prot & VM_PROT_WRITE)
+ ptp[ptpindex] |= EPT_PG_WR;
+ if (prot & VM_PROT_EXECUTE)
+ ptp[ptpindex] |= EPT_PG_EX;
+
+ /*
+		 * XXX should we enforce this memory type by setting the
+		 * ignore PAT bit to 1?
+ */
+ ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);
+
+ if (nlevels > 0)
+ ptp[ptpindex] |= EPT_PG_SUPERPAGE;
+ } else {
+ /* Remove the mapping */
+ ptp[ptpindex] = 0;
+ }
+
+ return (1UL << ptpshift);
+}
+
+static vm_paddr_t
+ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa)
+{
+ int nlevels, ptpshift, ptpindex;
+ uint64_t ptpval, hpabase, pgmask;
+
+ nlevels = EPT_PWLEVELS;
+ while (--nlevels >= 0) {
+ ptpshift = PAGE_SHIFT + nlevels * 9;
+ ptpindex = (gpa >> ptpshift) & 0x1FF;
+
+ ptpval = ptp[ptpindex];
+
+ /* Cannot make progress beyond this point */
+ if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0)
+ break;
+
+ if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) {
+ pgmask = (1UL << ptpshift) - 1;
+ hpabase = ptpval & ~pgmask;
+ return (hpabase | (gpa & pgmask));
+ }
+
+ /* Work our way down to the next level page table page */
+ ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
+ }
+
+ return ((vm_paddr_t)-1);
+}
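+
+/*
+ * Illustrative example: a 2MB leaf (EPT_PG_SUPERPAGE set at ptpshift 21)
+ * whose address bits are 0x80000000 translates gpa 0x40234567 to
+ * 0x80000000 | (0x40234567 & 0x1fffff) = 0x80034567.
+ */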
+
+static void
+ept_free_pt_entry(pt_entry_t pte)
+{
+ if (pte == 0)
+ return;
+
+ /* sanity check */
+ if ((pte & EPT_PG_SUPERPAGE) != 0)
+ panic("ept_free_pt_entry: pte cannot have superpage bit");
+}
+
+static void
+ept_free_pd_entry(pd_entry_t pde)
+{
+ pt_entry_t *pt;
+ int i;
+
+ if (pde == 0)
+ return;
+
+ if ((pde & EPT_PG_SUPERPAGE) == 0) {
+ pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
+ for (i = 0; i < NPTEPG; i++)
+ ept_free_pt_entry(pt[i]);
+ free(pt, M_VMX); /* free the page table page */
+ }
+}
+
+static void
+ept_free_pdp_entry(pdp_entry_t pdpe)
+{
+ pd_entry_t *pd;
+ int i;
+
+ if (pdpe == 0)
+ return;
+
+ if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
+ pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
+ for (i = 0; i < NPDEPG; i++)
+ ept_free_pd_entry(pd[i]);
+ free(pd, M_VMX); /* free the page directory page */
+ }
+}
+
+static void
+ept_free_pml4_entry(pml4_entry_t pml4e)
+{
+ pdp_entry_t *pdp;
+ int i;
+
+ if (pml4e == 0)
+ return;
+
+ if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
+ for (i = 0; i < NPDPEPG; i++)
+ ept_free_pdp_entry(pdp[i]);
+ free(pdp, M_VMX); /* free the page directory ptr page */
+ }
+}
+
+void
+ept_vmcleanup(struct vmx *vmx)
+{
+ int i;
+
+ for (i = 0; i < NPML4EPG; i++)
+ ept_free_pml4_entry(vmx->pml4ept[i]);
+}
+
+int
+ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
+ vm_memattr_t attr, int prot, boolean_t spok)
+{
+ size_t n;
+ struct vmx *vmx = arg;
+
+ while (len > 0) {
+ n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr,
+ prot, spok);
+ len -= n;
+ gpa += n;
+ hpa += n;
+ }
+
+ return (0);
+}
+
+vm_paddr_t
+ept_vmmmap_get(void *arg, vm_paddr_t gpa)
+{
+ vm_paddr_t hpa;
+ struct vmx *vmx;
+
+ vmx = arg;
+ hpa = ept_lookup_mapping(vmx->pml4ept, gpa);
+ return (hpa);
+}
+
+static void
+invept_single_context(void *arg)
+{
+ struct invept_desc desc = *(struct invept_desc *)arg;
+
+ invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
+}
+
+void
+ept_invalidate_mappings(u_long pml4ept)
+{
+ struct invept_desc invept_desc = { 0 };
+
+ invept_desc.eptp = EPTP(pml4ept);
+
+ smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
+}
diff --git a/sys/amd64/vmm/intel/ept.h b/sys/amd64/vmm/intel/ept.h
new file mode 100644
index 0000000..2d7258d
--- /dev/null
+++ b/sys/amd64/vmm/intel/ept.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _EPT_H_
+#define _EPT_H_
+
+struct vmx;
+
+#define EPT_PWLEVELS 4 /* page walk levels */
+#define EPTP(pml4) ((pml4) | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK)
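+
+/*
+ * Illustrative example: assuming PAT_WRITE_BACK == 6, a PML4 table at
+ * physical address 0x123000 yields
+ *	EPTP(0x123000) == 0x123000 | (3 << 3) | 6 == 0x12301e.
+ */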
+
+int ept_init(void);
+int ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
+ vm_memattr_t attr, int prot, boolean_t allow_superpage_mappings);
+vm_paddr_t ept_vmmmap_get(void *arg, vm_paddr_t gpa);
+void ept_invalidate_mappings(u_long ept_pml4);
+void ept_vmcleanup(struct vmx *vmx);
+#endif
diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c
new file mode 100644
index 0000000..a5784dd
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmcs.c
@@ -0,0 +1,551 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_ddb.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/pcpu.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/segments.h>
+#include <machine/pmap.h>
+
+#include <machine/vmm.h>
+#include "vmm_host.h"
+#include "vmcs.h"
+#include "vmx_cpufunc.h"
+#include "ept.h"
+#include "vmx.h"
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+static uint64_t
+vmcs_fix_regval(uint32_t encoding, uint64_t val)
+{
+
+ switch (encoding) {
+ case VMCS_GUEST_CR0:
+ val = vmx_fix_cr0(val);
+ break;
+ case VMCS_GUEST_CR4:
+ val = vmx_fix_cr4(val);
+ break;
+ default:
+ break;
+ }
+ return (val);
+}
+
+static uint32_t
+vmcs_field_encoding(int ident)
+{
+ switch (ident) {
+ case VM_REG_GUEST_CR0:
+ return (VMCS_GUEST_CR0);
+ case VM_REG_GUEST_CR3:
+ return (VMCS_GUEST_CR3);
+ case VM_REG_GUEST_CR4:
+ return (VMCS_GUEST_CR4);
+ case VM_REG_GUEST_DR7:
+ return (VMCS_GUEST_DR7);
+ case VM_REG_GUEST_RSP:
+ return (VMCS_GUEST_RSP);
+ case VM_REG_GUEST_RIP:
+ return (VMCS_GUEST_RIP);
+ case VM_REG_GUEST_RFLAGS:
+ return (VMCS_GUEST_RFLAGS);
+ case VM_REG_GUEST_ES:
+ return (VMCS_GUEST_ES_SELECTOR);
+ case VM_REG_GUEST_CS:
+ return (VMCS_GUEST_CS_SELECTOR);
+ case VM_REG_GUEST_SS:
+ return (VMCS_GUEST_SS_SELECTOR);
+ case VM_REG_GUEST_DS:
+ return (VMCS_GUEST_DS_SELECTOR);
+ case VM_REG_GUEST_FS:
+ return (VMCS_GUEST_FS_SELECTOR);
+ case VM_REG_GUEST_GS:
+ return (VMCS_GUEST_GS_SELECTOR);
+ case VM_REG_GUEST_TR:
+ return (VMCS_GUEST_TR_SELECTOR);
+ case VM_REG_GUEST_LDTR:
+ return (VMCS_GUEST_LDTR_SELECTOR);
+ case VM_REG_GUEST_EFER:
+ return (VMCS_GUEST_IA32_EFER);
+ default:
+ return (-1);
+ }
+}
+
+static int
+vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
+{
+
+ switch (seg) {
+ case VM_REG_GUEST_ES:
+ *base = VMCS_GUEST_ES_BASE;
+ *lim = VMCS_GUEST_ES_LIMIT;
+ *acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_CS:
+ *base = VMCS_GUEST_CS_BASE;
+ *lim = VMCS_GUEST_CS_LIMIT;
+ *acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_SS:
+ *base = VMCS_GUEST_SS_BASE;
+ *lim = VMCS_GUEST_SS_LIMIT;
+ *acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_DS:
+ *base = VMCS_GUEST_DS_BASE;
+ *lim = VMCS_GUEST_DS_LIMIT;
+ *acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_FS:
+ *base = VMCS_GUEST_FS_BASE;
+ *lim = VMCS_GUEST_FS_LIMIT;
+ *acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_GS:
+ *base = VMCS_GUEST_GS_BASE;
+ *lim = VMCS_GUEST_GS_LIMIT;
+ *acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_TR:
+ *base = VMCS_GUEST_TR_BASE;
+ *lim = VMCS_GUEST_TR_LIMIT;
+ *acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_LDTR:
+ *base = VMCS_GUEST_LDTR_BASE;
+ *lim = VMCS_GUEST_LDTR_LIMIT;
+ *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
+ break;
+ case VM_REG_GUEST_IDTR:
+ *base = VMCS_GUEST_IDTR_BASE;
+ *lim = VMCS_GUEST_IDTR_LIMIT;
+ *acc = VMCS_INVALID_ENCODING;
+ break;
+ case VM_REG_GUEST_GDTR:
+ *base = VMCS_GUEST_GDTR_BASE;
+ *lim = VMCS_GUEST_GDTR_LIMIT;
+ *acc = VMCS_INVALID_ENCODING;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+int
+vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
+{
+ int error;
+ uint32_t encoding;
+
+ /*
+ * If we need to get at vmx-specific state in the VMCS we can bypass
+ * the translation of 'ident' to 'encoding' by simply setting the
+	 * sign bit. As it so happens, the upper 16 bits are reserved (i.e.
+	 * set to 0) in the encodings for the VMCS, so we are free to use the
+ * sign bit.
+ */
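+	/*
+	 * For example (illustrative), vmcs_getreg(vmcs,
+	 * VMCS_IDENT(VMCS_GUEST_ACTIVITY), &val) works because
+	 * VMCS_IDENT() sets bit 31, making 'ident' negative; the mask
+	 * below then recovers the raw encoding.
+	 */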
+ if (ident < 0)
+ encoding = ident & 0x7fffffff;
+ else
+ encoding = vmcs_field_encoding(ident);
+
+ if (encoding == (uint32_t)-1)
+ return (EINVAL);
+
+ VMPTRLD(vmcs);
+ error = vmread(encoding, retval);
+ VMCLEAR(vmcs);
+ return (error);
+}
+
+int
+vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
+{
+ int error;
+ uint32_t encoding;
+
+ if (ident < 0)
+ encoding = ident & 0x7fffffff;
+ else
+ encoding = vmcs_field_encoding(ident);
+
+ if (encoding == (uint32_t)-1)
+ return (EINVAL);
+
+ val = vmcs_fix_regval(encoding, val);
+
+ VMPTRLD(vmcs);
+ error = vmwrite(encoding, val);
+ VMCLEAR(vmcs);
+ return (error);
+}
+
+int
+vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
+{
+ int error;
+ uint32_t base, limit, access;
+
+ error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+ if (error != 0)
+ panic("vmcs_setdesc: invalid segment register %d", seg);
+
+ VMPTRLD(vmcs);
+ if ((error = vmwrite(base, desc->base)) != 0)
+ goto done;
+
+ if ((error = vmwrite(limit, desc->limit)) != 0)
+ goto done;
+
+ if (access != VMCS_INVALID_ENCODING) {
+ if ((error = vmwrite(access, desc->access)) != 0)
+ goto done;
+ }
+done:
+ VMCLEAR(vmcs);
+ return (error);
+}
+
+int
+vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
+{
+ int error;
+ uint32_t base, limit, access;
+ uint64_t u64;
+
+ error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+ if (error != 0)
+ panic("vmcs_getdesc: invalid segment register %d", seg);
+
+ VMPTRLD(vmcs);
+ if ((error = vmread(base, &u64)) != 0)
+ goto done;
+ desc->base = u64;
+
+ if ((error = vmread(limit, &u64)) != 0)
+ goto done;
+ desc->limit = u64;
+
+ if (access != VMCS_INVALID_ENCODING) {
+ if ((error = vmread(access, &u64)) != 0)
+ goto done;
+ desc->access = u64;
+ }
+done:
+ VMCLEAR(vmcs);
+ return (error);
+}
+
+int
+vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
+{
+ int error;
+
+ VMPTRLD(vmcs);
+
+ /*
+ * Guest MSRs are saved in the VM-exit MSR-store area.
+ * Guest MSRs are loaded from the VM-entry MSR-load area.
+ * Both areas point to the same location in memory.
+ */
+ if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
+ goto done;
+ if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
+ goto done;
+ if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
+ goto done;
+
+ error = 0;
+done:
+ VMCLEAR(vmcs);
+ return (error);
+}
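+
+/*
+ * Illustrative usage (hypothetical caller, not part of this file): with
+ * a page-backed array of 'struct msr_entry' named 'guest_msrs' holding
+ * two entries, the areas would be wired up as
+ *	error = vmcs_set_msr_save(vmcs, vtophys(guest_msrs), 2);
+ */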
+
+int
+vmcs_set_defaults(struct vmcs *vmcs,
+ u_long host_rip, u_long host_rsp, u_long ept_pml4,
+ uint32_t pinbased_ctls, uint32_t procbased_ctls,
+ uint32_t procbased_ctls2, uint32_t exit_ctls,
+ uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
+{
+ int error, codesel, datasel, tsssel;
+ u_long cr0, cr4, efer;
+ uint64_t eptp, pat, fsbase, idtrbase;
+ uint32_t exc_bitmap;
+
+ codesel = vmm_get_host_codesel();
+ datasel = vmm_get_host_datasel();
+ tsssel = vmm_get_host_tsssel();
+
+ /*
+ * Make sure we have a "current" VMCS to work with.
+ */
+ VMPTRLD(vmcs);
+
+ /*
+ * Load the VMX controls
+ */
+ if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
+ goto done;
+ if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
+ goto done;
+ if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
+ goto done;
+ if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
+ goto done;
+ if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
+ goto done;
+
+ /* Guest state */
+
+ /* Initialize guest IA32_PAT MSR with the default value */
+ pat = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+ if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
+ goto done;
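+	/*
+	 * With the standard specialreg.h PAT encodings (WB == 6, WT == 4,
+	 * UC- == 7, UC == 0) the value built above is 0x0007040600070406,
+	 * the architectural power-on default for IA32_PAT.
+	 */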
+
+ /* Host state */
+
+ /* Initialize host IA32_PAT MSR */
+ pat = vmm_get_host_pat();
+ if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
+ goto done;
+
+ /* Load the IA32_EFER MSR */
+ efer = vmm_get_host_efer();
+ if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
+ goto done;
+
+ /* Load the control registers */
+
+ cr0 = vmm_get_host_cr0();
+ if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
+ goto done;
+
+ cr4 = vmm_get_host_cr4() | CR4_VMXE;
+ if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
+ goto done;
+
+ /* Load the segment selectors */
+ if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
+ goto done;
+
+ if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
+ goto done;
+
+ /*
+ * Load the Base-Address for %fs and idtr.
+ *
+ * Note that we exclude %gs, tss and gdtr here because their base
+ * address is pcpu specific.
+ */
+ fsbase = vmm_get_host_fsbase();
+ if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
+ goto done;
+
+ idtrbase = vmm_get_host_idtrbase();
+ if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
+ goto done;
+
+ /* instruction pointer */
+ if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
+ goto done;
+
+ /* stack pointer */
+ if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
+ goto done;
+
+ /* eptp */
+ eptp = EPTP(ept_pml4);
+ if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
+ goto done;
+
+ /* vpid */
+ if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
+ goto done;
+
+ /* msr bitmap */
+ if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
+ goto done;
+
+ /* exception bitmap */
+ exc_bitmap = 1 << IDT_MC;
+ if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
+ goto done;
+
+ /* link pointer */
+ if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
+ goto done;
+done:
+ VMCLEAR(vmcs);
+ return (error);
+}
+
+uint64_t
+vmcs_read(uint32_t encoding)
+{
+ int error;
+ uint64_t val;
+
+ error = vmread(encoding, &val);
+ if (error != 0)
+ panic("vmcs_read(%u) error %d", encoding, error);
+
+ return (val);
+}
+
+#ifdef DDB
+extern int vmxon_enabled[];
+
+DB_SHOW_COMMAND(vmcs, db_show_vmcs)
+{
+ uint64_t cur_vmcs, val;
+ uint32_t exit;
+
+ if (!vmxon_enabled[curcpu]) {
+ db_printf("VMX not enabled\n");
+ return;
+ }
+
+ if (have_addr) {
+ db_printf("Only current VMCS supported\n");
+ return;
+ }
+
+ vmptrst(&cur_vmcs);
+ if (cur_vmcs == VMCS_INITIAL) {
+ db_printf("No current VM context\n");
+ return;
+ }
+ db_printf("VMCS: %jx\n", cur_vmcs);
+ db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
+ db_printf("Activity: ");
+ val = vmcs_read(VMCS_GUEST_ACTIVITY);
+ switch (val) {
+ case 0:
+ db_printf("Active");
+ break;
+ case 1:
+ db_printf("HLT");
+ break;
+ case 2:
+ db_printf("Shutdown");
+ break;
+ case 3:
+ db_printf("Wait for SIPI");
+ break;
+ default:
+ db_printf("Unknown: %#lx", val);
+ }
+ db_printf("\n");
+ exit = vmcs_read(VMCS_EXIT_REASON);
+ if (exit & 0x80000000)
+ db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
+ else
+ db_printf("Exit Reason: %u\n", exit & 0xffff);
+ db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
+ db_printf("Guest Linear Address: %#lx\n",
+ vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
+ switch (exit & 0x8000ffff) {
+ case EXIT_REASON_EXCEPTION:
+ case EXIT_REASON_EXT_INTR:
+ val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO);
+ db_printf("Interrupt Type: ");
+ switch (val >> 8 & 0x7) {
+ case 0:
+ db_printf("external");
+ break;
+ case 2:
+ db_printf("NMI");
+ break;
+ case 3:
+ db_printf("HW exception");
+ break;
+ case 4:
+ db_printf("SW exception");
+ break;
+ default:
+ db_printf("?? %lu", val >> 8 & 0x7);
+ break;
+ }
+ db_printf(" Vector: %lu", val & 0xff);
+ if (val & 0x800)
+ db_printf(" Error Code: %lx",
+ vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR));
+ db_printf("\n");
+ break;
+ case EXIT_REASON_EPT_FAULT:
+ case EXIT_REASON_EPT_MISCONFIG:
+ db_printf("Guest Physical Address: %#lx\n",
+ vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
+ break;
+ }
+ db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
+}
+#endif
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
new file mode 100644
index 0000000..f39eed2
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -0,0 +1,338 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMCS_H_
+#define _VMCS_H_
+
+#ifdef _KERNEL
+struct vmcs {
+ uint32_t identifier;
+ uint32_t abort_code;
+ char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
+};
+CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
+
+/* MSR save region is composed of an array of 'struct msr_entry' */
+struct msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t val;
+};
+
+int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
+int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
+ u_long ept_pml4,
+ uint32_t pinbased_ctls, uint32_t procbased_ctls,
+ uint32_t procbased_ctls2, uint32_t exit_ctls,
+ uint32_t entry_ctls, u_long msr_bitmap,
+ uint16_t vpid);
+int vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval);
+int vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val);
+int vmcs_getdesc(struct vmcs *vmcs, int ident,
+ struct seg_desc *desc);
+int vmcs_setdesc(struct vmcs *vmcs, int ident,
+ struct seg_desc *desc);
+uint64_t vmcs_read(uint32_t encoding);
+
+#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
+#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
+#define vmcs_instruction_error() vmcs_read(VMCS_INSTRUCTION_ERROR)
+#define vmcs_exit_reason() (vmcs_read(VMCS_EXIT_REASON) & 0xffff)
+#define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION)
+#define vmcs_guest_cr3() vmcs_read(VMCS_GUEST_CR3)
+#define vmcs_gpa() vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
+#define vmcs_gla() vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)
+
+#endif /* _KERNEL */
+
+#define VMCS_INITIAL 0xffffffffffffffff
+
+#define VMCS_IDENT(encoding) ((encoding) | 0x80000000)
+/*
+ * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
+ */
+#define VMCS_INVALID_ENCODING 0xffffffff
+
+/* 16-bit control fields */
+#define VMCS_VPID 0x00000000
+
+/* 16-bit guest-state fields */
+#define VMCS_GUEST_ES_SELECTOR 0x00000800
+#define VMCS_GUEST_CS_SELECTOR 0x00000802
+#define VMCS_GUEST_SS_SELECTOR 0x00000804
+#define VMCS_GUEST_DS_SELECTOR 0x00000806
+#define VMCS_GUEST_FS_SELECTOR 0x00000808
+#define VMCS_GUEST_GS_SELECTOR 0x0000080A
+#define VMCS_GUEST_LDTR_SELECTOR 0x0000080C
+#define VMCS_GUEST_TR_SELECTOR 0x0000080E
+
+/* 16-bit host-state fields */
+#define VMCS_HOST_ES_SELECTOR 0x00000C00
+#define VMCS_HOST_CS_SELECTOR 0x00000C02
+#define VMCS_HOST_SS_SELECTOR 0x00000C04
+#define VMCS_HOST_DS_SELECTOR 0x00000C06
+#define VMCS_HOST_FS_SELECTOR 0x00000C08
+#define VMCS_HOST_GS_SELECTOR 0x00000C0A
+#define VMCS_HOST_TR_SELECTOR 0x00000C0C
+
+/* 64-bit control fields */
+#define VMCS_IO_BITMAP_A 0x00002000
+#define VMCS_IO_BITMAP_B 0x00002002
+#define VMCS_MSR_BITMAP 0x00002004
+#define VMCS_EXIT_MSR_STORE 0x00002006
+#define VMCS_EXIT_MSR_LOAD 0x00002008
+#define VMCS_ENTRY_MSR_LOAD 0x0000200A
+#define VMCS_EXECUTIVE_VMCS 0x0000200C
+#define VMCS_TSC_OFFSET 0x00002010
+#define VMCS_VIRTUAL_APIC 0x00002012
+#define VMCS_APIC_ACCESS 0x00002014
+#define VMCS_EPTP 0x0000201A
+
+/* 64-bit read-only fields */
+#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
+
+/* 64-bit guest-state fields */
+#define VMCS_LINK_POINTER 0x00002800
+#define VMCS_GUEST_IA32_DEBUGCTL 0x00002802
+#define VMCS_GUEST_IA32_PAT 0x00002804
+#define VMCS_GUEST_IA32_EFER 0x00002806
+#define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
+#define VMCS_GUEST_PDPTE0 0x0000280A
+#define VMCS_GUEST_PDPTE1 0x0000280C
+#define VMCS_GUEST_PDPTE2 0x0000280E
+#define VMCS_GUEST_PDPTE3 0x00002810
+
+/* 64-bit host-state fields */
+#define VMCS_HOST_IA32_PAT 0x00002C00
+#define VMCS_HOST_IA32_EFER 0x00002C02
+#define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002C04
+
+/* 32-bit control fields */
+#define VMCS_PIN_BASED_CTLS 0x00004000
+#define VMCS_PRI_PROC_BASED_CTLS 0x00004002
+#define VMCS_EXCEPTION_BITMAP 0x00004004
+#define VMCS_PF_ERROR_MASK 0x00004006
+#define VMCS_PF_ERROR_MATCH 0x00004008
+#define VMCS_CR3_TARGET_COUNT 0x0000400A
+#define VMCS_EXIT_CTLS 0x0000400C
+#define VMCS_EXIT_MSR_STORE_COUNT 0x0000400E
+#define VMCS_EXIT_MSR_LOAD_COUNT 0x00004010
+#define VMCS_ENTRY_CTLS 0x00004012
+#define VMCS_ENTRY_MSR_LOAD_COUNT 0x00004014
+#define VMCS_ENTRY_INTR_INFO 0x00004016
+#define VMCS_ENTRY_EXCEPTION_ERROR 0x00004018
+#define VMCS_ENTRY_INST_LENGTH 0x0000401A
+#define VMCS_TPR_THRESHOLD 0x0000401C
+#define VMCS_SEC_PROC_BASED_CTLS 0x0000401E
+#define VMCS_PLE_GAP 0x00004020
+#define VMCS_PLE_WINDOW 0x00004022
+
+/* 32-bit read-only data fields */
+#define VMCS_INSTRUCTION_ERROR 0x00004400
+#define VMCS_EXIT_REASON 0x00004402
+#define VMCS_EXIT_INTERRUPTION_INFO 0x00004404
+#define VMCS_EXIT_INTERRUPTION_ERROR 0x00004406
+#define VMCS_IDT_VECTORING_INFO 0x00004408
+#define VMCS_IDT_VECTORING_ERROR 0x0000440A
+#define VMCS_EXIT_INSTRUCTION_LENGTH 0x0000440C
+#define VMCS_EXIT_INSTRUCTION_INFO 0x0000440E
+
+/* 32-bit guest-state fields */
+#define VMCS_GUEST_ES_LIMIT 0x00004800
+#define VMCS_GUEST_CS_LIMIT 0x00004802
+#define VMCS_GUEST_SS_LIMIT 0x00004804
+#define VMCS_GUEST_DS_LIMIT 0x00004806
+#define VMCS_GUEST_FS_LIMIT 0x00004808
+#define VMCS_GUEST_GS_LIMIT 0x0000480A
+#define VMCS_GUEST_LDTR_LIMIT 0x0000480C
+#define VMCS_GUEST_TR_LIMIT 0x0000480E
+#define VMCS_GUEST_GDTR_LIMIT 0x00004810
+#define VMCS_GUEST_IDTR_LIMIT 0x00004812
+#define VMCS_GUEST_ES_ACCESS_RIGHTS 0x00004814
+#define VMCS_GUEST_CS_ACCESS_RIGHTS 0x00004816
+#define VMCS_GUEST_SS_ACCESS_RIGHTS 0x00004818
+#define VMCS_GUEST_DS_ACCESS_RIGHTS 0x0000481A
+#define VMCS_GUEST_FS_ACCESS_RIGHTS 0x0000481C
+#define VMCS_GUEST_GS_ACCESS_RIGHTS 0x0000481E
+#define VMCS_GUEST_LDTR_ACCESS_RIGHTS 0x00004820
+#define VMCS_GUEST_TR_ACCESS_RIGHTS 0x00004822
+#define VMCS_GUEST_INTERRUPTIBILITY 0x00004824
+#define VMCS_GUEST_ACTIVITY 0x00004826
+#define VMCS_GUEST_SMBASE 0x00004828
+#define VMCS_GUEST_IA32_SYSENTER_CS 0x0000482A
+#define VMCS_PREEMPTION_TIMER_VALUE 0x0000482E
+
+/* 32-bit host state fields */
+#define VMCS_HOST_IA32_SYSENTER_CS 0x00004C00
+
+/* Natural Width control fields */
+#define VMCS_CR0_MASK 0x00006000
+#define VMCS_CR4_MASK 0x00006002
+#define VMCS_CR0_SHADOW 0x00006004
+#define VMCS_CR4_SHADOW 0x00006006
+#define VMCS_CR3_TARGET0 0x00006008
+#define VMCS_CR3_TARGET1 0x0000600A
+#define VMCS_CR3_TARGET2 0x0000600C
+#define VMCS_CR3_TARGET3 0x0000600E
+
+/* Natural Width read-only fields */
+#define VMCS_EXIT_QUALIFICATION 0x00006400
+#define VMCS_IO_RCX 0x00006402
+#define VMCS_IO_RSI 0x00006404
+#define VMCS_IO_RDI 0x00006406
+#define VMCS_IO_RIP 0x00006408
+#define VMCS_GUEST_LINEAR_ADDRESS 0x0000640A
+
+/* Natural Width guest-state fields */
+#define VMCS_GUEST_CR0 0x00006800
+#define VMCS_GUEST_CR3 0x00006802
+#define VMCS_GUEST_CR4 0x00006804
+#define VMCS_GUEST_ES_BASE 0x00006806
+#define VMCS_GUEST_CS_BASE 0x00006808
+#define VMCS_GUEST_SS_BASE 0x0000680A
+#define VMCS_GUEST_DS_BASE 0x0000680C
+#define VMCS_GUEST_FS_BASE 0x0000680E
+#define VMCS_GUEST_GS_BASE 0x00006810
+#define VMCS_GUEST_LDTR_BASE 0x00006812
+#define VMCS_GUEST_TR_BASE 0x00006814
+#define VMCS_GUEST_GDTR_BASE 0x00006816
+#define VMCS_GUEST_IDTR_BASE 0x00006818
+#define VMCS_GUEST_DR7 0x0000681A
+#define VMCS_GUEST_RSP 0x0000681C
+#define VMCS_GUEST_RIP 0x0000681E
+#define VMCS_GUEST_RFLAGS 0x00006820
+#define VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
+#define VMCS_GUEST_IA32_SYSENTER_ESP 0x00006824
+#define VMCS_GUEST_IA32_SYSENTER_EIP 0x00006826
+
+/* Natural Width host-state fields */
+#define VMCS_HOST_CR0 0x00006C00
+#define VMCS_HOST_CR3 0x00006C02
+#define VMCS_HOST_CR4 0x00006C04
+#define VMCS_HOST_FS_BASE 0x00006C06
+#define VMCS_HOST_GS_BASE 0x00006C08
+#define VMCS_HOST_TR_BASE 0x00006C0A
+#define VMCS_HOST_GDTR_BASE 0x00006C0C
+#define VMCS_HOST_IDTR_BASE 0x00006C0E
+#define VMCS_HOST_IA32_SYSENTER_ESP 0x00006C10
+#define VMCS_HOST_IA32_SYSENTER_EIP 0x00006C12
+#define VMCS_HOST_RSP 0x00006C14
+#define	VMCS_HOST_RIP			0x00006C16
+
+/*
+ * VM instruction error numbers
+ */
+#define VMRESUME_WITH_NON_LAUNCHED_VMCS 5
+
+/*
+ * VMCS exit reasons
+ */
+#define EXIT_REASON_EXCEPTION 0
+#define EXIT_REASON_EXT_INTR 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_INIT 3
+#define EXIT_REASON_SIPI 4
+#define EXIT_REASON_IO_SMI 5
+#define EXIT_REASON_SMI 6
+#define EXIT_REASON_INTR_WINDOW 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_GETSEC 11
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_RSM 17
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMXOFF 26
+#define EXIT_REASON_VMXON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_INOUT 30
+#define EXIT_REASON_RDMSR 31
+#define EXIT_REASON_WRMSR 32
+#define EXIT_REASON_INVAL_VMCS 33
+#define EXIT_REASON_INVAL_MSR 34
+#define EXIT_REASON_MWAIT 36
+#define EXIT_REASON_MTF 37
+#define EXIT_REASON_MONITOR 39
+#define EXIT_REASON_PAUSE 40
+#define EXIT_REASON_MCE 41
+#define EXIT_REASON_TPR 43
+#define EXIT_REASON_APIC 44
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
+#define EXIT_REASON_EPT_FAULT 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_VMX_PREEMPT 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+
+/*
+ * VMCS interrupt information fields
+ */
+#define VMCS_INTERRUPTION_INFO_VALID (1U << 31)
+#define VMCS_INTERRUPTION_INFO_HW_INTR (0 << 8)
+#define VMCS_INTERRUPTION_INFO_NMI (2 << 8)
+
+/*
+ * VMCS Guest interruptibility field
+ */
+#define VMCS_INTERRUPTIBILITY_STI_BLOCKING (1 << 0)
+#define VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING (1 << 1)
+#define VMCS_INTERRUPTIBILITY_SMI_BLOCKING (1 << 2)
+#define VMCS_INTERRUPTIBILITY_NMI_BLOCKING (1 << 3)
+
+/*
+ * Exit qualification for EXIT_REASON_INVAL_VMCS
+ */
+#define EXIT_QUAL_NMI_WHILE_STI_BLOCKING 3
+
+/*
+ * Exit qualification for EPT violation
+ */
+#define EPT_VIOLATION_DATA_READ (1UL << 0)
+#define EPT_VIOLATION_DATA_WRITE (1UL << 1)
+#define EPT_VIOLATION_INST_FETCH (1UL << 2)
+#define EPT_VIOLATION_GLA_VALID (1UL << 7)
+#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
+
+#endif
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
new file mode 100644
index 0000000..fb41074
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -0,0 +1,1867 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/psl.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/vmparam.h>
+
+#include <x86/apicreg.h>
+
+#include <machine/vmm.h>
+#include "vmm_host.h"
+#include "vmm_lapic.h"
+#include "vmm_msr.h"
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+
+#include "vmx_msr.h"
+#include "ept.h"
+#include "vmx_cpufunc.h"
+#include "vmx.h"
+#include "x86.h"
+#include "vmx_controls.h"
+
+#define PINBASED_CTLS_ONE_SETTING \
+ (PINBASED_EXTINT_EXITING | \
+ PINBASED_NMI_EXITING | \
+ PINBASED_VIRTUAL_NMI)
+#define PINBASED_CTLS_ZERO_SETTING 0
+
+#define PROCBASED_CTLS_WINDOW_SETTING \
+ (PROCBASED_INT_WINDOW_EXITING | \
+ PROCBASED_NMI_WINDOW_EXITING)
+
+#define PROCBASED_CTLS_ONE_SETTING \
+ (PROCBASED_SECONDARY_CONTROLS | \
+ PROCBASED_IO_EXITING | \
+ PROCBASED_MSR_BITMAPS | \
+ PROCBASED_CTLS_WINDOW_SETTING)
+#define PROCBASED_CTLS_ZERO_SETTING \
+ (PROCBASED_CR3_LOAD_EXITING | \
+ PROCBASED_CR3_STORE_EXITING | \
+ PROCBASED_IO_BITMAPS)
+
+#define PROCBASED_CTLS2_ONE_SETTING PROCBASED2_ENABLE_EPT
+#define PROCBASED_CTLS2_ZERO_SETTING 0
+
+#define VM_EXIT_CTLS_ONE_SETTING_NO_PAT \
+ (VM_EXIT_HOST_LMA | \
+ VM_EXIT_SAVE_EFER | \
+ VM_EXIT_LOAD_EFER)
+
+#define VM_EXIT_CTLS_ONE_SETTING \
+ (VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \
+ VM_EXIT_SAVE_PAT | \
+ VM_EXIT_LOAD_PAT)
+#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
+
+#define VM_ENTRY_CTLS_ONE_SETTING_NO_PAT VM_ENTRY_LOAD_EFER
+
+#define VM_ENTRY_CTLS_ONE_SETTING \
+ (VM_ENTRY_CTLS_ONE_SETTING_NO_PAT | \
+ VM_ENTRY_LOAD_PAT)
+#define VM_ENTRY_CTLS_ZERO_SETTING \
+ (VM_ENTRY_LOAD_DEBUG_CONTROLS | \
+ VM_ENTRY_INTO_SMM | \
+ VM_ENTRY_DEACTIVATE_DUAL_MONITOR)
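+
+/*
+ * The ONE/ZERO setting pairs above are negotiated against the VMX
+ * capability MSRs by vmx_set_ctlreg() (defined in vmx_msr.c). A rough
+ * sketch of that negotiation, per Intel SDM Vol. 3C, Appendix A:
+ *
+ *	val = rdmsr(cap_msr);
+ *	must_be_one = (uint32_t)val;		(bits that must be 1)
+ *	may_be_one  = (uint32_t)(val >> 32);	(bits allowed to be 1)
+ *	fail if (one_setting & ~may_be_one) != 0;
+ *	fail if (zero_setting & must_be_one) != 0;
+ */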
+
+#define guest_msr_rw(vmx, msr) \
+ msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+MALLOC_DEFINE(M_VMX, "vmx", "vmx");
+
+int vmxon_enabled[MAXCPU];
+static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2;
+static uint32_t exit_ctls, entry_ctls;
+
+static uint64_t cr0_ones_mask, cr0_zeros_mask;
+static uint64_t cr4_ones_mask, cr4_zeros_mask;
+
+static volatile u_int nextvpid;
+
+static int vmx_no_patmsr;
+
+/*
+ * Virtual NMI blocking conditions.
+ *
+ * Some processor implementations also require NMI to be blocked if
+ * the STI_BLOCKING bit is set. It is possible to detect this at runtime
+ * based on the (exit_reason,exit_qual) tuple being set to
+ * (EXIT_REASON_INVAL_VMCS, EXIT_QUAL_NMI_WHILE_STI_BLOCKING).
+ *
+ * We take the easy way out and also include STI_BLOCKING as one of the
+ * gating items for vNMI injection.
+ */
+static uint64_t nmi_blocking_bits = VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING |
+ VMCS_INTERRUPTIBILITY_NMI_BLOCKING |
+ VMCS_INTERRUPTIBILITY_STI_BLOCKING;
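+
+/*
+ * The runtime detection mentioned above would look roughly like this
+ * sketch (we do not bother and gate on STI_BLOCKING unconditionally):
+ *
+ *	if (exit_reason == EXIT_REASON_INVAL_VMCS &&
+ *	    exit_qual == EXIT_QUAL_NMI_WHILE_STI_BLOCKING)
+ *		(this processor requires STI_BLOCKING gating)
+ */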
+
+/*
+ * Optional capabilities
+ */
+static int cap_halt_exit;
+static int cap_pause_exit;
+static int cap_unrestricted_guest;
+static int cap_monitor_trap;
+
+/* statistics */
+static VMM_STAT_INTEL(VMEXIT_HLT_IGNORED, "number of times hlt was ignored");
+
+#ifdef KTR
+static const char *
+exit_reason_to_str(int reason)
+{
+ static char reasonbuf[32];
+
+ switch (reason) {
+ case EXIT_REASON_EXCEPTION:
+ return "exception";
+ case EXIT_REASON_EXT_INTR:
+ return "extint";
+ case EXIT_REASON_TRIPLE_FAULT:
+ return "triplefault";
+ case EXIT_REASON_INIT:
+ return "init";
+ case EXIT_REASON_SIPI:
+ return "sipi";
+ case EXIT_REASON_IO_SMI:
+ return "iosmi";
+ case EXIT_REASON_SMI:
+ return "smi";
+ case EXIT_REASON_INTR_WINDOW:
+ return "intrwindow";
+ case EXIT_REASON_NMI_WINDOW:
+ return "nmiwindow";
+ case EXIT_REASON_TASK_SWITCH:
+ return "taskswitch";
+ case EXIT_REASON_CPUID:
+ return "cpuid";
+ case EXIT_REASON_GETSEC:
+ return "getsec";
+ case EXIT_REASON_HLT:
+ return "hlt";
+ case EXIT_REASON_INVD:
+ return "invd";
+ case EXIT_REASON_INVLPG:
+ return "invlpg";
+ case EXIT_REASON_RDPMC:
+ return "rdpmc";
+ case EXIT_REASON_RDTSC:
+ return "rdtsc";
+ case EXIT_REASON_RSM:
+ return "rsm";
+ case EXIT_REASON_VMCALL:
+ return "vmcall";
+ case EXIT_REASON_VMCLEAR:
+ return "vmclear";
+ case EXIT_REASON_VMLAUNCH:
+ return "vmlaunch";
+ case EXIT_REASON_VMPTRLD:
+ return "vmptrld";
+ case EXIT_REASON_VMPTRST:
+ return "vmptrst";
+ case EXIT_REASON_VMREAD:
+ return "vmread";
+ case EXIT_REASON_VMRESUME:
+ return "vmresume";
+ case EXIT_REASON_VMWRITE:
+ return "vmwrite";
+ case EXIT_REASON_VMXOFF:
+ return "vmxoff";
+ case EXIT_REASON_VMXON:
+ return "vmxon";
+ case EXIT_REASON_CR_ACCESS:
+ return "craccess";
+ case EXIT_REASON_DR_ACCESS:
+ return "draccess";
+ case EXIT_REASON_INOUT:
+ return "inout";
+ case EXIT_REASON_RDMSR:
+ return "rdmsr";
+ case EXIT_REASON_WRMSR:
+ return "wrmsr";
+ case EXIT_REASON_INVAL_VMCS:
+ return "invalvmcs";
+ case EXIT_REASON_INVAL_MSR:
+ return "invalmsr";
+ case EXIT_REASON_MWAIT:
+ return "mwait";
+ case EXIT_REASON_MTF:
+ return "mtf";
+ case EXIT_REASON_MONITOR:
+ return "monitor";
+ case EXIT_REASON_PAUSE:
+ return "pause";
+ case EXIT_REASON_MCE:
+ return "mce";
+ case EXIT_REASON_TPR:
+ return "tpr";
+ case EXIT_REASON_APIC:
+ return "apic";
+ case EXIT_REASON_GDTR_IDTR:
+ return "gdtridtr";
+ case EXIT_REASON_LDTR_TR:
+ return "ldtrtr";
+ case EXIT_REASON_EPT_FAULT:
+ return "eptfault";
+ case EXIT_REASON_EPT_MISCONFIG:
+ return "eptmisconfig";
+ case EXIT_REASON_INVEPT:
+ return "invept";
+ case EXIT_REASON_RDTSCP:
+ return "rdtscp";
+ case EXIT_REASON_VMX_PREEMPT:
+ return "vmxpreempt";
+ case EXIT_REASON_INVVPID:
+ return "invvpid";
+ case EXIT_REASON_WBINVD:
+ return "wbinvd";
+ case EXIT_REASON_XSETBV:
+ return "xsetbv";
+ default:
+ snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason);
+ return (reasonbuf);
+ }
+}
+
+#ifdef SETJMP_TRACE
+static const char *
+vmx_setjmp_rc2str(int rc)
+{
+ switch (rc) {
+ case VMX_RETURN_DIRECT:
+ return "direct";
+ case VMX_RETURN_LONGJMP:
+ return "longjmp";
+ case VMX_RETURN_VMRESUME:
+ return "vmresume";
+ case VMX_RETURN_VMLAUNCH:
+ return "vmlaunch";
+ case VMX_RETURN_AST:
+ return "ast";
+ default:
+ return "unknown";
+ }
+}
+
+#define SETJMP_TRACE(vmx, vcpu, vmxctx, regname) \
+ VMM_CTR1((vmx)->vm, (vcpu), "setjmp trace " #regname " 0x%016lx", \
+ (vmxctx)->regname)
+
+static void
+vmx_setjmp_trace(struct vmx *vmx, int vcpu, struct vmxctx *vmxctx, int rc)
+{
+ uint64_t host_rip, host_rsp;
+
+ if (vmxctx != &vmx->ctx[vcpu])
+ panic("vmx_setjmp_trace: invalid vmxctx %p; should be %p",
+ vmxctx, &vmx->ctx[vcpu]);
+
+ VMM_CTR1((vmx)->vm, (vcpu), "vmxctx = %p", vmxctx);
+ VMM_CTR2((vmx)->vm, (vcpu), "setjmp return code %s(%d)",
+ vmx_setjmp_rc2str(rc), rc);
+
+ host_rsp = host_rip = ~0;
+ vmread(VMCS_HOST_RIP, &host_rip);
+ vmread(VMCS_HOST_RSP, &host_rsp);
+ VMM_CTR2((vmx)->vm, (vcpu), "vmcs host_rip 0x%016lx, host_rsp 0x%016lx",
+ host_rip, host_rsp);
+
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_r15);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_r14);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_r13);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_r12);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_rbp);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_rsp);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_rbx);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, host_rip);
+
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rdi);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rsi);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rdx);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rcx);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r8);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r9);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rax);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rbx);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rbp);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r10);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r11);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r12);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r13);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r14);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r15);
+ SETJMP_TRACE(vmx, vcpu, vmxctx, guest_cr2);
+}
+#endif
+#else
+static void __inline
+vmx_setjmp_trace(struct vmx *vmx, int vcpu, struct vmxctx *vmxctx, int rc)
+{
+ return;
+}
+#endif /* KTR */
+
+u_long
+vmx_fix_cr0(u_long cr0)
+{
+
+ return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask);
+}
+
+u_long
+vmx_fix_cr4(u_long cr4)
+{
+
+ return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask);
+}
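+
+/*
+ * Illustrative use of the fixup routines above (a sketch, not actual
+ * driver code): a caller constructing an initial guest %cr0/%cr4 value
+ * can pass it through the fixup to satisfy the FIXED0/FIXED1 MSR
+ * constraints, e.g.
+ *
+ *	cr0 = vmx_fix_cr0(CR0_NE);	(fixed-1 bits are or'ed in)
+ *	cr4 = vmx_fix_cr4(0);		(CR4_VMXE comes back set)
+ */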
+
+static void
+msr_save_area_init(struct msr_entry *g_area, int *g_count)
+{
+ int cnt;
+
+ static struct msr_entry guest_msrs[] = {
+ { MSR_KGSBASE, 0, 0 },
+ };
+
+ cnt = sizeof(guest_msrs) / sizeof(guest_msrs[0]);
+ if (cnt > GUEST_MSR_MAX_ENTRIES)
+ panic("guest msr save area overrun");
+ bcopy(guest_msrs, g_area, sizeof(guest_msrs));
+ *g_count = cnt;
+}
+
+static void
+vmx_disable(void *arg __unused)
+{
+ struct invvpid_desc invvpid_desc = { 0 };
+ struct invept_desc invept_desc = { 0 };
+
+ if (vmxon_enabled[curcpu]) {
+ /*
+ * See sections 25.3.3.3 and 25.3.3.4 in Intel Vol 3b.
+ *
+		 * VMXON and VMXOFF are not required to invalidate any TLB
+		 * caching structures, so we invalidate all mappings here
+		 * explicitly to prevent potential retention of cached
+		 * information in the TLB between distinct VMX episodes.
+ */
+ invvpid(INVVPID_TYPE_ALL_CONTEXTS, invvpid_desc);
+ invept(INVEPT_TYPE_ALL_CONTEXTS, invept_desc);
+ vmxoff();
+ }
+ load_cr4(rcr4() & ~CR4_VMXE);
+}
+
+static int
+vmx_cleanup(void)
+{
+
+ smp_rendezvous(NULL, vmx_disable, NULL, NULL);
+
+ return (0);
+}
+
+static void
+vmx_enable(void *arg __unused)
+{
+ int error;
+
+ load_cr4(rcr4() | CR4_VMXE);
+
+ *(uint32_t *)vmxon_region[curcpu] = vmx_revision();
+ error = vmxon(vmxon_region[curcpu]);
+ if (error == 0)
+ vmxon_enabled[curcpu] = 1;
+}
+
+static int
+vmx_init(void)
+{
+ int error;
+ uint64_t fixed0, fixed1, feature_control;
+ uint32_t tmp;
+
+	/* CPUID.1:ECX[bit 5] must be 1 for the processor to support VMX */
+ if (!(cpu_feature2 & CPUID2_VMX)) {
+ printf("vmx_init: processor does not support VMX operation\n");
+ return (ENXIO);
+ }
+
+ /*
+ * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits
+ * are set (bits 0 and 2 respectively).
+ */
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 ||
+ (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
+ printf("vmx_init: VMX operation disabled by BIOS\n");
+ return (ENXIO);
+ }
+
+ /* Check support for primary processor-based VM-execution controls */
+ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+ MSR_VMX_TRUE_PROCBASED_CTLS,
+ PROCBASED_CTLS_ONE_SETTING,
+ PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls);
+ if (error) {
+ printf("vmx_init: processor does not support desired primary "
+ "processor-based controls\n");
+ return (error);
+ }
+
+ /* Clear the processor-based ctl bits that are set on demand */
+ procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING;
+
+ /* Check support for secondary processor-based VM-execution controls */
+ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+ MSR_VMX_PROCBASED_CTLS2,
+ PROCBASED_CTLS2_ONE_SETTING,
+ PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2);
+ if (error) {
+ printf("vmx_init: processor does not support desired secondary "
+ "processor-based controls\n");
+ return (error);
+ }
+
+ /* Check support for VPID */
+ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
+ PROCBASED2_ENABLE_VPID, 0, &tmp);
+ if (error == 0)
+ procbased_ctls2 |= PROCBASED2_ENABLE_VPID;
+
+ /* Check support for pin-based VM-execution controls */
+ error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
+ MSR_VMX_TRUE_PINBASED_CTLS,
+ PINBASED_CTLS_ONE_SETTING,
+ PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls);
+ if (error) {
+ printf("vmx_init: processor does not support desired "
+ "pin-based controls\n");
+ return (error);
+ }
+
+ /* Check support for VM-exit controls */
+ error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS,
+ VM_EXIT_CTLS_ONE_SETTING,
+ VM_EXIT_CTLS_ZERO_SETTING,
+ &exit_ctls);
+ if (error) {
+ /* Try again without the PAT MSR bits */
+ error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS,
+ MSR_VMX_TRUE_EXIT_CTLS,
+ VM_EXIT_CTLS_ONE_SETTING_NO_PAT,
+ VM_EXIT_CTLS_ZERO_SETTING,
+ &exit_ctls);
+ if (error) {
+ printf("vmx_init: processor does not support desired "
+ "exit controls\n");
+ return (error);
+ } else {
+ if (bootverbose)
+ printf("vmm: PAT MSR access not supported\n");
+ guest_msr_valid(MSR_PAT);
+ vmx_no_patmsr = 1;
+ }
+ }
+
+ /* Check support for VM-entry controls */
+ if (!vmx_no_patmsr) {
+ error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS,
+ MSR_VMX_TRUE_ENTRY_CTLS,
+ VM_ENTRY_CTLS_ONE_SETTING,
+ VM_ENTRY_CTLS_ZERO_SETTING,
+ &entry_ctls);
+ } else {
+ error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS,
+ MSR_VMX_TRUE_ENTRY_CTLS,
+ VM_ENTRY_CTLS_ONE_SETTING_NO_PAT,
+ VM_ENTRY_CTLS_ZERO_SETTING,
+ &entry_ctls);
+ }
+
+ if (error) {
+ printf("vmx_init: processor does not support desired "
+ "entry controls\n");
+ return (error);
+ }
+
+ /*
+ * Check support for optional features by testing them
+ * as individual bits
+ */
+ cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+ MSR_VMX_TRUE_PROCBASED_CTLS,
+ PROCBASED_HLT_EXITING, 0,
+ &tmp) == 0);
+
+ cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+ MSR_VMX_PROCBASED_CTLS,
+ PROCBASED_MTF, 0,
+ &tmp) == 0);
+
+ cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+ MSR_VMX_TRUE_PROCBASED_CTLS,
+ PROCBASED_PAUSE_EXITING, 0,
+ &tmp) == 0);
+
+ cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+ MSR_VMX_PROCBASED_CTLS2,
+ PROCBASED2_UNRESTRICTED_GUEST, 0,
+ &tmp) == 0);
+
+ /* Initialize EPT */
+ error = ept_init();
+ if (error) {
+ printf("vmx_init: ept initialization failed (%d)\n", error);
+ return (error);
+ }
+
+ /*
+ * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
+ */
+ fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);
+ fixed1 = rdmsr(MSR_VMX_CR0_FIXED1);
+ cr0_ones_mask = fixed0 & fixed1;
+ cr0_zeros_mask = ~fixed0 & ~fixed1;
+
+ /*
+ * CR0_PE and CR0_PG can be set to zero in VMX non-root operation
+ * if unrestricted guest execution is allowed.
+ */
+ if (cap_unrestricted_guest)
+ cr0_ones_mask &= ~(CR0_PG | CR0_PE);
+
+ /*
+ * Do not allow the guest to set CR0_NW or CR0_CD.
+ */
+ cr0_zeros_mask |= (CR0_NW | CR0_CD);
+
+ fixed0 = rdmsr(MSR_VMX_CR4_FIXED0);
+ fixed1 = rdmsr(MSR_VMX_CR4_FIXED1);
+ cr4_ones_mask = fixed0 & fixed1;
+ cr4_zeros_mask = ~fixed0 & ~fixed1;
+
+ /* enable VMX operation */
+ smp_rendezvous(NULL, vmx_enable, NULL, NULL);
+
+ return (0);
+}
+
+/*
+ * If this processor does not support VPIDs then simply return 0.
+ *
+ * Otherwise generate the next value of VPID to use. Any value is
+ * acceptable as long as it is non-zero.
+ *
+ * We always execute in VMX non-root context with EPT enabled. Thus all
+ * combined mappings are tagged with the (EP4TA, VPID, PCID) tuple. This
+ * in turn means that multiple VMs can share the same VPID as long as
+ * they have distinct EPT page tables.
+ *
+ * XXX
+ * We should optimize this so that it returns VPIDs that are not in
+ * use. Then we will not unnecessarily invalidate mappings in
+ * vmx_set_pcpu_defaults() just because two or more vcpus happen to
+ * use the same 'vpid'.
+ */
+static uint16_t
+vmx_vpid(void)
+{
+ uint16_t vpid = 0;
+
+ if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) != 0) {
+ do {
+ vpid = atomic_fetchadd_int(&nextvpid, 1);
+ } while (vpid == 0);
+ }
+
+ return (vpid);
+}
+
+static int
+vmx_setup_cr_shadow(int which, struct vmcs *vmcs)
+{
+ int error, mask_ident, shadow_ident;
+ uint64_t mask_value, shadow_value;
+
+ if (which != 0 && which != 4)
+ panic("vmx_setup_cr_shadow: unknown cr%d", which);
+
+ if (which == 0) {
+ mask_ident = VMCS_CR0_MASK;
+ mask_value = cr0_ones_mask | cr0_zeros_mask;
+ shadow_ident = VMCS_CR0_SHADOW;
+ shadow_value = cr0_ones_mask;
+ } else {
+ mask_ident = VMCS_CR4_MASK;
+ mask_value = cr4_ones_mask | cr4_zeros_mask;
+ shadow_ident = VMCS_CR4_SHADOW;
+ shadow_value = cr4_ones_mask;
+ }
+
+ error = vmcs_setreg(vmcs, VMCS_IDENT(mask_ident), mask_value);
+ if (error)
+ return (error);
+
+ error = vmcs_setreg(vmcs, VMCS_IDENT(shadow_ident), shadow_value);
+ if (error)
+ return (error);
+
+ return (0);
+}
+#define vmx_setup_cr0_shadow(vmcs) vmx_setup_cr_shadow(0, (vmcs))
+#define vmx_setup_cr4_shadow(vmcs) vmx_setup_cr_shadow(4, (vmcs))
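+
+/*
+ * With the mask and shadow programmed as above, the value a guest sees
+ * on a 'mov %cr0, %reg' is, in effect (a sketch):
+ *
+ *	guest_view = (shadow & mask) | (real_cr0 & ~mask);
+ *
+ * Reads of masked bits come back from the read shadow, while a guest
+ * write that would make a masked bit differ from its shadow traps with
+ * EXIT_REASON_CR_ACCESS and is emulated by vmx_emulate_cr_access().
+ */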
+
+static void *
+vmx_vminit(struct vm *vm)
+{
+ uint16_t vpid;
+ int i, error, guest_msr_count;
+ struct vmx *vmx;
+
+ vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO);
+ if ((uintptr_t)vmx & PAGE_MASK) {
+ panic("malloc of struct vmx not aligned on %d byte boundary",
+ PAGE_SIZE);
+ }
+ vmx->vm = vm;
+
+ /*
+ * Clean up EPTP-tagged guest physical and combined mappings
+ *
+ * VMX transitions are not required to invalidate any guest physical
+ * mappings. So, it may be possible for stale guest physical mappings
+ * to be present in the processor TLBs.
+ *
+ * Combined mappings for this EP4TA are also invalidated for all VPIDs.
+ */
+ ept_invalidate_mappings(vtophys(vmx->pml4ept));
+
+ msr_bitmap_initialize(vmx->msr_bitmap);
+
+ /*
+ * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE.
+ * The guest FSBASE and GSBASE are saved and restored during
+ * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are
+ * always restored from the vmcs host state area on vm-exit.
+ *
+ * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in
+ * how they are saved/restored so can be directly accessed by the
+ * guest.
+ *
+ * Guest KGSBASE is saved and restored in the guest MSR save area.
+ * Host KGSBASE is restored before returning to userland from the pcb.
+ * There will be a window of time when we are executing in the host
+ * kernel context with a value of KGSBASE from the guest. This is ok
+ * because the value of KGSBASE is inconsequential in kernel context.
+ *
+ * MSR_EFER is saved and restored in the guest VMCS area on a
+ * VM exit and entry respectively. It is also restored from the
+ * host VMCS area on a VM exit.
+ */
+ if (guest_msr_rw(vmx, MSR_GSBASE) ||
+ guest_msr_rw(vmx, MSR_FSBASE) ||
+ guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) ||
+ guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
+ guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
+ guest_msr_rw(vmx, MSR_KGSBASE) ||
+ guest_msr_rw(vmx, MSR_EFER))
+ panic("vmx_vminit: error setting guest msr access");
+
+ /*
+	 * MSR_PAT is saved and restored in the guest VMCS area on a VM
+	 * exit and entry respectively. It is also restored from the host
+	 * VMCS area on a VM exit. However, if running on a system with no
+	 * MSR_PAT save/restore support, leave access disabled so accesses
+	 * will be trapped.
+ */
+ if (!vmx_no_patmsr && guest_msr_rw(vmx, MSR_PAT))
+ panic("vmx_vminit: error setting guest pat msr access");
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vmx->vmcs[i].identifier = vmx_revision();
+ error = vmclear(&vmx->vmcs[i]);
+ if (error != 0) {
+ panic("vmx_vminit: vmclear error %d on vcpu %d\n",
+ error, i);
+ }
+
+ vpid = vmx_vpid();
+
+ error = vmcs_set_defaults(&vmx->vmcs[i],
+ (u_long)vmx_longjmp,
+ (u_long)&vmx->ctx[i],
+ vtophys(vmx->pml4ept),
+ pinbased_ctls,
+ procbased_ctls,
+ procbased_ctls2,
+ exit_ctls, entry_ctls,
+ vtophys(vmx->msr_bitmap),
+ vpid);
+
+ if (error != 0)
+ panic("vmx_vminit: vmcs_set_defaults error %d", error);
+
+ vmx->cap[i].set = 0;
+ vmx->cap[i].proc_ctls = procbased_ctls;
+
+ vmx->state[i].lastcpu = -1;
+ vmx->state[i].vpid = vpid;
+
+ msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count);
+
+ error = vmcs_set_msr_save(&vmx->vmcs[i],
+ vtophys(vmx->guest_msrs[i]),
+ guest_msr_count);
+ if (error != 0)
+ panic("vmcs_set_msr_save error %d", error);
+
+ error = vmx_setup_cr0_shadow(&vmx->vmcs[i]);
+ if (error != 0)
+ panic("vmx_setup_cr0_shadow %d", error);
+
+ error = vmx_setup_cr4_shadow(&vmx->vmcs[i]);
+ if (error != 0)
+ panic("vmx_setup_cr4_shadow %d", error);
+ }
+
+ return (vmx);
+}
+
+static int
+vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx)
+{
+ int handled, func;
+
+ func = vmxctx->guest_rax;
+
+ handled = x86_emulate_cpuid(vm, vcpu,
+ (uint32_t*)(&vmxctx->guest_rax),
+ (uint32_t*)(&vmxctx->guest_rbx),
+ (uint32_t*)(&vmxctx->guest_rcx),
+ (uint32_t*)(&vmxctx->guest_rdx));
+ return (handled);
+}
+
+static __inline void
+vmx_run_trace(struct vmx *vmx, int vcpu)
+{
+#ifdef KTR
+ VMM_CTR1(vmx->vm, vcpu, "Resume execution at 0x%0lx", vmcs_guest_rip());
+#endif
+}
+
+static __inline void
+vmx_exit_trace(struct vmx *vmx, int vcpu, uint64_t rip, uint32_t exit_reason,
+ int handled)
+{
+#ifdef KTR
+ VMM_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx",
+ handled ? "handled" : "unhandled",
+ exit_reason_to_str(exit_reason), rip);
+#endif
+}
+
+static __inline void
+vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip)
+{
+#ifdef KTR
+ VMM_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip);
+#endif
+}
+
+static int
+vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu)
+{
+ int error, lastcpu;
+ struct vmxstate *vmxstate;
+ struct invvpid_desc invvpid_desc = { 0 };
+
+ vmxstate = &vmx->state[vcpu];
+ lastcpu = vmxstate->lastcpu;
+ vmxstate->lastcpu = curcpu;
+
+ if (lastcpu == curcpu) {
+ error = 0;
+ goto done;
+ }
+
+ vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1);
+
+ error = vmwrite(VMCS_HOST_TR_BASE, vmm_get_host_trbase());
+ if (error != 0)
+ goto done;
+
+ error = vmwrite(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase());
+ if (error != 0)
+ goto done;
+
+ error = vmwrite(VMCS_HOST_GS_BASE, vmm_get_host_gsbase());
+ if (error != 0)
+ goto done;
+
+ /*
+ * If we are using VPIDs then invalidate all mappings tagged with 'vpid'
+ *
+ * We do this because this vcpu was executing on a different host
+ * cpu when it last ran. We do not track whether it invalidated
+ * mappings associated with its 'vpid' during that run. So we must
+ * assume that the mappings associated with 'vpid' on 'curcpu' are
+ * stale and invalidate them.
+ *
+ * Note that we incur this penalty only when the scheduler chooses to
+ * move the thread associated with this vcpu between host cpus.
+ *
+ * Note also that this will invalidate mappings tagged with 'vpid'
+ * for "all" EP4TAs.
+ */
+ if (vmxstate->vpid != 0) {
+ invvpid_desc.vpid = vmxstate->vpid;
+ invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc);
+ }
+done:
+ return (error);
+}
+
+static void
+vm_exit_update_rip(struct vm_exit *vmexit)
+{
+ int error;
+
+ error = vmwrite(VMCS_GUEST_RIP, vmexit->rip + vmexit->inst_length);
+ if (error)
+ panic("vmx_run: error %d writing to VMCS_GUEST_RIP", error);
+}
+
+/*
+ * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set.
+ */
+CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0);
+
+static void __inline
+vmx_set_int_window_exiting(struct vmx *vmx, int vcpu)
+{
+ int error;
+
+ vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING;
+
+ error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+ if (error)
+ panic("vmx_set_int_window_exiting: vmwrite error %d", error);
+}
+
+static void __inline
+vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu)
+{
+ int error;
+
+ vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING;
+
+ error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+ if (error)
+ panic("vmx_clear_int_window_exiting: vmwrite error %d", error);
+}
+
+static void __inline
+vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu)
+{
+ int error;
+
+ vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING;
+
+ error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+ if (error)
+ panic("vmx_set_nmi_window_exiting: vmwrite error %d", error);
+}
+
+static void __inline
+vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
+{
+ int error;
+
+ vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING;
+
+ error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+ if (error)
+ panic("vmx_clear_nmi_window_exiting: vmwrite error %d", error);
+}
+
+static int
+vmx_inject_nmi(struct vmx *vmx, int vcpu)
+{
+ int error;
+ uint64_t info, interruptibility;
+
+ /* Bail out if no NMI requested */
+ if (!vm_nmi_pending(vmx->vm, vcpu))
+ return (0);
+
+ error = vmread(VMCS_GUEST_INTERRUPTIBILITY, &interruptibility);
+ if (error) {
+ panic("vmx_inject_nmi: vmread(interruptibility) %d",
+ error);
+ }
+ if (interruptibility & nmi_blocking_bits)
+ goto nmiblocked;
+
+ /*
+ * Inject the virtual NMI. The vector must be the NMI IDT entry
+ * or the VMCS entry check will fail.
+ */
+ info = VMCS_INTERRUPTION_INFO_NMI | VMCS_INTERRUPTION_INFO_VALID;
+ info |= IDT_NMI;
+
+ error = vmwrite(VMCS_ENTRY_INTR_INFO, info);
+ if (error)
+ panic("vmx_inject_nmi: vmwrite(intrinfo) %d", error);
+
+ VMM_CTR0(vmx->vm, vcpu, "Injecting vNMI");
+
+ /* Clear the request */
+ vm_nmi_clear(vmx->vm, vcpu);
+ return (1);
+
+nmiblocked:
+ /*
+ * Set the NMI Window Exiting execution control so we can inject
+	 * the virtual NMI as soon as the blocking condition goes away.
+ */
+ vmx_set_nmi_window_exiting(vmx, vcpu);
+
+ VMM_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting");
+ return (1);
+}
+
+static void
+vmx_inject_interrupts(struct vmx *vmx, int vcpu)
+{
+ int error, vector;
+ uint64_t info, rflags, interruptibility;
+
+ const int HWINTR_BLOCKED = VMCS_INTERRUPTIBILITY_STI_BLOCKING |
+ VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING;
+
+ /*
+ * If there is already an interrupt pending then just return.
+ *
+ * This could happen if an interrupt was injected on a prior
+ * VM entry but the actual entry into guest mode was aborted
+ * because of a pending AST.
+ */
+ error = vmread(VMCS_ENTRY_INTR_INFO, &info);
+ if (error)
+ panic("vmx_inject_interrupts: vmread(intrinfo) %d", error);
+ if (info & VMCS_INTERRUPTION_INFO_VALID)
+ return;
+
+ /*
+ * NMI injection has priority so deal with those first
+ */
+ if (vmx_inject_nmi(vmx, vcpu))
+ return;
+
+ /* Ask the local apic for a vector to inject */
+ vector = lapic_pending_intr(vmx->vm, vcpu);
+ if (vector < 0)
+ return;
+
+ if (vector < 32 || vector > 255)
+ panic("vmx_inject_interrupts: invalid vector %d\n", vector);
+
+ /* Check RFLAGS.IF and the interruptibility state of the guest */
+ error = vmread(VMCS_GUEST_RFLAGS, &rflags);
+ if (error)
+ panic("vmx_inject_interrupts: vmread(rflags) %d", error);
+
+ if ((rflags & PSL_I) == 0)
+ goto cantinject;
+
+ error = vmread(VMCS_GUEST_INTERRUPTIBILITY, &interruptibility);
+ if (error) {
+ panic("vmx_inject_interrupts: vmread(interruptibility) %d",
+ error);
+ }
+ if (interruptibility & HWINTR_BLOCKED)
+ goto cantinject;
+
+ /* Inject the interrupt */
+ info = VMCS_INTERRUPTION_INFO_HW_INTR | VMCS_INTERRUPTION_INFO_VALID;
+ info |= vector;
+ error = vmwrite(VMCS_ENTRY_INTR_INFO, info);
+ if (error)
+ panic("vmx_inject_interrupts: vmwrite(intrinfo) %d", error);
+
+ /* Update the Local APIC ISR */
+ lapic_intr_accepted(vmx->vm, vcpu, vector);
+
+ VMM_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
+
+ return;
+
+cantinject:
+ /*
+ * Set the Interrupt Window Exiting execution control so we can inject
+	 * the interrupt as soon as the blocking condition goes away.
+ */
+ vmx_set_int_window_exiting(vmx, vcpu);
+
+ VMM_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting");
+}
+
+static int
+vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
+{
+ int error, cr, vmcs_guest_cr;
+ uint64_t regval, ones_mask, zeros_mask;
+ const struct vmxctx *vmxctx;
+
+ /* We only handle mov to %cr0 or %cr4 at this time */
+ if ((exitqual & 0xf0) != 0x00)
+ return (UNHANDLED);
+
+ cr = exitqual & 0xf;
+ if (cr != 0 && cr != 4)
+ return (UNHANDLED);
+
+ vmxctx = &vmx->ctx[vcpu];
+
+ /*
+ * We must use vmwrite() directly here because vmcs_setreg() will
+	 * call vmclear(vmcs) as a side-effect, which we certainly don't want.
+ */
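+	/* Bits 11:8 of the exit qualification encode the source GPR (SDM). */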
+ switch ((exitqual >> 8) & 0xf) {
+ case 0:
+ regval = vmxctx->guest_rax;
+ break;
+ case 1:
+ regval = vmxctx->guest_rcx;
+ break;
+ case 2:
+ regval = vmxctx->guest_rdx;
+ break;
+ case 3:
+ regval = vmxctx->guest_rbx;
+ break;
+ case 4:
+ error = vmread(VMCS_GUEST_RSP, &regval);
+ if (error) {
+ panic("vmx_emulate_cr_access: "
+ "error %d reading guest rsp", error);
+ }
+ break;
+ case 5:
+ regval = vmxctx->guest_rbp;
+ break;
+ case 6:
+ regval = vmxctx->guest_rsi;
+ break;
+ case 7:
+ regval = vmxctx->guest_rdi;
+ break;
+ case 8:
+ regval = vmxctx->guest_r8;
+ break;
+ case 9:
+ regval = vmxctx->guest_r9;
+ break;
+ case 10:
+ regval = vmxctx->guest_r10;
+ break;
+ case 11:
+ regval = vmxctx->guest_r11;
+ break;
+ case 12:
+ regval = vmxctx->guest_r12;
+ break;
+ case 13:
+ regval = vmxctx->guest_r13;
+ break;
+ case 14:
+ regval = vmxctx->guest_r14;
+ break;
+ case 15:
+ regval = vmxctx->guest_r15;
+ break;
+ }
+
+ if (cr == 0) {
+ ones_mask = cr0_ones_mask;
+ zeros_mask = cr0_zeros_mask;
+ vmcs_guest_cr = VMCS_GUEST_CR0;
+ } else {
+ ones_mask = cr4_ones_mask;
+ zeros_mask = cr4_zeros_mask;
+ vmcs_guest_cr = VMCS_GUEST_CR4;
+ }
+ regval |= ones_mask;
+ regval &= ~zeros_mask;
+ error = vmwrite(vmcs_guest_cr, regval);
+ if (error) {
+ panic("vmx_emulate_cr_access: error %d writing cr%d",
+ error, cr);
+ }
+
+ return (HANDLED);
+}
+
+static int
+vmx_ept_fault(struct vm *vm, int cpu,
+ uint64_t gla, uint64_t gpa, uint64_t rip, int inst_length,
+ uint64_t cr3, uint64_t ept_qual, struct vie *vie)
+{
+ int read, write, error;
+
+ /* EPT violation on an instruction fetch doesn't make sense here */
+ if (ept_qual & EPT_VIOLATION_INST_FETCH)
+ return (UNHANDLED);
+
+ /* EPT violation must be a read fault or a write fault */
+ read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
+ write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
+ if ((read | write) == 0)
+ return (UNHANDLED);
+
+ /*
+ * The EPT violation must have been caused by accessing a
+ * guest-physical address that is a translation of a guest-linear
+ * address.
+ */
+ if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
+ (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
+ return (UNHANDLED);
+ }
+
+ /* Fetch, decode and emulate the faulting instruction */
+ if (vmm_fetch_instruction(vm, cpu, rip, inst_length, cr3, vie) != 0)
+ return (UNHANDLED);
+
+ if (vmm_decode_instruction(vm, cpu, gla, vie) != 0)
+ return (UNHANDLED);
+
+ /*
+ * Check if this is a local apic access
+ */
+ if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
+ return (UNHANDLED);
+
+ error = vmm_emulate_instruction(vm, cpu, gpa, vie,
+ lapic_mmio_read, lapic_mmio_write, 0);
+
+ return (error ? UNHANDLED : HANDLED);
+}
+
+static int
+vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
+{
+ int error, handled;
+ struct vmcs *vmcs;
+ struct vmxctx *vmxctx;
+ uint32_t eax, ecx, edx;
+ uint64_t qual, gla, gpa, cr3, intr_info;
+
+ handled = 0;
+ vmcs = &vmx->vmcs[vcpu];
+ vmxctx = &vmx->ctx[vcpu];
+ qual = vmexit->u.vmx.exit_qualification;
+ vmexit->exitcode = VM_EXITCODE_BOGUS;
+
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);
+
+ switch (vmexit->u.vmx.exit_reason) {
+ case EXIT_REASON_CR_ACCESS:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1);
+ handled = vmx_emulate_cr_access(vmx, vcpu, qual);
+ break;
+ case EXIT_REASON_RDMSR:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
+ ecx = vmxctx->guest_rcx;
+ error = emulate_rdmsr(vmx->vm, vcpu, ecx);
+ if (error) {
+ vmexit->exitcode = VM_EXITCODE_RDMSR;
+ vmexit->u.msr.code = ecx;
+ } else
+ handled = 1;
+ break;
+ case EXIT_REASON_WRMSR:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1);
+ eax = vmxctx->guest_rax;
+ ecx = vmxctx->guest_rcx;
+ edx = vmxctx->guest_rdx;
+ error = emulate_wrmsr(vmx->vm, vcpu, ecx,
+ (uint64_t)edx << 32 | eax);
+ if (error) {
+ vmexit->exitcode = VM_EXITCODE_WRMSR;
+ vmexit->u.msr.code = ecx;
+ vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
+ } else
+ handled = 1;
+ break;
+ case EXIT_REASON_HLT:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
+ /*
+ * If there is an event waiting to be injected then there is
+ * no need to 'hlt'.
+ */
+ error = vmread(VMCS_ENTRY_INTR_INFO, &intr_info);
+ if (error)
+ panic("vmx_exit_process: vmread(intrinfo) %d", error);
+
+ if (intr_info & VMCS_INTERRUPTION_INFO_VALID) {
+ handled = 1;
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT_IGNORED, 1);
+ } else
+ vmexit->exitcode = VM_EXITCODE_HLT;
+ break;
+ case EXIT_REASON_MTF:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
+ vmexit->exitcode = VM_EXITCODE_MTRAP;
+ break;
+ case EXIT_REASON_PAUSE:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
+ vmexit->exitcode = VM_EXITCODE_PAUSE;
+ break;
+ case EXIT_REASON_INTR_WINDOW:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1);
+ vmx_clear_int_window_exiting(vmx, vcpu);
+ VMM_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting");
+ return (1);
+ case EXIT_REASON_EXT_INTR:
+ /*
+ * External interrupts serve only to cause VM exits and allow
+ * the host interrupt handler to run.
+ *
+ * If this external interrupt triggers a virtual interrupt
+ * to a VM, then that state will be recorded by the
+ * host interrupt handler in the VM's softc. We will inject
+ * this virtual interrupt during the subsequent VM enter.
+ */
+
+ /*
+		 * This is special. We want to treat this as a 'handled'
+ * VM-exit but not increment the instruction pointer.
+ */
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1);
+ return (1);
+ case EXIT_REASON_NMI_WINDOW:
+ /* Exit to allow the pending virtual NMI to be injected */
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1);
+ vmx_clear_nmi_window_exiting(vmx, vcpu);
+ VMM_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
+ return (1);
+ case EXIT_REASON_INOUT:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
+ vmexit->exitcode = VM_EXITCODE_INOUT;
+ vmexit->u.inout.bytes = (qual & 0x7) + 1;
+ vmexit->u.inout.in = (qual & 0x8) ? 1 : 0;
+ vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
+ vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
+ vmexit->u.inout.port = (uint16_t)(qual >> 16);
+ vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
+ break;
+ case EXIT_REASON_CPUID:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1);
+ handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
+ break;
+ case EXIT_REASON_EPT_FAULT:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EPT_FAULT, 1);
+ gla = vmcs_gla();
+ gpa = vmcs_gpa();
+ cr3 = vmcs_guest_cr3();
+ handled = vmx_ept_fault(vmx->vm, vcpu, gla, gpa,
+ vmexit->rip, vmexit->inst_length,
+ cr3, qual, &vmexit->u.paging.vie);
+ if (!handled) {
+ vmexit->exitcode = VM_EXITCODE_PAGING;
+ vmexit->u.paging.gpa = gpa;
+ }
+ break;
+ default:
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
+ break;
+ }
+
+ if (handled) {
+ /*
+ * It is possible that control is returned to userland
+ * even though we were able to handle the VM exit in the
+ * kernel.
+ *
+ * In such a case we want to make sure that the userland
+ * restarts guest execution at the instruction *after*
+ * the one we just processed. Therefore we update the
+ * guest rip in the VMCS and in 'vmexit'.
+ */
+ vm_exit_update_rip(vmexit);
+ vmexit->rip += vmexit->inst_length;
+ vmexit->inst_length = 0;
+
+ /*
+ * Special case for spinning up an AP - exit to userspace to
+ * give the controlling process a chance to intercept and
+ * spin up a thread for the AP.
+ */
+ if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP)
+ handled = 0;
+ } else {
+ if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
+ /*
+ * If this VM exit was not claimed by anybody then
+ * treat it as a generic VMX exit.
+ */
+ vmexit->exitcode = VM_EXITCODE_VMX;
+ vmexit->u.vmx.error = 0;
+ } else {
+ /*
+ * The exitcode and collateral have been populated.
+ * The VM exit will be processed further in userland.
+ */
+ }
+ }
+ return (handled);
+}
+
+static int
+vmx_run(void *arg, int vcpu, register_t rip)
+{
+ int error, vie, rc, handled, astpending;
+ uint32_t exit_reason;
+ struct vmx *vmx;
+ struct vmxctx *vmxctx;
+ struct vmcs *vmcs;
+ struct vm_exit *vmexit;
+
+ vmx = arg;
+ vmcs = &vmx->vmcs[vcpu];
+ vmxctx = &vmx->ctx[vcpu];
+ vmxctx->launched = 0;
+
+ astpending = 0;
+ vmexit = vm_exitinfo(vmx->vm, vcpu);
+
+ /*
+ * XXX Can we avoid doing this every time we do a vm run?
+ */
+ VMPTRLD(vmcs);
+
+ /*
+ * XXX
+	 * We do this every time because we may set up the virtual machine
+ * from a different process than the one that actually runs it.
+ *
+ * If the life of a virtual machine was spent entirely in the context
+ * of a single process we could do this once in vmcs_set_defaults().
+ */
+ if ((error = vmwrite(VMCS_HOST_CR3, rcr3())) != 0)
+ panic("vmx_run: error %d writing to VMCS_HOST_CR3", error);
+
+ if ((error = vmwrite(VMCS_GUEST_RIP, rip)) != 0)
+ panic("vmx_run: error %d writing to VMCS_GUEST_RIP", error);
+
+ if ((error = vmx_set_pcpu_defaults(vmx, vcpu)) != 0)
+ panic("vmx_run: error %d setting up pcpu defaults", error);
+
+ do {
+ lapic_timer_tick(vmx->vm, vcpu);
+ vmx_inject_interrupts(vmx, vcpu);
+ vmx_run_trace(vmx, vcpu);
+ rc = vmx_setjmp(vmxctx);
+#ifdef SETJMP_TRACE
+ vmx_setjmp_trace(vmx, vcpu, vmxctx, rc);
+#endif
+ switch (rc) {
+ case VMX_RETURN_DIRECT:
+ if (vmxctx->launched == 0) {
+ vmxctx->launched = 1;
+ vmx_launch(vmxctx);
+ } else
+ vmx_resume(vmxctx);
+ panic("vmx_launch/resume should not return");
+ break;
+ case VMX_RETURN_LONGJMP:
+ break; /* vm exit */
+ case VMX_RETURN_AST:
+ astpending = 1;
+ break;
+ case VMX_RETURN_VMRESUME:
+ vie = vmcs_instruction_error();
+ if (vmxctx->launch_error == VM_FAIL_INVALID ||
+ vie != VMRESUME_WITH_NON_LAUNCHED_VMCS) {
+ printf("vmresume error %d vmcs inst error %d\n",
+ vmxctx->launch_error, vie);
+ goto err_exit;
+ }
+ vmx_launch(vmxctx); /* try to launch the guest */
+ panic("vmx_launch should not return");
+ break;
+ case VMX_RETURN_VMLAUNCH:
+ vie = vmcs_instruction_error();
+#if 1
+ printf("vmlaunch error %d vmcs inst error %d\n",
+ vmxctx->launch_error, vie);
+#endif
+ goto err_exit;
+ default:
+ panic("vmx_setjmp returned %d", rc);
+ }
+
+ /* enable interrupts */
+ enable_intr();
+
+ /* collect some basic information for VM exit processing */
+ vmexit->rip = rip = vmcs_guest_rip();
+ vmexit->inst_length = vmexit_instruction_length();
+ vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason();
+ vmexit->u.vmx.exit_qualification = vmcs_exit_qualification();
+
+ if (astpending) {
+ handled = 1;
+ vmexit->inst_length = 0;
+ vmexit->exitcode = VM_EXITCODE_BOGUS;
+ vmx_astpending_trace(vmx, vcpu, rip);
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_ASTPENDING, 1);
+ break;
+ }
+
+ handled = vmx_exit_process(vmx, vcpu, vmexit);
+ vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled);
+
+ } while (handled);
+
+ /*
+	 * If a VM exit has been handled then the exitcode must be BOGUS.
+	 * If a VM exit is not handled then the exitcode must not be BOGUS.
+ */
+ if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) ||
+ (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) {
+ panic("Mismatch between handled (%d) and exitcode (%d)",
+ handled, vmexit->exitcode);
+ }
+
+ if (!handled)
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_USERSPACE, 1);
+
+ VMM_CTR1(vmx->vm, vcpu, "goto userland: exitcode %d",vmexit->exitcode);
+
+ /*
+ * XXX
+ * We need to do this to ensure that any VMCS state cached by the
+ * processor is flushed to memory. We need to do this in case the
+ * VM moves to a different cpu the next time it runs.
+ *
+ * Can we avoid doing this?
+ */
+ VMCLEAR(vmcs);
+ return (0);
+
+err_exit:
+ vmexit->exitcode = VM_EXITCODE_VMX;
+ vmexit->u.vmx.exit_reason = (uint32_t)-1;
+ vmexit->u.vmx.exit_qualification = (uint32_t)-1;
+ vmexit->u.vmx.error = vie;
+ VMCLEAR(vmcs);
+ return (ENOEXEC);
+}
+
+static void
+vmx_vmcleanup(void *arg)
+{
+ int error;
+ struct vmx *vmx = arg;
+
+ /*
+ * XXXSMP we also need to clear the VMCS active on the other vcpus.
+ */
+ error = vmclear(&vmx->vmcs[0]);
+ if (error != 0)
+ panic("vmx_vmcleanup: vmclear error %d on vcpu 0", error);
+
+ ept_vmcleanup(vmx);
+ free(vmx, M_VMX);
+
+ return;
+}
+
+static register_t *
+vmxctx_regptr(struct vmxctx *vmxctx, int reg)
+{
+
+ switch (reg) {
+ case VM_REG_GUEST_RAX:
+ return (&vmxctx->guest_rax);
+ case VM_REG_GUEST_RBX:
+ return (&vmxctx->guest_rbx);
+ case VM_REG_GUEST_RCX:
+ return (&vmxctx->guest_rcx);
+ case VM_REG_GUEST_RDX:
+ return (&vmxctx->guest_rdx);
+ case VM_REG_GUEST_RSI:
+ return (&vmxctx->guest_rsi);
+ case VM_REG_GUEST_RDI:
+ return (&vmxctx->guest_rdi);
+ case VM_REG_GUEST_RBP:
+ return (&vmxctx->guest_rbp);
+ case VM_REG_GUEST_R8:
+ return (&vmxctx->guest_r8);
+ case VM_REG_GUEST_R9:
+ return (&vmxctx->guest_r9);
+ case VM_REG_GUEST_R10:
+ return (&vmxctx->guest_r10);
+ case VM_REG_GUEST_R11:
+ return (&vmxctx->guest_r11);
+ case VM_REG_GUEST_R12:
+ return (&vmxctx->guest_r12);
+ case VM_REG_GUEST_R13:
+ return (&vmxctx->guest_r13);
+ case VM_REG_GUEST_R14:
+ return (&vmxctx->guest_r14);
+ case VM_REG_GUEST_R15:
+ return (&vmxctx->guest_r15);
+ default:
+ break;
+ }
+ return (NULL);
+}
+
+static int
+vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval)
+{
+ register_t *regp;
+
+ if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
+ *retval = *regp;
+ return (0);
+ } else
+ return (EINVAL);
+}
+
+static int
+vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
+{
+ register_t *regp;
+
+ if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
+ *regp = val;
+ return (0);
+ } else
+ return (EINVAL);
+}
+
+static int
+vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
+{
+ struct vmx *vmx = arg;
+
+ if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
+ return (0);
+
+ /*
+ * If the vcpu is running then don't mess with the VMCS.
+ *
+ * vmcs_getreg will VMCLEAR the vmcs when it is done which will cause
+ * the subsequent vmlaunch/vmresume to fail.
+ */
+ if (vcpu_is_running(vmx->vm, vcpu))
+ panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
+
+ return (vmcs_getreg(&vmx->vmcs[vcpu], reg, retval));
+}
+
+static int
+vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
+{
+ int error;
+ uint64_t ctls;
+ struct vmx *vmx = arg;
+
+ /*
+ * XXX Allow caller to set contents of the guest registers saved in
+ * the 'vmxctx' even though the vcpu might be running. We need this
+ * specifically to support the rdmsr emulation that will set the
+ * %eax and %edx registers during vm exit processing.
+ */
+ if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
+ return (0);
+
+ /*
+ * If the vcpu is running then don't mess with the VMCS.
+ *
+ * vmcs_setreg will VMCLEAR the vmcs when it is done which will cause
+ * the subsequent vmlaunch/vmresume to fail.
+ */
+ if (vcpu_is_running(vmx->vm, vcpu))
+ panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
+
+ error = vmcs_setreg(&vmx->vmcs[vcpu], reg, val);
+
+ if (error == 0) {
+ /*
+ * If the "load EFER" VM-entry control is 1 then the
+ * value of EFER.LMA must be identical to "IA-32e mode guest"
+ * bit in the VM-entry control.
+ */
+ if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 &&
+ (reg == VM_REG_GUEST_EFER)) {
+ vmcs_getreg(&vmx->vmcs[vcpu],
+ VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls);
+ if (val & EFER_LMA)
+ ctls |= VM_ENTRY_GUEST_LMA;
+ else
+ ctls &= ~VM_ENTRY_GUEST_LMA;
+ vmcs_setreg(&vmx->vmcs[vcpu],
+ VMCS_IDENT(VMCS_ENTRY_CTLS), ctls);
+ }
+ }
+
+ return (error);
+}
+
+static int
+vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+ struct vmx *vmx = arg;
+
+ return (vmcs_getdesc(&vmx->vmcs[vcpu], reg, desc));
+}
+
+static int
+vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+ struct vmx *vmx = arg;
+
+ return (vmcs_setdesc(&vmx->vmcs[vcpu], reg, desc));
+}
+
+static int
+vmx_inject(void *arg, int vcpu, int type, int vector, uint32_t code,
+ int code_valid)
+{
+ int error;
+ uint64_t info;
+ struct vmx *vmx = arg;
+ struct vmcs *vmcs = &vmx->vmcs[vcpu];
+
+ static uint32_t type_map[VM_EVENT_MAX] = {
+ 0x1, /* VM_EVENT_NONE */
+ 0x0, /* VM_HW_INTR */
+ 0x2, /* VM_NMI */
+ 0x3, /* VM_HW_EXCEPTION */
+ 0x4, /* VM_SW_INTR */
+ 0x5, /* VM_PRIV_SW_EXCEPTION */
+ 0x6, /* VM_SW_EXCEPTION */
+ };
+
+ /*
+ * If there is already an exception pending to be delivered to the
+ * vcpu then just return.
+ */
+ error = vmcs_getreg(vmcs, VMCS_IDENT(VMCS_ENTRY_INTR_INFO), &info);
+ if (error)
+ return (error);
+
+ if (info & VMCS_INTERRUPTION_INFO_VALID)
+ return (EAGAIN);
+
+ info = vector | (type_map[type] << 8) | (code_valid ? 1 << 11 : 0);
+ info |= VMCS_INTERRUPTION_INFO_VALID;
+ error = vmcs_setreg(vmcs, VMCS_IDENT(VMCS_ENTRY_INTR_INFO), info);
+ if (error != 0)
+ return (error);
+
+ if (code_valid) {
+ error = vmcs_setreg(vmcs,
+ VMCS_IDENT(VMCS_ENTRY_EXCEPTION_ERROR),
+ code);
+ }
+ return (error);
+}
+
+static int
+vmx_getcap(void *arg, int vcpu, int type, int *retval)
+{
+ struct vmx *vmx = arg;
+ int vcap;
+ int ret;
+
+ ret = ENOENT;
+
+ vcap = vmx->cap[vcpu].set;
+
+ switch (type) {
+ case VM_CAP_HALT_EXIT:
+ if (cap_halt_exit)
+ ret = 0;
+ break;
+ case VM_CAP_PAUSE_EXIT:
+ if (cap_pause_exit)
+ ret = 0;
+ break;
+ case VM_CAP_MTRAP_EXIT:
+ if (cap_monitor_trap)
+ ret = 0;
+ break;
+ case VM_CAP_UNRESTRICTED_GUEST:
+ if (cap_unrestricted_guest)
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ if (ret == 0)
+ *retval = (vcap & (1 << type)) ? 1 : 0;
+
+ return (ret);
+}
+
+static int
+vmx_setcap(void *arg, int vcpu, int type, int val)
+{
+ struct vmx *vmx = arg;
+ struct vmcs *vmcs = &vmx->vmcs[vcpu];
+ uint32_t baseval;
+ uint32_t *pptr;
+ int error;
+ int flag;
+ int reg;
+ int retval;
+
+ retval = ENOENT;
+ pptr = NULL;
+
+ switch (type) {
+ case VM_CAP_HALT_EXIT:
+ if (cap_halt_exit) {
+ retval = 0;
+ pptr = &vmx->cap[vcpu].proc_ctls;
+ baseval = *pptr;
+ flag = PROCBASED_HLT_EXITING;
+ reg = VMCS_PRI_PROC_BASED_CTLS;
+ }
+ break;
+ case VM_CAP_MTRAP_EXIT:
+ if (cap_monitor_trap) {
+ retval = 0;
+ pptr = &vmx->cap[vcpu].proc_ctls;
+ baseval = *pptr;
+ flag = PROCBASED_MTF;
+ reg = VMCS_PRI_PROC_BASED_CTLS;
+ }
+ break;
+ case VM_CAP_PAUSE_EXIT:
+ if (cap_pause_exit) {
+ retval = 0;
+ pptr = &vmx->cap[vcpu].proc_ctls;
+ baseval = *pptr;
+ flag = PROCBASED_PAUSE_EXITING;
+ reg = VMCS_PRI_PROC_BASED_CTLS;
+ }
+ break;
+ case VM_CAP_UNRESTRICTED_GUEST:
+ if (cap_unrestricted_guest) {
+ retval = 0;
+ baseval = procbased_ctls2;
+ flag = PROCBASED2_UNRESTRICTED_GUEST;
+ reg = VMCS_SEC_PROC_BASED_CTLS;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (retval == 0) {
+ if (val) {
+ baseval |= flag;
+ } else {
+ baseval &= ~flag;
+ }
+ VMPTRLD(vmcs);
+ error = vmwrite(reg, baseval);
+ VMCLEAR(vmcs);
+
+ if (error) {
+ retval = error;
+ } else {
+ /*
+			 * Update the optional cached copy of the flags and
+			 * record the new setting.
+ */
+ if (pptr != NULL) {
+ *pptr = baseval;
+ }
+
+ if (val) {
+ vmx->cap[vcpu].set |= (1 << type);
+ } else {
+ vmx->cap[vcpu].set &= ~(1 << type);
+ }
+ }
+ }
+
+ return (retval);
+}
+
+struct vmm_ops vmm_ops_intel = {
+ vmx_init,
+ vmx_cleanup,
+ vmx_vminit,
+ vmx_run,
+ vmx_vmcleanup,
+ ept_vmmmap_set,
+ ept_vmmmap_get,
+ vmx_getreg,
+ vmx_setreg,
+ vmx_getdesc,
+ vmx_setdesc,
+ vmx_inject,
+ vmx_getcap,
+ vmx_setcap
+};
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
new file mode 100644
index 0000000..c7cd567
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -0,0 +1,120 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMX_H_
+#define _VMX_H_
+
+#include "vmcs.h"
+
+#define GUEST_MSR_MAX_ENTRIES 64 /* arbitrary */
+
+struct vmxctx {
+ register_t tmpstk[32]; /* vmx_return() stack */
+ register_t tmpstktop;
+
+ register_t guest_rdi; /* Guest state */
+ register_t guest_rsi;
+ register_t guest_rdx;
+ register_t guest_rcx;
+ register_t guest_r8;
+ register_t guest_r9;
+ register_t guest_rax;
+ register_t guest_rbx;
+ register_t guest_rbp;
+ register_t guest_r10;
+ register_t guest_r11;
+ register_t guest_r12;
+ register_t guest_r13;
+ register_t guest_r14;
+ register_t guest_r15;
+ register_t guest_cr2;
+
+ register_t host_r15; /* Host state */
+ register_t host_r14;
+ register_t host_r13;
+ register_t host_r12;
+ register_t host_rbp;
+ register_t host_rsp;
+ register_t host_rbx;
+ register_t host_rip;
+ /*
+ * XXX todo debug registers and fpu state
+ */
+
+ int launched; /* vmcs launch state */
+ int launch_error;
+};
+
+struct vmxcap {
+ int set;
+ uint32_t proc_ctls;
+};
+
+struct vmxstate {
+ int lastcpu; /* host cpu that this 'vcpu' last ran on */
+ uint16_t vpid;
+};
+
+/* virtual machine softc */
+struct vmx {
+ pml4_entry_t pml4ept[NPML4EPG];
+ struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
+ char msr_bitmap[PAGE_SIZE];
+ struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
+ struct vmxctx ctx[VM_MAXCPU];
+ struct vmxcap cap[VM_MAXCPU];
+ struct vmxstate state[VM_MAXCPU];
+ struct vm *vm;
+};
+CTASSERT((offsetof(struct vmx, pml4ept) & PAGE_MASK) == 0);
+CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
+CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
+CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
+
+#define VMX_RETURN_DIRECT 0
+#define VMX_RETURN_LONGJMP 1
+#define VMX_RETURN_VMRESUME 2
+#define VMX_RETURN_VMLAUNCH 3
+#define VMX_RETURN_AST 4
+/*
+ * vmx_setjmp() returns:
+ * - 0 when it returns directly
+ * - 1 when it returns from vmx_longjmp
+ * - 2 when it returns from vmx_resume (which would only be in the error case)
+ * - 3 when it returns from vmx_launch (which would only be in the error case)
+ * - 4 when it returns from vmx_resume or vmx_launch because of AST pending
+ */
+int vmx_setjmp(struct vmxctx *ctx);
+void vmx_longjmp(void); /* returns via vmx_setjmp */
+void vmx_launch(struct vmxctx *ctx) __dead2; /* may return via vmx_setjmp */
+void vmx_resume(struct vmxctx *ctx) __dead2; /* may return via vmx_setjmp */
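+
+/*
+ * Typical use of this machinery, sketched from the loop in vmx_run():
+ *
+ *	rc = vmx_setjmp(vmxctx);
+ *	switch (rc) {
+ *	case VMX_RETURN_DIRECT:
+ *		vmx_launch(vmxctx);	(or vmx_resume(); does not return)
+ *	case VMX_RETURN_LONGJMP:
+ *		(process the vm exit)
+ *	...
+ *	}
+ */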
+
+u_long vmx_fix_cr0(u_long cr0);
+u_long vmx_fix_cr4(u_long cr4);
+
+#endif
diff --git a/sys/amd64/vmm/intel/vmx_controls.h b/sys/amd64/vmm/intel/vmx_controls.h
new file mode 100644
index 0000000..31f29f8
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_controls.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMX_CONTROLS_H_
+#define _VMX_CONTROLS_H_
+
+/* Pin-Based VM-Execution Controls */
+#define PINBASED_EXTINT_EXITING (1 << 0)
+#define PINBASED_NMI_EXITING (1 << 3)
+#define PINBASED_VIRTUAL_NMI (1 << 5)
+#define PINBASED_PREMPTION_TIMER (1 << 6)
+
+/* Primary Processor-Based VM-Execution Controls */
+#define PROCBASED_INT_WINDOW_EXITING (1 << 2)
+#define PROCBASED_TSC_OFFSET (1 << 3)
+#define PROCBASED_HLT_EXITING (1 << 7)
+#define PROCBASED_INVLPG_EXITING (1 << 9)
+#define PROCBASED_MWAIT_EXITING (1 << 10)
+#define PROCBASED_RDPMC_EXITING (1 << 11)
+#define PROCBASED_RDTSC_EXITING (1 << 12)
+#define PROCBASED_CR3_LOAD_EXITING (1 << 15)
+#define PROCBASED_CR3_STORE_EXITING (1 << 16)
+#define PROCBASED_CR8_LOAD_EXITING (1 << 19)
+#define PROCBASED_CR8_STORE_EXITING (1 << 20)
+#define PROCBASED_USE_TPR_SHADOW (1 << 21)
+#define PROCBASED_NMI_WINDOW_EXITING (1 << 22)
+#define PROCBASED_MOV_DR_EXITING (1 << 23)
+#define PROCBASED_IO_EXITING (1 << 24)
+#define PROCBASED_IO_BITMAPS (1 << 25)
+#define PROCBASED_MTF (1 << 27)
+#define PROCBASED_MSR_BITMAPS (1 << 28)
+#define PROCBASED_MONITOR_EXITING (1 << 29)
+#define PROCBASED_PAUSE_EXITING (1 << 30)
+#define PROCBASED_SECONDARY_CONTROLS (1 << 31)
+
+/* Secondary Processor-Based VM-Execution Controls */
+#define PROCBASED2_VIRTUALIZE_APIC (1 << 0)
+#define PROCBASED2_ENABLE_EPT (1 << 1)
+#define PROCBASED2_DESC_TABLE_EXITING (1 << 2)
+#define PROCBASED2_ENABLE_RDTSCP (1 << 3)
+#define PROCBASED2_VIRTUALIZE_X2APIC (1 << 4)
+#define PROCBASED2_ENABLE_VPID (1 << 5)
+#define PROCBASED2_WBINVD_EXITING (1 << 6)
+#define PROCBASED2_UNRESTRICTED_GUEST (1 << 7)
+#define PROCBASED2_PAUSE_LOOP_EXITING (1 << 10)
+
+/* VM Exit Controls */
+#define VM_EXIT_SAVE_DEBUG_CONTROLS (1 << 2)
+#define VM_EXIT_HOST_LMA (1 << 9)
+#define VM_EXIT_LOAD_PERF_GLOBAL_CTRL (1 << 12)
+#define VM_EXIT_ACKNOWLEDGE_INTERRUPT (1 << 15)
+#define VM_EXIT_SAVE_PAT (1 << 18)
+#define VM_EXIT_LOAD_PAT (1 << 19)
+#define VM_EXIT_SAVE_EFER (1 << 20)
+#define VM_EXIT_LOAD_EFER (1 << 21)
+#define VM_EXIT_SAVE_PREEMPTION_TIMER (1 << 22)
+
+/* VM Entry Controls */
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS (1 << 2)
+#define VM_ENTRY_GUEST_LMA (1 << 9)
+#define VM_ENTRY_INTO_SMM (1 << 10)
+#define VM_ENTRY_DEACTIVATE_DUAL_MONITOR (1 << 11)
+#define VM_ENTRY_LOAD_PERF_GLOBAL_CTRL (1 << 13)
+#define VM_ENTRY_LOAD_PAT (1 << 14)
+#define VM_ENTRY_LOAD_EFER (1 << 15)
+
+#endif
diff --git a/sys/amd64/vmm/intel/vmx_cpufunc.h b/sys/amd64/vmm/intel/vmx_cpufunc.h
new file mode 100644
index 0000000..2e66443
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_cpufunc.h
@@ -0,0 +1,218 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMX_CPUFUNC_H_
+#define _VMX_CPUFUNC_H_
+
+struct vmcs;
+
+/*
+ * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
+ *
+ * error
+ * VMsucceed 0
+ * VMFailInvalid 1
+ * VMFailValid 2 see also VMCS VM-Instruction Error Field
+ */
+#define VM_SUCCESS 0
+#define VM_FAIL_INVALID 1
+#define VM_FAIL_VALID 2
+#define VMX_SET_ERROR_CODE \
+ " jnc 1f;" \
+ " mov $1, %[error];" /* CF: error = 1 */ \
+ " jmp 3f;" \
+ "1: jnz 2f;" \
+ " mov $2, %[error];" /* ZF: error = 2 */ \
+ " jmp 3f;" \
+ "2: mov $0, %[error];" \
+ "3:"
+
+/* returns 0 on success and non-zero on failure */
+static __inline int
+vmxon(char *region)
+{
+ int error;
+ uint64_t addr;
+
+ addr = vtophys(region);
+ __asm __volatile("vmxon %[addr];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [addr] "m" (*(uint64_t *)&addr)
+ : "memory");
+
+ return (error);
+}
+
+/* returns 0 on success and non-zero on failure */
+static __inline int
+vmclear(struct vmcs *vmcs)
+{
+ int error;
+ uint64_t addr;
+
+ addr = vtophys(vmcs);
+ __asm __volatile("vmclear %[addr];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [addr] "m" (*(uint64_t *)&addr)
+ : "memory");
+ return (error);
+}
+
+static __inline void
+vmxoff(void)
+{
+
+ __asm __volatile("vmxoff");
+}
+
+static __inline void
+vmptrst(uint64_t *addr)
+{
+
+ __asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory");
+}
+
+static __inline int
+vmptrld(struct vmcs *vmcs)
+{
+ int error;
+ uint64_t addr;
+
+ addr = vtophys(vmcs);
+ __asm __volatile("vmptrld %[addr];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [addr] "m" (*(uint64_t *)&addr)
+ : "memory");
+ return (error);
+}
+
+static __inline int
+vmwrite(uint64_t reg, uint64_t val)
+{
+ int error;
+
+ __asm __volatile("vmwrite %[val], %[reg];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [val] "r" (val), [reg] "r" (reg)
+ : "memory");
+
+ return (error);
+}
+
+static __inline int
+vmread(uint64_t r, uint64_t *addr)
+{
+ int error;
+
+ __asm __volatile("vmread %[r], %[addr];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [r] "r" (r), [addr] "m" (*addr)
+ : "memory");
+
+ return (error);
+}
+
+static __inline void
+VMCLEAR(struct vmcs *vmcs)
+{
+ int err;
+
+ err = vmclear(vmcs);
+ if (err != 0)
+ panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
+
+ critical_exit();
+}
+
+static __inline void
+VMPTRLD(struct vmcs *vmcs)
+{
+ int err;
+
+ critical_enter();
+
+ err = vmptrld(vmcs);
+ if (err != 0)
+ panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
+}
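+
+/*
+ * VMPTRLD() and VMCLEAR() are intended to bracket the period during which a
+ * vmcs is current on this cpu: VMPTRLD() enters a critical section before
+ * making the vmcs current so the thread cannot migrate to another cpu, and
+ * VMCLEAR() exits that critical section once the vmcs is no longer current.
+ * A sketch of the expected usage:
+ *
+ *	VMPTRLD(vmcs);
+ *	... vmread()/vmwrite() against the current vmcs ...
+ *	VMCLEAR(vmcs);
+ */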
+
+#define INVVPID_TYPE_ADDRESS 0UL
+#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
+#define INVVPID_TYPE_ALL_CONTEXTS 2UL
+
+struct invvpid_desc {
+ uint16_t vpid;
+ uint16_t _res1;
+ uint32_t _res2;
+ uint64_t linear_addr;
+};
+CTASSERT(sizeof(struct invvpid_desc) == 16);
+
+static __inline void
+invvpid(uint64_t type, struct invvpid_desc desc)
+{
+ int error;
+
+ __asm __volatile("invvpid %[desc], %[type];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [desc] "m" (desc), [type] "r" (type)
+ : "memory");
+
+ if (error)
+ panic("invvpid error %d", error);
+}
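+
+/*
+ * Usage sketch: to flush all mappings tagged with a single vpid (e.g. when
+ * a vpid is recycled for a different vcpu) one would do something like:
+ *
+ *	struct invvpid_desc desc = { 0 };
+ *	desc.vpid = vpid;
+ *	invvpid(INVVPID_TYPE_SINGLE_CONTEXT, desc);
+ */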
+
+#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
+#define INVEPT_TYPE_ALL_CONTEXTS 2UL
+struct invept_desc {
+ uint64_t eptp;
+ uint64_t _res;
+};
+CTASSERT(sizeof(struct invept_desc) == 16);
+
+static __inline void
+invept(uint64_t type, struct invept_desc desc)
+{
+ int error;
+
+ __asm __volatile("invept %[desc], %[type];"
+ VMX_SET_ERROR_CODE
+ : [error] "=r" (error)
+ : [desc] "m" (desc), [type] "r" (type)
+ : "memory");
+
+ if (error)
+ panic("invept error %d", error);
+}
+#endif
diff --git a/sys/amd64/vmm/intel/vmx_genassym.c b/sys/amd64/vmm/intel/vmx_genassym.c
new file mode 100644
index 0000000..823a05d
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_genassym.c
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/assym.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/pmap.h>
+
+#include <machine/vmm.h>
+#include "vmx.h"
+#include "vmx_cpufunc.h"
+
+ASSYM(VMXCTX_TMPSTKTOP, offsetof(struct vmxctx, tmpstktop));
+ASSYM(VMXCTX_GUEST_RDI, offsetof(struct vmxctx, guest_rdi));
+ASSYM(VMXCTX_GUEST_RSI, offsetof(struct vmxctx, guest_rsi));
+ASSYM(VMXCTX_GUEST_RDX, offsetof(struct vmxctx, guest_rdx));
+ASSYM(VMXCTX_GUEST_RCX, offsetof(struct vmxctx, guest_rcx));
+ASSYM(VMXCTX_GUEST_R8, offsetof(struct vmxctx, guest_r8));
+ASSYM(VMXCTX_GUEST_R9, offsetof(struct vmxctx, guest_r9));
+ASSYM(VMXCTX_GUEST_RAX, offsetof(struct vmxctx, guest_rax));
+ASSYM(VMXCTX_GUEST_RBX, offsetof(struct vmxctx, guest_rbx));
+ASSYM(VMXCTX_GUEST_RBP, offsetof(struct vmxctx, guest_rbp));
+ASSYM(VMXCTX_GUEST_R10, offsetof(struct vmxctx, guest_r10));
+ASSYM(VMXCTX_GUEST_R11, offsetof(struct vmxctx, guest_r11));
+ASSYM(VMXCTX_GUEST_R12, offsetof(struct vmxctx, guest_r12));
+ASSYM(VMXCTX_GUEST_R13, offsetof(struct vmxctx, guest_r13));
+ASSYM(VMXCTX_GUEST_R14, offsetof(struct vmxctx, guest_r14));
+ASSYM(VMXCTX_GUEST_R15, offsetof(struct vmxctx, guest_r15));
+ASSYM(VMXCTX_GUEST_CR2, offsetof(struct vmxctx, guest_cr2));
+
+ASSYM(VMXCTX_HOST_R15, offsetof(struct vmxctx, host_r15));
+ASSYM(VMXCTX_HOST_R14, offsetof(struct vmxctx, host_r14));
+ASSYM(VMXCTX_HOST_R13, offsetof(struct vmxctx, host_r13));
+ASSYM(VMXCTX_HOST_R12, offsetof(struct vmxctx, host_r12));
+ASSYM(VMXCTX_HOST_RBP, offsetof(struct vmxctx, host_rbp));
+ASSYM(VMXCTX_HOST_RSP, offsetof(struct vmxctx, host_rsp));
+ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx));
+ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip));
+
+ASSYM(VMXCTX_LAUNCH_ERROR, offsetof(struct vmxctx, launch_error));
+
+ASSYM(VM_SUCCESS, VM_SUCCESS);
+ASSYM(VM_FAIL_INVALID, VM_FAIL_INVALID);
+ASSYM(VM_FAIL_VALID, VM_FAIL_VALID);
+
+ASSYM(VMX_RETURN_DIRECT, VMX_RETURN_DIRECT);
+ASSYM(VMX_RETURN_LONGJMP, VMX_RETURN_LONGJMP);
+ASSYM(VMX_RETURN_VMRESUME, VMX_RETURN_VMRESUME);
+ASSYM(VMX_RETURN_VMLAUNCH, VMX_RETURN_VMLAUNCH);
+ASSYM(VMX_RETURN_AST, VMX_RETURN_AST);
+
+ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
+ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
+ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
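+
+/*
+ * The ASSYM() entries above are processed by the genassym mechanism into the
+ * generated vmx_assym.s, which vmx_support.S includes so that assembly code
+ * can reference structure offsets and constants symbolically, e.g.:
+ *
+ *	movq	%r15,VMXCTX_HOST_R15(%rdi)
+ */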
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
new file mode 100644
index 0000000..2aba63c
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -0,0 +1,172 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/cpufunc.h>
+
+#include "vmx_msr.h"
+
+static boolean_t
+vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
+{
+
+ if (msr_val & (1UL << (bitpos + 32)))
+ return (TRUE);
+ else
+ return (FALSE);
+}
+
+static boolean_t
+vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
+{
+
+ if ((msr_val & (1UL << bitpos)) == 0)
+ return (TRUE);
+ else
+ return (FALSE);
+}
+
+uint32_t
+vmx_revision(void)
+{
+
+ return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
+}
+
+/*
+ * Generate a bitmask to be used for the VMCS execution control fields.
+ *
+ * The caller specifies what bits should be set to one in 'ones_mask'
+ * and what bits should be set to zero in 'zeros_mask'. The don't-care
+ * bits are set to the default value. The default values are obtained
+ * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
+ * VMX Capabilities".
+ *
+ * Returns zero on success and non-zero on error.
+ */
+int
+vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
+ uint32_t zeros_mask, uint32_t *retval)
+{
+ int i;
+ uint64_t val, trueval;
+ boolean_t true_ctls_avail, one_allowed, zero_allowed;
+
+ /* We cannot ask the same bit to be set to both '1' and '0' */
+ if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
+ return (EINVAL);
+
+ if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
+ true_ctls_avail = TRUE;
+ else
+ true_ctls_avail = FALSE;
+
+ val = rdmsr(ctl_reg);
+ if (true_ctls_avail)
+ trueval = rdmsr(true_ctl_reg); /* step c */
+ else
+ trueval = val; /* step a */
+
+ for (i = 0; i < 32; i++) {
+ one_allowed = vmx_ctl_allows_one_setting(trueval, i);
+ zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
+
+ KASSERT(one_allowed || zero_allowed,
+ ("invalid zero/one setting for bit %d of ctl 0x%0x, "
+ "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
+
+ if (zero_allowed && !one_allowed) { /* b(i),c(i) */
+ if (ones_mask & (1 << i))
+ return (EINVAL);
+ *retval &= ~(1 << i);
+ } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
+ if (zeros_mask & (1 << i))
+ return (EINVAL);
+ *retval |= 1 << i;
+ } else {
+ if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
+ *retval &= ~(1 << i);
+ else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
+ *retval |= 1 << i;
+ else if (!true_ctls_avail)
+ *retval &= ~(1 << i); /* b(iii) */
+ else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
+ *retval &= ~(1 << i);
+ else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
+ *retval |= 1 << i;
+ else {
+ panic("vmx_set_ctlreg: unable to determine "
+ "correct value of ctl bit %d for msr "
+ "0x%0x and true msr 0x%0x", i, ctl_reg,
+ true_ctl_reg);
+ }
+ }
+ }
+
+ return (0);
+}
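+
+/*
+ * Usage sketch (the masks are illustrative): a caller that requires some
+ * processor-based controls to be 1 and others to be 0, with the remaining
+ * bits left at their defaults, would do:
+ *
+ *	uint32_t procbased_ctls;
+ *
+ *	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+ *	    MSR_VMX_TRUE_PROCBASED_CTLS, ones_mask, zeros_mask,
+ *	    &procbased_ctls);
+ *	if (error)
+ *		-- the cpu cannot support the requested settings --
+ */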
+
+void
+msr_bitmap_initialize(char *bitmap)
+{
+
+ memset(bitmap, 0xff, PAGE_SIZE);
+}
+
+int
+msr_bitmap_change_access(char *bitmap, u_int msr, int access)
+{
+ int byte, bit;
+
+ if (msr <= 0x00001FFF)
+ byte = msr / 8;
+ else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
+ byte = 1024 + (msr - 0xC0000000) / 8;
+ else
+ return (EINVAL);
+
+ bit = msr & 0x7;
+
+ if (access & MSR_BITMAP_ACCESS_READ)
+ bitmap[byte] &= ~(1 << bit);
+ else
+ bitmap[byte] |= 1 << bit;
+
+ byte += 2048;
+ if (access & MSR_BITMAP_ACCESS_WRITE)
+ bitmap[byte] &= ~(1 << bit);
+ else
+ bitmap[byte] |= 1 << bit;
+
+ return (0);
+}
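+
+/*
+ * Usage sketch: after msr_bitmap_initialize() every bit is 1 and all MSR
+ * accesses cause a vm exit. To let the guest read and write an MSR (here
+ * MSR_GSBASE, for illustration) without exiting:
+ *
+ *	msr_bitmap_initialize(bitmap);
+ *	error = msr_bitmap_change_access(bitmap, MSR_GSBASE,
+ *	    MSR_BITMAP_ACCESS_RW);
+ */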
diff --git a/sys/amd64/vmm/intel/vmx_msr.h b/sys/amd64/vmm/intel/vmx_msr.h
new file mode 100644
index 0000000..e6379a9
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_msr.h
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMX_MSR_H_
+#define _VMX_MSR_H_
+
+#define MSR_VMX_BASIC 0x480
+#define MSR_VMX_EPT_VPID_CAP 0x48C
+
+#define MSR_VMX_PROCBASED_CTLS 0x482
+#define MSR_VMX_TRUE_PROCBASED_CTLS 0x48E
+
+#define MSR_VMX_PINBASED_CTLS 0x481
+#define MSR_VMX_TRUE_PINBASED_CTLS 0x48D
+
+#define MSR_VMX_PROCBASED_CTLS2 0x48B
+
+#define MSR_VMX_EXIT_CTLS 0x483
+#define MSR_VMX_TRUE_EXIT_CTLS 0x48F
+
+#define MSR_VMX_ENTRY_CTLS 0x484
+#define MSR_VMX_TRUE_ENTRY_CTLS 0x490
+
+#define MSR_VMX_CR0_FIXED0 0x486
+#define MSR_VMX_CR0_FIXED1 0x487
+
+#define MSR_VMX_CR4_FIXED0 0x488
+#define MSR_VMX_CR4_FIXED1 0x489
+
+uint32_t vmx_revision(void);
+
+int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
+ uint32_t zeros_mask, uint32_t *retval);
+
+/*
+ * According to Section 21.10.4 "Software Access to Related Structures",
+ * changes to data structures pointed to by the VMCS must be made only when
+ * there is no logical processor with a current VMCS that points to the
+ * data structure.
+ *
+ * This pretty much limits us to configuring the MSR bitmap before VMCS
+ * initialization for SMP VMs. Unless of course we do it the hard way - which
+ * would involve some form of synchronization between the vcpus to vmclear
+ * all VMCSs that point to the bitmap.
+ */
+#define MSR_BITMAP_ACCESS_NONE 0x0
+#define MSR_BITMAP_ACCESS_READ 0x1
+#define MSR_BITMAP_ACCESS_WRITE 0x2
+#define MSR_BITMAP_ACCESS_RW (MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
+void msr_bitmap_initialize(char *bitmap);
+int msr_bitmap_change_access(char *bitmap, u_int msr, int access);
+
+#endif
diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S
new file mode 100644
index 0000000..4ba582a
--- /dev/null
+++ b/sys/amd64/vmm/intel/vmx_support.S
@@ -0,0 +1,246 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+
+#include "vmx_assym.s"
+
+/*
+ * Disable interrupts before updating %rsp in VMX_CHECK_AST or
+ * VMX_GUEST_RESTORE.
+ *
+ * The location that %rsp points to is a 'vmxctx' and not a
+ * real stack, so we don't want an interrupt handler to trash it.
+ */
+#define VMX_DISABLE_INTERRUPTS cli
+
+/*
+ * If the thread hosting the vcpu has an ast pending then take care of it
+ * by returning from vmx_setjmp() with a return value of VMX_RETURN_AST.
+ *
+ * Assumes that %rdi holds a pointer to the 'vmxctx' and that interrupts
+ * are disabled.
+ */
+#define VMX_CHECK_AST \
+ movq PCPU(CURTHREAD),%rax; \
+ testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax); \
+ je 9f; \
+ movq $VMX_RETURN_AST,%rsi; \
+ movq %rdi,%rsp; \
+ addq $VMXCTX_TMPSTKTOP,%rsp; \
+ callq vmx_return; \
+9:
+
+/*
+ * Assumes that %rdi holds a pointer to the 'vmxctx'.
+ *
+ * On "return" all registers are updated to reflect guest state. The two
+ * exceptions are %rip and %rsp. These registers are atomically switched
+ * by hardware from the guest area of the vmcs.
+ *
+ * We modify %rsp to point to the 'vmxctx' so we can use it to restore
+ * host context in case of an error with 'vmlaunch' or 'vmresume'.
+ */
+#define VMX_GUEST_RESTORE \
+ movq %rdi,%rsp; \
+ movq VMXCTX_GUEST_CR2(%rdi),%rsi; \
+ movq %rsi,%cr2; \
+ movq VMXCTX_GUEST_RSI(%rdi),%rsi; \
+ movq VMXCTX_GUEST_RDX(%rdi),%rdx; \
+ movq VMXCTX_GUEST_RCX(%rdi),%rcx; \
+ movq VMXCTX_GUEST_R8(%rdi),%r8; \
+ movq VMXCTX_GUEST_R9(%rdi),%r9; \
+ movq VMXCTX_GUEST_RAX(%rdi),%rax; \
+ movq VMXCTX_GUEST_RBX(%rdi),%rbx; \
+ movq VMXCTX_GUEST_RBP(%rdi),%rbp; \
+ movq VMXCTX_GUEST_R10(%rdi),%r10; \
+ movq VMXCTX_GUEST_R11(%rdi),%r11; \
+ movq VMXCTX_GUEST_R12(%rdi),%r12; \
+ movq VMXCTX_GUEST_R13(%rdi),%r13; \
+ movq VMXCTX_GUEST_R14(%rdi),%r14; \
+ movq VMXCTX_GUEST_R15(%rdi),%r15; \
+ movq VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi last */
+
+#define VM_INSTRUCTION_ERROR(reg) \
+ jnc 1f; \
+ movl $VM_FAIL_INVALID,reg; /* CF is set */ \
+ jmp 3f; \
+1: jnz 2f; \
+ movl $VM_FAIL_VALID,reg; /* ZF is set */ \
+ jmp 3f; \
+2: movl $VM_SUCCESS,reg; \
+3: movl reg,VMXCTX_LAUNCH_ERROR(%rsp)
+
+ .text
+/*
+ * int vmx_setjmp(ctxp)
+ * %rdi = ctxp
+ *
+ * Return value is VMX_RETURN_DIRECT ('0') when it returns directly from
+ * here, and one of the other VMX_RETURN_* values (e.g. VMX_RETURN_LONGJMP,
+ * '1', after a vm exit) when it returns indirectly through vmx_return().
+ */
+ENTRY(vmx_setjmp)
+ movq (%rsp),%rax /* return address */
+ movq %r15,VMXCTX_HOST_R15(%rdi)
+ movq %r14,VMXCTX_HOST_R14(%rdi)
+ movq %r13,VMXCTX_HOST_R13(%rdi)
+ movq %r12,VMXCTX_HOST_R12(%rdi)
+ movq %rbp,VMXCTX_HOST_RBP(%rdi)
+ movq %rsp,VMXCTX_HOST_RSP(%rdi)
+ movq %rbx,VMXCTX_HOST_RBX(%rdi)
+ movq %rax,VMXCTX_HOST_RIP(%rdi)
+
+ /*
+ * XXX save host debug registers
+ */
+ movl $VMX_RETURN_DIRECT,%eax
+ ret
+END(vmx_setjmp)
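+
+/*
+ * Sketch of the expected control flow from the C side (illustrative only):
+ *
+ *	rc = vmx_setjmp(vmxctx);
+ *	switch (rc) {
+ *	case VMX_RETURN_DIRECT:
+ *		vmx_launch(vmxctx);	-- or vmx_resume(vmxctx)
+ *		break;			-- not reached if the guest is entered
+ *	case VMX_RETURN_LONGJMP:	-- a vm exit occurred
+ *	case VMX_RETURN_VMRESUME:	-- vmresume failed, see launch_error
+ *	case VMX_RETURN_VMLAUNCH:	-- vmlaunch failed, see launch_error
+ *	case VMX_RETURN_AST:		-- an ast was pending
+ *	}
+ */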
+
+/*
+ * void vmx_return(struct vmxctx *ctxp, int retval)
+ * %rdi = ctxp
+ * %rsi = retval
+ * Return to vmm context through vmx_setjmp() with a value of 'retval'.
+ */
+ENTRY(vmx_return)
+ /* Restore host context. */
+ movq VMXCTX_HOST_R15(%rdi),%r15
+ movq VMXCTX_HOST_R14(%rdi),%r14
+ movq VMXCTX_HOST_R13(%rdi),%r13
+ movq VMXCTX_HOST_R12(%rdi),%r12
+ movq VMXCTX_HOST_RBP(%rdi),%rbp
+ movq VMXCTX_HOST_RSP(%rdi),%rsp
+ movq VMXCTX_HOST_RBX(%rdi),%rbx
+ movq VMXCTX_HOST_RIP(%rdi),%rax
+ movq %rax,(%rsp) /* return address */
+
+ /*
+ * XXX restore host debug registers
+ */
+ movl %esi,%eax
+ ret
+END(vmx_return)
+
+/*
+ * void vmx_longjmp(void)
+ * %rsp points to the struct vmxctx
+ */
+ENTRY(vmx_longjmp)
+ /*
+ * Save guest state that is not automatically saved in the vmcs.
+ */
+ movq %rdi,VMXCTX_GUEST_RDI(%rsp)
+ movq %rsi,VMXCTX_GUEST_RSI(%rsp)
+ movq %rdx,VMXCTX_GUEST_RDX(%rsp)
+ movq %rcx,VMXCTX_GUEST_RCX(%rsp)
+ movq %r8,VMXCTX_GUEST_R8(%rsp)
+ movq %r9,VMXCTX_GUEST_R9(%rsp)
+ movq %rax,VMXCTX_GUEST_RAX(%rsp)
+ movq %rbx,VMXCTX_GUEST_RBX(%rsp)
+ movq %rbp,VMXCTX_GUEST_RBP(%rsp)
+ movq %r10,VMXCTX_GUEST_R10(%rsp)
+ movq %r11,VMXCTX_GUEST_R11(%rsp)
+ movq %r12,VMXCTX_GUEST_R12(%rsp)
+ movq %r13,VMXCTX_GUEST_R13(%rsp)
+ movq %r14,VMXCTX_GUEST_R14(%rsp)
+ movq %r15,VMXCTX_GUEST_R15(%rsp)
+
+ movq %cr2,%rdi
+ movq %rdi,VMXCTX_GUEST_CR2(%rsp)
+
+ movq %rsp,%rdi
+ movq $VMX_RETURN_LONGJMP,%rsi
+
+ addq $VMXCTX_TMPSTKTOP,%rsp
+ callq vmx_return
+END(vmx_longjmp)
+
+/*
+ * void vmx_resume(struct vmxctx *ctxp)
+ * %rdi = ctxp
+ *
+ * Although the return type is 'void', this function may return indirectly
+ * through vmx_setjmp() with a return value of VMX_RETURN_VMRESUME.
+ */
+ENTRY(vmx_resume)
+ VMX_DISABLE_INTERRUPTS
+
+ VMX_CHECK_AST
+
+ /*
+ * Restore guest state that is not automatically loaded from the vmcs.
+ */
+ VMX_GUEST_RESTORE
+
+ vmresume
+
+ /*
+ * Capture the reason why vmresume failed.
+ */
+ VM_INSTRUCTION_ERROR(%eax)
+
+ /* Return via vmx_setjmp with return value of VMX_RETURN_VMRESUME */
+ movq %rsp,%rdi
+ movq $VMX_RETURN_VMRESUME,%rsi
+
+ addq $VMXCTX_TMPSTKTOP,%rsp
+ callq vmx_return
+END(vmx_resume)
+
+/*
+ * void vmx_launch(struct vmxctx *ctxp)
+ * %rdi = ctxp
+ *
+ * Although the return type is 'void', this function may return indirectly
+ * through vmx_setjmp() with a return value of VMX_RETURN_VMLAUNCH.
+ */
+ENTRY(vmx_launch)
+ VMX_DISABLE_INTERRUPTS
+
+ VMX_CHECK_AST
+
+ /*
+ * Restore guest state that is not automatically loaded from the vmcs.
+ */
+ VMX_GUEST_RESTORE
+
+ vmlaunch
+
+ /*
+ * Capture the reason why vmlaunch failed.
+ */
+ VM_INSTRUCTION_ERROR(%eax)
+
+ /* Return via vmx_setjmp with return value of VMX_RETURN_VMLAUNCH */
+ movq %rsp,%rdi
+ movq $VMX_RETURN_VMLAUNCH,%rsi
+
+ addq $VMXCTX_TMPSTKTOP,%rsp
+ callq vmx_return
+END(vmx_launch)
diff --git a/sys/amd64/vmm/intel/vtd.c b/sys/amd64/vmm/intel/vtd.c
new file mode 100644
index 0000000..ef0e9bc
--- /dev/null
+++ b/sys/amd64/vmm/intel/vtd.c
@@ -0,0 +1,677 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/pci_cfgreg.h>
+
+#include "io/iommu.h"
+
+/*
+ * Documented in the "Intel Virtualization Technology for Directed I/O",
+ * Architecture Spec, September 2008.
+ */
+
+/* Section 10.4 "Register Descriptions" */
+struct vtdmap {
+ volatile uint32_t version;
+ volatile uint32_t res0;
+ volatile uint64_t cap;
+ volatile uint64_t ext_cap;
+ volatile uint32_t gcr;
+ volatile uint32_t gsr;
+ volatile uint64_t rta;
+ volatile uint64_t ccr;
+};
+
+#define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F)
+#define VTD_CAP_ND(cap) ((cap) & 0x7)
+#define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1)
+#define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF)
+#define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1)
+
+#define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1)
+#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
+#define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF)
+
+#define VTD_GCR_WBF (1 << 27)
+#define VTD_GCR_SRTP (1 << 30)
+#define VTD_GCR_TE (1 << 31)
+
+#define VTD_GSR_WBFS (1 << 27)
+#define VTD_GSR_RTPS (1 << 30)
+#define VTD_GSR_TES (1 << 31)
+
+#define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */
+#define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */
+
+#define VTD_IIR_IVT (1UL << 63) /* invalidate IOTLB */
+#define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */
+#define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */
+#define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */
+#define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */
+#define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */
+#define VTD_IIR_DOMAIN_P 32
+
+#define VTD_ROOT_PRESENT 0x1
+#define VTD_CTX_PRESENT 0x1
+#define VTD_CTX_TT_ALL (1UL << 2)
+
+#define VTD_PTE_RD (1UL << 0)
+#define VTD_PTE_WR (1UL << 1)
+#define VTD_PTE_SUPERPAGE (1UL << 7)
+#define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL)
+
+struct domain {
+ uint64_t *ptp; /* first level page table page */
+ int pt_levels; /* number of page table levels */
+ int addrwidth; /* 'AW' field in context entry */
+ int spsmask; /* supported super page sizes */
+ u_int id; /* domain id */
+ vm_paddr_t maxaddr; /* highest address to be mapped */
+ SLIST_ENTRY(domain) next;
+};
+
+static SLIST_HEAD(, domain) domhead;
+
+#define DRHD_MAX_UNITS 8
+static int drhd_num;
+static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
+static int max_domains;
+typedef int (*drhd_ident_func_t)(void);
+
+static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
+static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
+
+static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
+
+/*
+ * Config space register definitions from the "Intel 5520 and 5500" datasheet.
+ */
+static int
+tylersburg_vtd_ident(void)
+{
+ int units, nlbus;
+ uint16_t did, vid;
+ uint32_t miscsts, vtbar;
+
+ const int bus = 0;
+ const int slot = 20;
+ const int func = 0;
+
+ units = 0;
+
+ vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
+ did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
+ if (vid != 0x8086 || did != 0x342E)
+ goto done;
+
+ /*
+ * Check if this is a dual IOH configuration.
+ */
+ miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
+ if (miscsts & (1 << 25))
+ nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
+ else
+ nlbus = -1;
+
+ vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
+ if (vtbar & 0x1) {
+ vtdmaps[units++] = (struct vtdmap *)
+ PHYS_TO_DMAP(vtbar & 0xffffe000);
+ } else if (bootverbose)
+ printf("VT-d unit in legacy IOH is disabled!\n");
+
+ if (nlbus != -1) {
+ vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
+ if (vtbar & 0x1) {
+ vtdmaps[units++] = (struct vtdmap *)
+ PHYS_TO_DMAP(vtbar & 0xffffe000);
+ } else if (bootverbose)
+ printf("VT-d unit in non-legacy IOH is disabled!\n");
+ }
+done:
+ return (units);
+}
+
+static drhd_ident_func_t drhd_ident_funcs[] = {
+ tylersburg_vtd_ident,
+ NULL
+};
+
+static int
+vtd_max_domains(struct vtdmap *vtdmap)
+{
+ int nd;
+
+ nd = VTD_CAP_ND(vtdmap->cap);
+
+ switch (nd) {
+ case 0:
+ return (16);
+ case 1:
+ return (64);
+ case 2:
+ return (256);
+ case 3:
+ return (1024);
+ case 4:
+ return (4 * 1024);
+ case 5:
+ return (16 * 1024);
+ case 6:
+ return (64 * 1024);
+ default:
+ panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
+ }
+}
+
+static u_int
+domain_id(void)
+{
+ u_int id;
+ struct domain *dom;
+
+ /* Skip domain id 0 - it is reserved when the Caching Mode field is set */
+ for (id = 1; id < max_domains; id++) {
+ SLIST_FOREACH(dom, &domhead, next) {
+ if (dom->id == id)
+ break;
+ }
+ if (dom == NULL)
+ break; /* found an unused id */
+ }
+
+ if (id >= max_domains)
+ panic("domain ids exhausted");
+
+ return (id);
+}
+
+static void
+vtd_wbflush(struct vtdmap *vtdmap)
+{
+
+ if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
+ pmap_invalidate_cache();
+
+ if (VTD_CAP_RWBF(vtdmap->cap)) {
+ vtdmap->gcr = VTD_GCR_WBF;
+ while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
+ ;
+ }
+}
+
+static void
+vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
+{
+
+ vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
+ while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
+ ;
+}
+
+static void
+vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
+{
+ int offset;
+ volatile uint64_t *iotlb_reg, val;
+
+ vtd_wbflush(vtdmap);
+
+ offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
+ iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
+
+ *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
+ VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
+
+ while (1) {
+ val = *iotlb_reg;
+ if ((val & VTD_IIR_IVT) == 0)
+ break;
+ }
+}
+
+static void
+vtd_translation_enable(struct vtdmap *vtdmap)
+{
+
+ vtdmap->gcr = VTD_GCR_TE;
+ while ((vtdmap->gsr & VTD_GSR_TES) == 0)
+ ;
+}
+
+static void
+vtd_translation_disable(struct vtdmap *vtdmap)
+{
+
+ vtdmap->gcr = 0;
+ while ((vtdmap->gsr & VTD_GSR_TES) != 0)
+ ;
+}
+
+static int
+vtd_init(void)
+{
+ int i, units;
+ struct vtdmap *vtdmap;
+ vm_paddr_t ctx_paddr;
+
+ for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
+ units = (*drhd_ident_funcs[i])();
+ if (units > 0)
+ break;
+ }
+
+ if (units <= 0)
+ return (ENXIO);
+
+ drhd_num = units;
+ vtdmap = vtdmaps[0];
+
+ if (VTD_CAP_CM(vtdmap->cap) != 0)
+ panic("vtd_init: invalid caching mode");
+
+ max_domains = vtd_max_domains(vtdmap);
+
+ /*
+ * Set up the root-table to point to the context-entry tables
+ */
+ for (i = 0; i < 256; i++) {
+ ctx_paddr = vtophys(ctx_tables[i]);
+ if (ctx_paddr & PAGE_MASK)
+ panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
+
+ root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
+ }
+
+ return (0);
+}
+
+static void
+vtd_cleanup(void)
+{
+}
+
+static void
+vtd_enable(void)
+{
+ int i;
+ struct vtdmap *vtdmap;
+
+ for (i = 0; i < drhd_num; i++) {
+ vtdmap = vtdmaps[i];
+ vtd_wbflush(vtdmap);
+
+ /* Update the root table address */
+ vtdmap->rta = vtophys(root_table);
+ vtdmap->gcr = VTD_GCR_SRTP;
+ while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
+ ;
+
+ vtd_ctx_global_invalidate(vtdmap);
+ vtd_iotlb_global_invalidate(vtdmap);
+
+ vtd_translation_enable(vtdmap);
+ }
+}
+
+static void
+vtd_disable(void)
+{
+ int i;
+ struct vtdmap *vtdmap;
+
+ for (i = 0; i < drhd_num; i++) {
+ vtdmap = vtdmaps[i];
+ vtd_translation_disable(vtdmap);
+ }
+}
+
+static void
+vtd_add_device(void *arg, int bus, int slot, int func)
+{
+ int idx;
+ uint64_t *ctxp;
+ struct domain *dom = arg;
+ vm_paddr_t pt_paddr;
+ struct vtdmap *vtdmap;
+
+ if (bus < 0 || bus > PCI_BUSMAX ||
+ slot < 0 || slot > PCI_SLOTMAX ||
+ func < 0 || func > PCI_FUNCMAX)
+ panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
+
+ vtdmap = vtdmaps[0];
+ ctxp = ctx_tables[bus];
+ pt_paddr = vtophys(dom->ptp);
+ idx = (slot << 3 | func) * 2;
+
+ if (ctxp[idx] & VTD_CTX_PRESENT) {
+ panic("vtd_add_device: device %d/%d/%d is already owned by "
+ "domain %d", bus, slot, func,
+ (uint16_t)(ctxp[idx + 1] >> 8));
+ }
+
+ /*
+ * Order is important. The 'present' bit is set only after all fields
+ * of the context entry are initialized.
+ */
+ ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
+
+ if (VTD_ECAP_DI(vtdmap->ext_cap))
+ ctxp[idx] = VTD_CTX_TT_ALL;
+ else
+ ctxp[idx] = 0;
+
+ ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
+
+ /*
+ * 'Not Present' entries are not cached in either the Context Cache
+ * or in the IOTLB, so there is no need to invalidate either of them.
+ */
+}
+
+static void
+vtd_remove_device(void *arg, int bus, int slot, int func)
+{
+ int i, idx;
+ uint64_t *ctxp;
+ struct vtdmap *vtdmap;
+
+ if (bus < 0 || bus > PCI_BUSMAX ||
+ slot < 0 || slot > PCI_SLOTMAX ||
+ func < 0 || func > PCI_FUNCMAX)
+ panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
+
+ ctxp = ctx_tables[bus];
+ idx = (slot << 3 | func) * 2;
+
+ /*
+ * Order is important. The 'present' bit must be cleared first.
+ */
+ ctxp[idx] = 0;
+ ctxp[idx + 1] = 0;
+
+ /*
+ * Invalidate the Context Cache and the IOTLB.
+ *
+ * XXX use device-selective invalidation for Context Cache
+ * XXX use domain-selective invalidation for IOTLB
+ */
+ for (i = 0; i < drhd_num; i++) {
+ vtdmap = vtdmaps[i];
+ vtd_ctx_global_invalidate(vtdmap);
+ vtd_iotlb_global_invalidate(vtdmap);
+ }
+}
+
+#define CREATE_MAPPING 0
+#define REMOVE_MAPPING 1
+
+static uint64_t
+vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
+ int remove)
+{
+ struct domain *dom;
+ int i, spshift, ptpshift, ptpindex, nlevels;
+ uint64_t spsize, *ptp;
+
+ dom = arg;
+ ptpindex = 0;
+ ptpshift = 0;
+
+ if (gpa & PAGE_MASK)
+ panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
+
+ if (hpa & PAGE_MASK)
+ panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
+
+ if (len & PAGE_MASK)
+ panic("vtd_create_mapping: unaligned len 0x%0lx", len);
+
+ /*
+ * Compute the size of the mapping that we can accommodate.
+ *
+ * This is based on three factors:
+ * - supported super page size
+ * - alignment of the region starting at 'gpa' and 'hpa'
+ * - length of the region 'len'
+ */
+ spshift = 48;
+ for (i = 3; i >= 0; i--) {
+ spsize = 1UL << spshift;
+ if ((dom->spsmask & (1 << i)) != 0 &&
+ (gpa & (spsize - 1)) == 0 &&
+ (hpa & (spsize - 1)) == 0 &&
+ (len >= spsize)) {
+ break;
+ }
+ spshift -= 9;
+ }
+
+ ptp = dom->ptp;
+ nlevels = dom->pt_levels;
+ while (--nlevels >= 0) {
+ ptpshift = 12 + nlevels * 9;
+ ptpindex = (gpa >> ptpshift) & 0x1FF;
+
+ /* We have reached the leaf mapping */
+ if (spshift >= ptpshift) {
+ break;
+ }
+
+ /*
+ * We are working on a non-leaf page table page.
+ *
+ * Create a downstream page table page if necessary and point
+ * to it from the current page table.
+ */
+ if (ptp[ptpindex] == 0) {
+ void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
+ ptp[ptpindex] = vtophys(nlp) | VTD_PTE_RD | VTD_PTE_WR;
+ }
+
+ ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
+ }
+
+ if ((gpa & ((1UL << ptpshift) - 1)) != 0)
+ panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
+
+ /*
+ * Update the 'gpa' -> 'hpa' mapping
+ */
+ if (remove) {
+ ptp[ptpindex] = 0;
+ } else {
+ ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
+
+ if (nlevels > 0)
+ ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
+ }
+
+ return (1UL << ptpshift);
+}
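+
+/*
+ * Worked example for the superpage selection above: mapping a 2MB chunk
+ * whose gpa and hpa are both 2MB-aligned, on hardware that reports 2MB
+ * superpage support (bit 0 of dom->spsmask), leaves the loop with
+ * spshift = 21. The page table walk then stops at ptpshift = 21, a single
+ * leaf entry with VTD_PTE_SUPERPAGE set is written, and 2MB (1UL << 21) is
+ * returned. Without superpage support spshift decays to 12 and the region
+ * is mapped one 4KB page per call.
+ */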
+
+static uint64_t
+vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
+{
+
+ return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
+}
+
+static uint64_t
+vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
+{
+
+ return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
+}
+
+static void
+vtd_invalidate_tlb(void *dom)
+{
+ int i;
+ struct vtdmap *vtdmap;
+
+ /*
+ * Invalidate the IOTLB.
+ * XXX use domain-selective invalidation for IOTLB
+ */
+ for (i = 0; i < drhd_num; i++) {
+ vtdmap = vtdmaps[i];
+ vtd_iotlb_global_invalidate(vtdmap);
+ }
+}
+
+static void *
+vtd_create_domain(vm_paddr_t maxaddr)
+{
+ struct domain *dom;
+ vm_paddr_t addr;
+ int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
+ struct vtdmap *vtdmap;
+
+ if (drhd_num <= 0)
+ panic("vtd_create_domain: no dma remapping hardware available");
+
+ vtdmap = vtdmaps[0];
+
+ /*
+ * Calculate AGAW.
+ * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
+ */
+ addr = 0;
+ for (gaw = 0; addr < maxaddr; gaw++)
+ addr = 1ULL << gaw;
+
+ res = (gaw - 12) % 9;
+ if (res == 0)
+ agaw = gaw;
+ else
+ agaw = gaw + 9 - res;
+
+ if (agaw > 64)
+ agaw = 64;
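+
+ /*
+ * Worked example: with maxaddr = 4GB the loop above terminates with
+ * gaw = 33, so res = (33 - 12) % 9 = 3 and agaw = 33 + 9 - 3 = 39,
+ * i.e. a 3-level page table if the hardware supports a 39-bit AGAW.
+ */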
+
+ /*
+ * Select the smallest Supported AGAW and the corresponding number
+ * of page table levels.
+ */
+ pt_levels = 2;
+ sagaw = 30;
+ addrwidth = 0;
+ tmp = VTD_CAP_SAGAW(vtdmap->cap);
+ for (i = 0; i < 5; i++) {
+ if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
+ break;
+ pt_levels++;
+ addrwidth++;
+ sagaw += 9;
+ if (sagaw > 64)
+ sagaw = 64;
+ }
+
+ if (i >= 5) {
+ panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
+ VTD_CAP_SAGAW(vtdmap->cap), agaw);
+ }
+
+ dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
+ dom->pt_levels = pt_levels;
+ dom->addrwidth = addrwidth;
+ dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
+ dom->id = domain_id();
+ dom->maxaddr = maxaddr;
+ dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
+ if ((uintptr_t)dom->ptp & PAGE_MASK)
+ panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
+
+ SLIST_INSERT_HEAD(&domhead, dom, next);
+
+ return (dom);
+}
+
+static void
+vtd_free_ptp(uint64_t *ptp, int level)
+{
+ int i;
+ uint64_t *nlp;
+
+ if (level > 1) {
+ for (i = 0; i < 512; i++) {
+ if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
+ continue;
+ if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
+ continue;
+ nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
+ vtd_free_ptp(nlp, level - 1);
+ }
+ }
+
+ bzero(ptp, PAGE_SIZE);
+ free(ptp, M_VTD);
+}
+
+static void
+vtd_destroy_domain(void *arg)
+{
+ struct domain *dom;
+
+ dom = arg;
+
+ SLIST_REMOVE(&domhead, dom, domain, next);
+ vtd_free_ptp(dom->ptp, dom->pt_levels);
+ free(dom, M_VTD);
+}
+
+struct iommu_ops iommu_ops_intel = {
+ vtd_init,
+ vtd_cleanup,
+ vtd_enable,
+ vtd_disable,
+ vtd_create_domain,
+ vtd_destroy_domain,
+ vtd_create_mapping,
+ vtd_remove_mapping,
+ vtd_add_device,
+ vtd_remove_device,
+ vtd_invalidate_tlb,
+};
diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c
new file mode 100644
index 0000000..c8447cc
--- /dev/null
+++ b/sys/amd64/vmm/io/iommu.c
@@ -0,0 +1,277 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/md_var.h>
+
+#include "vmm_util.h"
+#include "vmm_mem.h"
+#include "iommu.h"
+
+static boolean_t iommu_avail;
+static struct iommu_ops *ops;
+static void *host_domain;
+
+static __inline int
+IOMMU_INIT(void)
+{
+ if (ops != NULL)
+ return ((*ops->init)());
+ else
+ return (ENXIO);
+}
+
+static __inline void
+IOMMU_CLEANUP(void)
+{
+ if (ops != NULL && iommu_avail)
+ (*ops->cleanup)();
+}
+
+static __inline void *
+IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr)
+{
+
+ if (ops != NULL && iommu_avail)
+ return ((*ops->create_domain)(maxaddr));
+ else
+ return (NULL);
+}
+
+static __inline void
+IOMMU_DESTROY_DOMAIN(void *dom)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->destroy_domain)(dom);
+}
+
+static __inline uint64_t
+IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
+{
+
+ if (ops != NULL && iommu_avail)
+ return ((*ops->create_mapping)(domain, gpa, hpa, len));
+ else
+ return (len); /* XXX */
+}
+
+static __inline uint64_t
+IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len)
+{
+
+ if (ops != NULL && iommu_avail)
+ return ((*ops->remove_mapping)(domain, gpa, len));
+ else
+ return (len); /* XXX */
+}
+
+static __inline void
+IOMMU_ADD_DEVICE(void *domain, int bus, int slot, int func)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->add_device)(domain, bus, slot, func);
+}
+
+static __inline void
+IOMMU_REMOVE_DEVICE(void *domain, int bus, int slot, int func)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->remove_device)(domain, bus, slot, func);
+}
+
+static __inline void
+IOMMU_INVALIDATE_TLB(void *domain)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->invalidate_tlb)(domain);
+}
+
+static __inline void
+IOMMU_ENABLE(void)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->enable)();
+}
+
+static __inline void
+IOMMU_DISABLE(void)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->disable)();
+}
+
+void
+iommu_init(void)
+{
+ int error, bus, slot, func;
+ vm_paddr_t maxaddr;
+ const char *name;
+ device_t dev;
+
+ if (vmm_is_intel())
+ ops = &iommu_ops_intel;
+ else if (vmm_is_amd())
+ ops = &iommu_ops_amd;
+ else
+ ops = NULL;
+
+ error = IOMMU_INIT();
+ if (error)
+ return;
+
+ iommu_avail = TRUE;
+
+ /*
+ * Create a domain for the devices owned by the host
+ */
+ maxaddr = vmm_mem_maxaddr();
+ host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
+ if (host_domain == NULL)
+ panic("iommu_init: unable to create a host domain");
+
+ /*
+ * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to
+ * the host
+ */
+ iommu_create_mapping(host_domain, 0, 0, maxaddr);
+
+ for (bus = 0; bus <= PCI_BUSMAX; bus++) {
+ for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
+ for (func = 0; func <= PCI_FUNCMAX; func++) {
+ dev = pci_find_dbsf(0, bus, slot, func);
+ if (dev == NULL)
+ continue;
+
+ /* skip passthrough devices */
+ name = device_get_name(dev);
+ if (name != NULL && strcmp(name, "ppt") == 0)
+ continue;
+
+ /* everything else belongs to the host domain */
+ iommu_add_device(host_domain, bus, slot, func);
+ }
+ }
+ }
+ IOMMU_ENABLE();
+}
+
+void
+iommu_cleanup(void)
+{
+ IOMMU_DISABLE();
+ IOMMU_DESTROY_DOMAIN(host_domain);
+ IOMMU_CLEANUP();
+}
+
+void *
+iommu_create_domain(vm_paddr_t maxaddr)
+{
+
+ return (IOMMU_CREATE_DOMAIN(maxaddr));
+}
+
+void
+iommu_destroy_domain(void *dom)
+{
+
+ IOMMU_DESTROY_DOMAIN(dom);
+}
+
+void
+iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
+{
+ uint64_t mapped, remaining;
+
+ remaining = len;
+
+ while (remaining > 0) {
+ mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining);
+ gpa += mapped;
+ hpa += mapped;
+ remaining -= mapped;
+ }
+}
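+
+/*
+ * The create_mapping/remove_mapping ops map or unmap only a prefix of the
+ * requested range - as large a chunk as page/superpage size and alignment
+ * allow - and return the number of bytes actually handled. That is why
+ * iommu_create_mapping() above and iommu_remove_mapping() below loop until
+ * 'remaining' reaches zero.
+ */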
+
+void
+iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len)
+{
+ uint64_t unmapped, remaining;
+
+ remaining = len;
+
+ while (remaining > 0) {
+ unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining);
+ gpa += unmapped;
+ remaining -= unmapped;
+ }
+}
+
+void *
+iommu_host_domain(void)
+{
+
+ return (host_domain);
+}
+
+void
+iommu_add_device(void *dom, int bus, int slot, int func)
+{
+
+ IOMMU_ADD_DEVICE(dom, bus, slot, func);
+}
+
+void
+iommu_remove_device(void *dom, int bus, int slot, int func)
+{
+
+ IOMMU_REMOVE_DEVICE(dom, bus, slot, func);
+}
+
+void
+iommu_invalidate_tlb(void *domain)
+{
+
+ IOMMU_INVALIDATE_TLB(domain);
+}
diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h
new file mode 100644
index 0000000..d5c1d6e
--- /dev/null
+++ b/sys/amd64/vmm/io/iommu.h
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IO_IOMMU_H_
+#define _IO_IOMMU_H_
+
+typedef int (*iommu_init_func_t)(void);
+typedef void (*iommu_cleanup_func_t)(void);
+typedef void (*iommu_enable_func_t)(void);
+typedef void (*iommu_disable_func_t)(void);
+typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr);
+typedef void (*iommu_destroy_domain_t)(void *domain);
+typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa,
+ vm_paddr_t hpa, uint64_t len);
+typedef uint64_t (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa,
+ uint64_t len);
+typedef void (*iommu_add_device_t)(void *domain, int bus, int slot, int func);
+typedef void (*iommu_remove_device_t)(void *dom, int bus, int slot, int func);
+typedef void (*iommu_invalidate_tlb_t)(void *dom);
+
+struct iommu_ops {
+ iommu_init_func_t init; /* module wide */
+ iommu_cleanup_func_t cleanup;
+ iommu_enable_func_t enable;
+ iommu_disable_func_t disable;
+
+ iommu_create_domain_t create_domain; /* domain-specific */
+ iommu_destroy_domain_t destroy_domain;
+ iommu_create_mapping_t create_mapping;
+ iommu_remove_mapping_t remove_mapping;
+ iommu_add_device_t add_device;
+ iommu_remove_device_t remove_device;
+ iommu_invalidate_tlb_t invalidate_tlb;
+};
+
+extern struct iommu_ops iommu_ops_intel;
+extern struct iommu_ops iommu_ops_amd;
+
+void iommu_init(void);
+void iommu_cleanup(void);
+void *iommu_host_domain(void);
+void *iommu_create_domain(vm_paddr_t maxaddr);
+void iommu_destroy_domain(void *dom);
+void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa,
+ size_t len);
+void iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len);
+void iommu_add_device(void *dom, int bus, int slot, int func);
+void iommu_remove_device(void *dom, int bus, int slot, int func);
+void iommu_invalidate_tlb(void *domain);
+#endif
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
new file mode 100644
index 0000000..5aedaf2
--- /dev/null
+++ b/sys/amd64/vmm/io/ppt.c
@@ -0,0 +1,594 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/pciio.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/resource.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmm_lapic.h"
+#include "vmm_ktr.h"
+
+#include "iommu.h"
+#include "ppt.h"
+
+/* XXX locking */
+
+#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
+#define MAX_MSIMSGS 32
+
+/*
+ * If the MSI-X table is located in the middle of a BAR then that MMIO
+ * region gets split into two segments - one segment above the MSI-X table
+ * and the other segment below the MSI-X table - with a hole in place of
+ * the MSI-X table so accesses to it can be trapped and emulated.
+ *
+ * So, allocate an MMIO segment for each BAR register + 1 additional segment.
+ */
+#define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
+
+MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
+
+struct pptintr_arg { /* pptintr(pptintr_arg) */
+ struct pptdev *pptdev;
+ int vec;
+ int vcpu;
+};
+
+static struct pptdev {
+ device_t dev;
+ struct vm *vm; /* owner of this device */
+ struct vm_memory_segment mmio[MAX_MMIOSEGS];
+ struct {
+ int num_msgs; /* guest state */
+
+ int startrid; /* host state */
+ struct resource *res[MAX_MSIMSGS];
+ void *cookie[MAX_MSIMSGS];
+ struct pptintr_arg arg[MAX_MSIMSGS];
+ } msi;
+
+ struct {
+ int num_msgs;
+ int startrid;
+ int msix_table_rid;
+ struct resource *msix_table_res;
+ struct resource **res;
+ void **cookie;
+ struct pptintr_arg *arg;
+ } msix;
+} pptdevs[64];
+
+static int num_pptdevs;
+
+static int
+ppt_probe(device_t dev)
+{
+ int bus, slot, func;
+ struct pci_devinfo *dinfo;
+
+ dinfo = (struct pci_devinfo *)device_get_ivars(dev);
+
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+
+ /*
+ * To qualify as a pci passthrough device, a device must:
+ * - be allowed by the administrator to be used in this role
+ * - be an endpoint device
+ */
+ if (vmm_is_pptdev(bus, slot, func) &&
+ (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
+ return (0);
+ else
+ return (ENXIO);
+}
+
+static int
+ppt_attach(device_t dev)
+{
+ int n;
+
+ if (num_pptdevs >= MAX_PPTDEVS) {
+ printf("ppt_attach: maximum number of pci passthrough devices "
+ "exceeded\n");
+ return (ENXIO);
+ }
+
+ n = num_pptdevs++;
+ pptdevs[n].dev = dev;
+
+ if (bootverbose)
+ device_printf(dev, "attached\n");
+
+ return (0);
+}
+
+static int
+ppt_detach(device_t dev)
+{
+ /*
+ * XXX check whether there are any pci passthrough devices assigned
+ * to guests before we allow this driver to detach.
+ */
+
+ return (0);
+}
+
+static device_method_t ppt_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, ppt_probe),
+ DEVMETHOD(device_attach, ppt_attach),
+ DEVMETHOD(device_detach, ppt_detach),
+ {0, 0}
+};
+
+static devclass_t ppt_devclass;
+DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
+DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
+
+static struct pptdev *
+ppt_find(int bus, int slot, int func)
+{
+ device_t dev;
+ int i, b, s, f;
+
+ for (i = 0; i < num_pptdevs; i++) {
+ dev = pptdevs[i].dev;
+ b = pci_get_bus(dev);
+ s = pci_get_slot(dev);
+ f = pci_get_function(dev);
+ if (bus == b && slot == s && func == f)
+ return (&pptdevs[i]);
+ }
+ return (NULL);
+}
+
+static void
+ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
+{
+ int i;
+ struct vm_memory_segment *seg;
+
+ for (i = 0; i < MAX_MMIOSEGS; i++) {
+ seg = &ppt->mmio[i];
+ if (seg->len == 0)
+ continue;
+ (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
+ bzero(seg, sizeof(struct vm_memory_segment));
+ }
+}
+
+static void
+ppt_teardown_msi(struct pptdev *ppt)
+{
+ int i, rid;
+ void *cookie;
+ struct resource *res;
+
+ if (ppt->msi.num_msgs == 0)
+ return;
+
+ for (i = 0; i < ppt->msi.num_msgs; i++) {
+ rid = ppt->msi.startrid + i;
+ res = ppt->msi.res[i];
+ cookie = ppt->msi.cookie[i];
+
+ if (cookie != NULL)
+ bus_teardown_intr(ppt->dev, res, cookie);
+
+ if (res != NULL)
+ bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
+
+ ppt->msi.res[i] = NULL;
+ ppt->msi.cookie[i] = NULL;
+ }
+
+ if (ppt->msi.startrid == 1)
+ pci_release_msi(ppt->dev);
+
+ ppt->msi.num_msgs = 0;
+}
+
+static void
+ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
+{
+ int rid;
+ struct resource *res;
+ void *cookie;
+
+ rid = ppt->msix.startrid + idx;
+ res = ppt->msix.res[idx];
+ cookie = ppt->msix.cookie[idx];
+
+ if (cookie != NULL)
+ bus_teardown_intr(ppt->dev, res, cookie);
+
+ if (res != NULL)
+ bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
+
+ ppt->msix.res[idx] = NULL;
+ ppt->msix.cookie[idx] = NULL;
+}
+
+static void
+ppt_teardown_msix(struct pptdev *ppt)
+{
+ int i;
+
+ if (ppt->msix.num_msgs == 0)
+ return;
+
+ for (i = 0; i < ppt->msix.num_msgs; i++)
+ ppt_teardown_msix_intr(ppt, i);
+
+ if (ppt->msix.msix_table_res) {
+ bus_release_resource(ppt->dev, SYS_RES_MEMORY,
+ ppt->msix.msix_table_rid,
+ ppt->msix.msix_table_res);
+ ppt->msix.msix_table_res = NULL;
+ ppt->msix.msix_table_rid = 0;
+ }
+
+ free(ppt->msix.res, M_PPTMSIX);
+ free(ppt->msix.cookie, M_PPTMSIX);
+ free(ppt->msix.arg, M_PPTMSIX);
+
+ pci_release_msi(ppt->dev);
+
+ ppt->msix.num_msgs = 0;
+}
+
+int
+ppt_assign_device(struct vm *vm, int bus, int slot, int func)
+{
+ struct pptdev *ppt;
+
+ ppt = ppt_find(bus, slot, func);
+ if (ppt != NULL) {
+ /*
+ * If this device is owned by a different VM then we
+ * cannot change its owner.
+ */
+ if (ppt->vm != NULL && ppt->vm != vm)
+ return (EBUSY);
+
+ ppt->vm = vm;
+ iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
+ return (0);
+ }
+ return (ENOENT);
+}
+
+int
+ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
+{
+ struct pptdev *ppt;
+
+ ppt = ppt_find(bus, slot, func);
+ if (ppt != NULL) {
+ /*
+ * If this device is not owned by this 'vm' then bail out.
+ */
+ if (ppt->vm != vm)
+ return (EBUSY);
+ ppt_unmap_mmio(vm, ppt);
+ ppt_teardown_msi(ppt);
+ ppt_teardown_msix(ppt);
+ iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
+ ppt->vm = NULL;
+ return (0);
+ }
+ return (ENOENT);
+}
+
+int
+ppt_unassign_all(struct vm *vm)
+{
+ int i, bus, slot, func;
+ device_t dev;
+
+ for (i = 0; i < num_pptdevs; i++) {
+ if (pptdevs[i].vm == vm) {
+ dev = pptdevs[i].dev;
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ ppt_unassign_device(vm, bus, slot, func);
+ }
+ }
+
+ return (0);
+}
+
+int
+ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
+ vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
+{
+ int i, error;
+ struct vm_memory_segment *seg;
+ struct pptdev *ppt;
+
+ ppt = ppt_find(bus, slot, func);
+ if (ppt != NULL) {
+ if (ppt->vm != vm)
+ return (EBUSY);
+
+ for (i = 0; i < MAX_MMIOSEGS; i++) {
+ seg = &ppt->mmio[i];
+ if (seg->len == 0) {
+ error = vm_map_mmio(vm, gpa, len, hpa);
+ if (error == 0) {
+ seg->gpa = gpa;
+ seg->len = len;
+ }
+ return (error);
+ }
+ }
+ return (ENOSPC);
+ }
+ return (ENOENT);
+}
+
+static int
+pptintr(void *arg)
+{
+ int vec;
+ struct pptdev *ppt;
+ struct pptintr_arg *pptarg;
+
+ pptarg = arg;
+ ppt = pptarg->pptdev;
+ vec = pptarg->vec;
+
+ if (ppt->vm != NULL)
+ (void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
+ else {
+ /*
+ * XXX
+ * This is not expected to happen - panic?
+ */
+ }
+
+ /*
+ * For legacy interrupts give other filters a chance in case
+ * the interrupt was not generated by the passthrough device.
+ */
+ if (ppt->msi.startrid == 0)
+ return (FILTER_STRAY);
+ else
+ return (FILTER_HANDLED);
+}
+
+int
+ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
+ int destcpu, int vector, int numvec)
+{
+ int i, rid, flags;
+ int msi_count, startrid, error, tmp;
+ struct pptdev *ppt;
+
+ if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
+ (vector < 0 || vector > 255) ||
+ (numvec < 0 || numvec > MAX_MSIMSGS))
+ return (EINVAL);
+
+ ppt = ppt_find(bus, slot, func);
+ if (ppt == NULL)
+ return (ENOENT);
+ if (ppt->vm != vm) /* Make sure we own this device */
+ return (EBUSY);
+
+ /* Free any allocated resources */
+ ppt_teardown_msi(ppt);
+
+ if (numvec == 0) /* nothing more to do */
+ return (0);
+
+ flags = RF_ACTIVE;
+ msi_count = pci_msi_count(ppt->dev);
+ if (msi_count == 0) {
+ startrid = 0; /* legacy interrupt */
+ msi_count = 1;
+ flags |= RF_SHAREABLE;
+ } else
+ startrid = 1; /* MSI */
+
+ /*
+ * The device must be capable of supporting the number of vectors
+ * the guest wants to allocate.
+ */
+ if (numvec > msi_count)
+ return (EINVAL);
+
+ /*
+ * Make sure that we can allocate all the MSI vectors that are needed
+ * by the guest.
+ */
+ if (startrid == 1) {
+ tmp = numvec;
+ error = pci_alloc_msi(ppt->dev, &tmp);
+		if (error)
+			return (error);
+		if (tmp != numvec) {
+			pci_release_msi(ppt->dev);
+			return (ENOSPC);
+		}
+ }
+
+ ppt->msi.startrid = startrid;
+
+ /*
+ * Allocate the irq resource and attach it to the interrupt handler.
+ */
+ for (i = 0; i < numvec; i++) {
+ ppt->msi.num_msgs = i + 1;
+ ppt->msi.cookie[i] = NULL;
+
+ rid = startrid + i;
+ ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
+ &rid, flags);
+ if (ppt->msi.res[i] == NULL)
+ break;
+
+ ppt->msi.arg[i].pptdev = ppt;
+ ppt->msi.arg[i].vec = vector + i;
+ ppt->msi.arg[i].vcpu = destcpu;
+
+ error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
+ INTR_TYPE_NET | INTR_MPSAFE,
+ pptintr, NULL, &ppt->msi.arg[i],
+ &ppt->msi.cookie[i]);
+ if (error != 0)
+ break;
+ }
+
+ if (i < numvec) {
+ ppt_teardown_msi(ppt);
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+int
+ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
+ int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+{
+ struct pptdev *ppt;
+ struct pci_devinfo *dinfo;
+ int numvec, alloced, rid, error;
+ size_t res_size, cookie_size, arg_size;
+
+ ppt = ppt_find(bus, slot, func);
+ if (ppt == NULL)
+ return (ENOENT);
+ if (ppt->vm != vm) /* Make sure we own this device */
+ return (EBUSY);
+
+ dinfo = device_get_ivars(ppt->dev);
+ if (!dinfo)
+ return (ENXIO);
+
+ /*
+ * First-time configuration:
+ * Allocate the MSI-X table
+ * Allocate the IRQ resources
+ * Set up some variables in ppt->msix
+ */
+ if (ppt->msix.num_msgs == 0) {
+ numvec = pci_msix_count(ppt->dev);
+ if (numvec <= 0)
+ return (EINVAL);
+
+ ppt->msix.startrid = 1;
+ ppt->msix.num_msgs = numvec;
+
+ res_size = numvec * sizeof(ppt->msix.res[0]);
+ cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
+ arg_size = numvec * sizeof(ppt->msix.arg[0]);
+
+ ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
+ ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
+ M_WAITOK | M_ZERO);
+ ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
+
+ rid = dinfo->cfg.msix.msix_table_bar;
+ ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
+ SYS_RES_MEMORY, &rid, RF_ACTIVE);
+
+ if (ppt->msix.msix_table_res == NULL) {
+ ppt_teardown_msix(ppt);
+ return (ENOSPC);
+ }
+ ppt->msix.msix_table_rid = rid;
+
+ alloced = numvec;
+ error = pci_alloc_msix(ppt->dev, &alloced);
+ if (error || alloced != numvec) {
+ ppt_teardown_msix(ppt);
+			return (error == 0 ? ENOSPC : error);
+ }
+ }
+
+ if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
+ /* Tear down the IRQ if it's already set up */
+ ppt_teardown_msix_intr(ppt, idx);
+
+ /* Allocate the IRQ resource */
+ ppt->msix.cookie[idx] = NULL;
+ rid = ppt->msix.startrid + idx;
+ ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
+ &rid, RF_ACTIVE);
+ if (ppt->msix.res[idx] == NULL)
+ return (ENXIO);
+
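+		/*
+		 * Decode the guest-programmed MSI-X address/data pair: bits
+		 * 19:12 of the message address hold the APIC destination ID
+		 * (used here as the target vcpu) and the message data holds
+		 * the vector.
+		 */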
+ ppt->msix.arg[idx].pptdev = ppt;
+ ppt->msix.arg[idx].vec = msg;
+ ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
+
+ /* Setup the MSI-X interrupt */
+ error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
+ INTR_TYPE_NET | INTR_MPSAFE,
+ pptintr, NULL, &ppt->msix.arg[idx],
+ &ppt->msix.cookie[idx]);
+
+		if (error != 0) {
+			/*
+			 * bus_setup_intr() failed, so there is no handler to
+			 * tear down; just release the IRQ resource.
+			 */
+			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid,
+			    ppt->msix.res[idx]);
+			ppt->msix.cookie[idx] = NULL;
+			ppt->msix.res[idx] = NULL;
+			return (ENXIO);
+		}
+ } else {
+ /* Masked, tear it down if it's already been set up */
+ ppt_teardown_msix_intr(ppt, idx);
+ }
+
+ return (0);
+}
+
diff --git a/sys/amd64/vmm/io/ppt.h b/sys/amd64/vmm/io/ppt.h
new file mode 100644
index 0000000..63c8228
--- /dev/null
+++ b/sys/amd64/vmm/io/ppt.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IO_PPT_H_
+#define _IO_PPT_H_
+
+int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
+int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
+int ppt_unassign_all(struct vm *vm);
+int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
+ vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
+ int destcpu, int vector, int numvec);
+int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
+ int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+#endif
diff --git a/sys/amd64/vmm/io/vdev.c b/sys/amd64/vmm/io/vdev.c
new file mode 100644
index 0000000..cd6c5d1
--- /dev/null
+++ b/sys/amd64/vmm/io/vdev.c
@@ -0,0 +1,270 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include "vdev.h"
+
+struct vdev {
+ SLIST_ENTRY(vdev) entry;
+ struct vdev_ops *ops;
+ void *dev;
+};
+static SLIST_HEAD(, vdev) vdev_head;
+static int vdev_count;
+
+struct vdev_region {
+ SLIST_ENTRY(vdev_region) entry;
+ struct vdev_ops *ops;
+ void *dev;
+ struct io_region *io;
+};
+static SLIST_HEAD(, vdev_region) region_head;
+static int region_count;
+
+static MALLOC_DEFINE(M_VDEV, "vdev", "vdev");
+
+#define VDEV_INIT (0)
+#define VDEV_RESET (1)
+#define VDEV_HALT (2)
+
+// static const char* vdev_event_str[] = {"VDEV_INIT", "VDEV_RESET", "VDEV_HALT"};
+
+static int
+vdev_system_event(int event)
+{
+ struct vdev *vd;
+ int rc;
+
+ // TODO: locking
+ SLIST_FOREACH(vd, &vdev_head, entry) {
+ // printf("%s : %s Device %s\n", __func__, vdev_event_str[event], vd->ops->name);
+ switch (event) {
+ case VDEV_INIT:
+ rc = vd->ops->init(vd->dev);
+ break;
+ case VDEV_RESET:
+ rc = vd->ops->reset(vd->dev);
+ break;
+ case VDEV_HALT:
+ rc = vd->ops->halt(vd->dev);
+ break;
+		default:
+			rc = 0;
+			break;
+ }
+		if (rc) {
+			printf("vdev %s: event %d failed, rc=%d\n",
+			    vd->ops->name, event, rc);
+			return rc;
+		}
+ }
+ return 0;
+}
+
+int
+vdev_init(void)
+{
+ return vdev_system_event(VDEV_INIT);
+}
+
+int
+vdev_reset(void)
+{
+ return vdev_system_event(VDEV_RESET);
+}
+
+int
+vdev_halt(void)
+{
+ return vdev_system_event(VDEV_HALT);
+}
+
+void
+vdev_vm_init(void)
+{
+ SLIST_INIT(&vdev_head);
+ vdev_count = 0;
+
+ SLIST_INIT(&region_head);
+ region_count = 0;
+}
+
+void
+vdev_vm_cleanup(void)
+{
+ struct vdev *vd;
+
+ // TODO: locking
+ while (!SLIST_EMPTY(&vdev_head)) {
+ vd = SLIST_FIRST(&vdev_head);
+ SLIST_REMOVE_HEAD(&vdev_head, entry);
+ free(vd, M_VDEV);
+ vdev_count--;
+ }
+}
+
+int
+vdev_register(struct vdev_ops *ops, void *dev)
+{
+ struct vdev *vd;
+ vd = malloc(sizeof(*vd), M_VDEV, M_WAITOK | M_ZERO);
+ vd->ops = ops;
+ vd->dev = dev;
+
+ // TODO: locking
+ SLIST_INSERT_HEAD(&vdev_head, vd, entry);
+ vdev_count++;
+ return 0;
+}
+
+void
+vdev_unregister(void *dev)
+{
+ struct vdev *vd, *found;
+
+ found = NULL;
+ // TODO: locking
+ SLIST_FOREACH(vd, &vdev_head, entry) {
+ if (vd->dev == dev) {
+ found = vd;
+ }
+ }
+
+ if (found) {
+ SLIST_REMOVE(&vdev_head, found, vdev, entry);
+ free(found, M_VDEV);
+ }
+}
+
+#define IN_RANGE(val, start, end) \
+ (((val) >= (start)) && ((val) < (end)))
+
+static struct vdev_region*
+vdev_find_region(struct io_region *io, void *dev)
+{
+ struct vdev_region *region, *found;
+ uint64_t region_base;
+ uint64_t region_end;
+
+ found = NULL;
+
+ // TODO: locking
+ // FIXME: we should verify we are in the context the current
+ // vcpu here as well.
+ SLIST_FOREACH(region, &region_head, entry) {
+ region_base = region->io->base;
+ region_end = region_base + region->io->len;
+ if (IN_RANGE(io->base, region_base, region_end) &&
+ IN_RANGE(io->base+io->len, region_base, region_end+1) &&
+ (dev && dev == region->dev)) {
+ found = region;
+ break;
+ }
+ }
+ return found;
+}
+
+int
+vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io)
+{
+ struct vdev_region *region;
+
+ region = vdev_find_region(io, dev);
+ if (region) {
+ return -EEXIST;
+ }
+
+ region = malloc(sizeof(*region), M_VDEV, M_WAITOK | M_ZERO);
+ region->io = io;
+ region->ops = ops;
+ region->dev = dev;
+
+ // TODO: locking
+ SLIST_INSERT_HEAD(&region_head, region, entry);
+ region_count++;
+
+ return 0;
+}
+
+void
+vdev_unregister_region(void *dev, struct io_region *io)
+{
+ struct vdev_region *region;
+
+ region = vdev_find_region(io, dev);
+
+ if (region) {
+ SLIST_REMOVE(&region_head, region, vdev_region, entry);
+ free(region, M_VDEV);
+ region_count--;
+ }
+}
+
+static int
+vdev_memrw(uint64_t gpa, opsize_t size, uint64_t *data, int read)
+{
+ struct vdev_region *region;
+ struct io_region io;
+ region_attr_t attr;
+ int rc;
+
+ io.base = gpa;
+ io.len = size;
+
+ region = vdev_find_region(&io, NULL);
+ if (!region)
+ return -EINVAL;
+
+ attr = (read) ? MMIO_READ : MMIO_WRITE;
+ if (!(region->io->attr & attr))
+ return -EPERM;
+
+ if (read)
+ rc = region->ops->memread(region->dev, gpa, size, data);
+ else
+ rc = region->ops->memwrite(region->dev, gpa, size, *data);
+
+ return rc;
+}
+
+int
+vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data)
+{
+ return vdev_memrw(gpa, size, data, 1);
+}
+
+int
+vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data)
+{
+ return vdev_memrw(gpa, size, &data, 0);
+}
diff --git a/sys/amd64/vmm/io/vdev.h b/sys/amd64/vmm/io/vdev.h
new file mode 100644
index 0000000..6feeba8
--- /dev/null
+++ b/sys/amd64/vmm/io/vdev.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VDEV_H_
+#define _VDEV_H_
+
+typedef enum {
+ BYTE = 1,
+ WORD = 2,
+ DWORD = 4,
+ QWORD = 8,
+} opsize_t;
+
+typedef enum {
+ MMIO_READ = 1,
+ MMIO_WRITE = 2,
+} region_attr_t;
+
+struct io_region {
+ uint64_t base;
+ uint64_t len;
+ region_attr_t attr;
+ int vcpu;
+};
+
+typedef int (*vdev_init_t)(void* dev);
+typedef int (*vdev_reset_t)(void* dev);
+typedef int (*vdev_halt_t)(void* dev);
+typedef int (*vdev_memread_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t *data);
+typedef int (*vdev_memwrite_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t data);
+
+
+struct vdev_ops {
+ const char *name;
+ vdev_init_t init;
+ vdev_reset_t reset;
+ vdev_halt_t halt;
+ vdev_memread_t memread;
+ vdev_memwrite_t memwrite;
+};
+
+
+void vdev_vm_init(void);
+void vdev_vm_cleanup(void);
+
+int vdev_register(struct vdev_ops *ops, void *dev);
+void vdev_unregister(void *dev);
+
+int vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io);
+void vdev_unregister_region(void *dev, struct io_region *io);
+
+int vdev_init(void);
+int vdev_reset(void);
+int vdev_halt(void);
+int vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data);
+int vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data);
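+
+/*
+ * Illustrative usage sketch (hypothetical device, not part of this
+ * interface): a device supplies a vdev_ops, then registers itself and
+ * its MMIO window, after which vdev_memread()/vdev_memwrite() dispatch
+ * guest accesses to its callbacks.
+ *
+ *	static struct vdev_ops mydev_ops = {
+ *		.name = "mydev",
+ *		.init = mydev_init, .reset = mydev_reset, .halt = mydev_halt,
+ *		.memread = mydev_memread, .memwrite = mydev_memwrite,
+ *	};
+ *
+ *	vdev_register(&mydev_ops, &mydev_softc);
+ *	vdev_register_region(&mydev_ops, &mydev_softc, &mydev_mmio);
+ */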
+
+#endif /* _VDEV_H_ */
+
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
new file mode 100644
index 0000000..a56a36e
--- /dev/null
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -0,0 +1,907 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+
+#include <machine/clock.h>
+#include <x86/specialreg.h>
+#include <x86/apicreg.h>
+
+#include <machine/vmm.h>
+
+#include "vmm_stat.h"
+#include "vmm_lapic.h"
+#include "vmm_ktr.h"
+#include "vdev.h"
+#include "vlapic.h"
+
+#define VLAPIC_CTR0(vlapic, format) \
+ VMM_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
+
+#define VLAPIC_CTR1(vlapic, format, p1) \
+ VMM_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
+
+#define VLAPIC_CTR_IRR(vlapic, msg) \
+do { \
+ uint32_t *irrptr = &(vlapic)->apic.irr0; \
+ irrptr[0] = irrptr[0]; /* silence compiler */ \
+ VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \
+} while (0)
+
+#define VLAPIC_CTR_ISR(vlapic, msg) \
+do { \
+ uint32_t *isrptr = &(vlapic)->apic.isr0; \
+ isrptr[0] = isrptr[0]; /* silence compiler */ \
+ VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \
+ VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
+} while (0)
+
+static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
+
+#define PRIO(x) ((x) >> 4)
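+/* The priority class of a vector is its high nibble. */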
+
+#define VLAPIC_VERSION (16)
+#define VLAPIC_MAXLVT_ENTRIES (5)
+
+#define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
+
+enum boot_state {
+ BS_INIT,
+ BS_SIPI,
+ BS_RUNNING
+};
+
+struct vlapic {
+ struct vm *vm;
+ int vcpuid;
+
+ struct io_region *mmio;
+ struct vdev_ops *ops;
+ struct LAPIC apic;
+
+ int esr_update;
+
+ int divisor;
+ int ccr_ticks;
+
+ /*
+ * The 'isrvec_stk' is a stack of vectors injected by the local apic.
+ * A vector is popped from the stack when the processor does an EOI.
+ * The vector on the top of the stack is used to compute the
+ * Processor Priority in conjunction with the TPR.
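+	 *
+	 * For example (illustrative): after vectors 0x30 and then 0x45
+	 * are accepted, the stack holds { 0, 0x30, 0x45 } and ISRV is
+	 * 0x45; the EOI for 0x45 pops it, making ISRV 0x30 again.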
+ */
+ uint8_t isrvec_stk[ISRVEC_STK_SIZE];
+ int isrvec_stk_top;
+
+ uint64_t msr_apicbase;
+ enum boot_state boot_state;
+};
+
+#define VLAPIC_BUS_FREQ tsc_freq
+
+static int
+vlapic_timer_divisor(uint32_t dcr)
+{
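+	/* Bits 0, 1 and 3 of the DCR encode the divisor; bit 2 is reserved. */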
+ switch (dcr & 0xB) {
+ case APIC_TDCR_1:
+ return (1);
+ case APIC_TDCR_2:
+ return (2);
+ case APIC_TDCR_4:
+ return (4);
+ case APIC_TDCR_8:
+ return (8);
+ case APIC_TDCR_16:
+ return (16);
+ case APIC_TDCR_32:
+ return (32);
+ case APIC_TDCR_64:
+ return (64);
+ case APIC_TDCR_128:
+ return (128);
+ default:
+ panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
+ }
+}
+
+static void
+vlapic_mask_lvts(uint32_t *lvts, int num_lvt)
+{
+ int i;
+ for (i = 0; i < num_lvt; i++) {
+ *lvts |= APIC_LVT_M;
+ lvts += 4;
+ }
+}
+
+#if 0
+static inline void
+vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
+{
+ printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
+ *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
+ *lvt & APIC_LVTT_M);
+}
+#endif
+
+static uint64_t
+vlapic_get_ccr(struct vlapic *vlapic)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ return lapic->ccr_timer;
+}
+
+static void
+vlapic_update_errors(struct vlapic *vlapic)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ lapic->esr = 0; // XXX
+}
+
+static void
+vlapic_init_ipi(struct vlapic *vlapic)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ lapic->version = VLAPIC_VERSION;
+	lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT);
+ lapic->dfr = 0xffffffff;
+ lapic->svr = APIC_SVR_VECTOR;
+ vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
+}
+
+static int
+vlapic_op_reset(void* dev)
+{
+ struct vlapic *vlapic = (struct vlapic*)dev;
+ struct LAPIC *lapic = &vlapic->apic;
+
+ memset(lapic, 0, sizeof(*lapic));
+ lapic->apr = vlapic->vcpuid;
+ vlapic_init_ipi(vlapic);
+ vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer);
+
+ if (vlapic->vcpuid == 0)
+ vlapic->boot_state = BS_RUNNING; /* BSP */
+ else
+ vlapic->boot_state = BS_INIT; /* AP */
+
+ return 0;
+}
+
+static int
+vlapic_op_init(void* dev)
+{
+ struct vlapic *vlapic = (struct vlapic*)dev;
+ vdev_register_region(vlapic->ops, vlapic, vlapic->mmio);
+ return vlapic_op_reset(dev);
+}
+
+static int
+vlapic_op_halt(void* dev)
+{
+ struct vlapic *vlapic = (struct vlapic*)dev;
+ vdev_unregister_region(vlapic, vlapic->mmio);
+ return 0;
+}
+
+void
+vlapic_set_intr_ready(struct vlapic *vlapic, int vector)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ uint32_t *irrptr;
+ int idx;
+
+ if (vector < 0 || vector >= 256)
+ panic("vlapic_set_intr_ready: invalid vector %d\n", vector);
+
+ idx = (vector / 32) * 4;
+ irrptr = &lapic->irr0;
+ atomic_set_int(&irrptr[idx], 1 << (vector % 32));
+ VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
+}
+
+static void
+vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
+{
+ uint32_t icr_timer;
+
+ icr_timer = vlapic->apic.icr_timer;
+
+ vlapic->ccr_ticks = ticks;
+ if (elapsed < icr_timer)
+ vlapic->apic.ccr_timer = icr_timer - elapsed;
+ else {
+ /*
+		 * This can happen when the guest tries to run its local
+		 * apic timer at a rate higher than the setting of 'hz'
+		 * in the host.
+ *
+ * We deal with this by running the guest local apic timer
+ * at the rate of the host's 'hz' setting.
+ */
+ vlapic->apic.ccr_timer = 0;
+ }
+}
+
+static __inline uint32_t *
+vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ int i;
+
+ if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) {
+ panic("vlapic_get_lvt: invalid LVT\n");
+ }
+ i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
+	return ((&lapic->lvt_timer) + i);
+}
+
+#if 1
+static void
+dump_isrvec_stk(struct vlapic *vlapic)
+{
+ int i;
+ uint32_t *isrptr;
+
+ isrptr = &vlapic->apic.isr0;
+ for (i = 0; i < 8; i++)
+ printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
+
+ for (i = 0; i <= vlapic->isrvec_stk_top; i++)
+ printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
+}
+#endif
+
+/*
+ * Algorithm adopted from section "Interrupt, Task and Processor Priority"
+ * in Intel Architecture Manual Vol 3a.
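+ *
+ * Worked example: with TPR 0x40 and ISRV 0x51, PRIO(TPR) = 4 is less
+ * than PRIO(ISRV) = 5, so PPR becomes 0x51 & 0xf0 = 0x50.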
+ */
+static void
+vlapic_update_ppr(struct vlapic *vlapic)
+{
+ int isrvec, tpr, ppr;
+
+ /*
+ * Note that the value on the stack at index 0 is always 0.
+ *
+ * This is a placeholder for the value of ISRV when none of the
+ * bits is set in the ISRx registers.
+ */
+ isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
+ tpr = vlapic->apic.tpr;
+
+#if 1
+ {
+ int i, lastprio, curprio, vector, idx;
+ uint32_t *isrptr;
+
+ if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
+ panic("isrvec_stk is corrupted: %d", isrvec);
+
+ /*
+ * Make sure that the priority of the nested interrupts is
+ * always increasing.
+ */
+ lastprio = -1;
+ for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
+ curprio = PRIO(vlapic->isrvec_stk[i]);
+ if (curprio <= lastprio) {
+ dump_isrvec_stk(vlapic);
+ panic("isrvec_stk does not satisfy invariant");
+ }
+ lastprio = curprio;
+ }
+
+ /*
+ * Make sure that each bit set in the ISRx registers has a
+ * corresponding entry on the isrvec stack.
+ */
+ i = 1;
+ isrptr = &vlapic->apic.isr0;
+ for (vector = 0; vector < 256; vector++) {
+ idx = (vector / 32) * 4;
+ if (isrptr[idx] & (1 << (vector % 32))) {
+ if (i > vlapic->isrvec_stk_top ||
+ vlapic->isrvec_stk[i] != vector) {
+ dump_isrvec_stk(vlapic);
+ panic("ISR and isrvec_stk out of sync");
+ }
+ i++;
+ }
+ }
+ }
+#endif
+
+ if (PRIO(tpr) >= PRIO(isrvec))
+ ppr = tpr;
+ else
+ ppr = isrvec & 0xf0;
+
+ vlapic->apic.ppr = ppr;
+ VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
+}
+
+static void
+vlapic_process_eoi(struct vlapic *vlapic)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ uint32_t *isrptr;
+ int i, idx, bitpos;
+
+ isrptr = &lapic->isr0;
+
+ /*
+	 * The x86 architecture reserves the first 32 vectors for use
+ * by the processor.
+ */
+ for (i = 7; i > 0; i--) {
+ idx = i * 4;
+ bitpos = fls(isrptr[idx]);
+ if (bitpos != 0) {
+ if (vlapic->isrvec_stk_top <= 0) {
+ panic("invalid vlapic isrvec_stk_top %d",
+ vlapic->isrvec_stk_top);
+ }
+ isrptr[idx] &= ~(1 << (bitpos - 1));
+ VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
+ vlapic->isrvec_stk_top--;
+ vlapic_update_ppr(vlapic);
+ return;
+ }
+ }
+}
+
+static __inline int
+vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
+{
+ return (*lvt & mask);
+}
+
+static __inline int
+vlapic_periodic_timer(struct vlapic *vlapic)
+{
+ uint32_t *lvt;
+
+ lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
+
+ return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
+}
+
+static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
+
+static void
+vlapic_fire_timer(struct vlapic *vlapic)
+{
+ int vector;
+ uint32_t *lvt;
+
+ lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
+
+ if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
+ vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
+ vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
+ vlapic_set_intr_ready(vlapic, vector);
+ }
+}
+
+static int
+lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
+{
+ int i;
+ cpuset_t dmask;
+ uint32_t dest, vec, mode;
+ struct vlapic *vlapic2;
+ struct vm_exit *vmexit;
+
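+	/*
+	 * The destination field lives in the upper half of the ICR: the
+	 * full 32 bits in x2APIC mode, or only the top byte (bits 63:56)
+	 * in xAPIC mode.
+	 */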
+ if (x2apic(vlapic))
+ dest = icrval >> 32;
+ else
+ dest = icrval >> (32 + 24);
+ vec = icrval & APIC_VECTOR_MASK;
+ mode = icrval & APIC_DELMODE_MASK;
+
+ if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
+ switch (icrval & APIC_DEST_MASK) {
+ case APIC_DEST_DESTFLD:
+ CPU_SETOF(dest, &dmask);
+ break;
+ case APIC_DEST_SELF:
+ CPU_SETOF(vlapic->vcpuid, &dmask);
+ break;
+ case APIC_DEST_ALLISELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ break;
+ case APIC_DEST_ALLESELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ CPU_CLR(vlapic->vcpuid, &dmask);
+ break;
+ }
+
+ while ((i = cpusetobj_ffs(&dmask)) != 0) {
+ i--;
+ CPU_CLR(i, &dmask);
+ if (mode == APIC_DELMODE_FIXED)
+ lapic_set_intr(vlapic->vm, i, vec);
+ else
+ vm_inject_nmi(vlapic->vm, i);
+ }
+
+ return (0); /* handled completely in the kernel */
+ }
+
+ if (mode == APIC_DELMODE_INIT) {
+ if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
+ return (0);
+
+ if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
+ vlapic2 = vm_lapic(vlapic->vm, dest);
+
+ /* move from INIT to waiting-for-SIPI state */
+ if (vlapic2->boot_state == BS_INIT) {
+ vlapic2->boot_state = BS_SIPI;
+ }
+
+ return (0);
+ }
+ }
+
+ if (mode == APIC_DELMODE_STARTUP) {
+ if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
+ vlapic2 = vm_lapic(vlapic->vm, dest);
+
+ /*
+ * Ignore SIPIs in any state other than wait-for-SIPI
+ */
+ if (vlapic2->boot_state != BS_SIPI)
+ return (0);
+
+ vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
+ vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
+ vmexit->u.spinup_ap.vcpu = dest;
+ vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+
+ /*
+ * XXX this assumes that the startup IPI always succeeds
+ */
+ vlapic2->boot_state = BS_RUNNING;
+ vm_activate_cpu(vlapic2->vm, dest);
+
+ return (0);
+ }
+ }
+
+ /*
+ * This will cause a return to userland.
+ */
+ return (1);
+}
+
+int
+vlapic_pending_intr(struct vlapic *vlapic)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ int idx, i, bitpos, vector;
+ uint32_t *irrptr, val;
+
+ irrptr = &lapic->irr0;
+
+ /*
+	 * The x86 architecture reserves the first 32 vectors for use
+ * by the processor.
+ */
+ for (i = 7; i > 0; i--) {
+ idx = i * 4;
+ val = atomic_load_acq_int(&irrptr[idx]);
+ bitpos = fls(val);
+ if (bitpos != 0) {
+ vector = i * 32 + (bitpos - 1);
+ if (PRIO(vector) > PRIO(lapic->ppr)) {
+ VLAPIC_CTR1(vlapic, "pending intr %d", vector);
+ return (vector);
+ } else
+ break;
+ }
+ }
+ VLAPIC_CTR0(vlapic, "no pending intr");
+ return (-1);
+}
+
+void
+vlapic_intr_accepted(struct vlapic *vlapic, int vector)
+{
+ struct LAPIC *lapic = &vlapic->apic;
+ uint32_t *irrptr, *isrptr;
+ int idx, stk_top;
+
+ /*
+ * clear the ready bit for vector being accepted in irr
+ * and set the vector as in service in isr.
+ */
+ idx = (vector / 32) * 4;
+
+ irrptr = &lapic->irr0;
+ atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
+ VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");
+
+ isrptr = &lapic->isr0;
+ isrptr[idx] |= 1 << (vector % 32);
+ VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
+
+ /*
+ * Update the PPR
+ */
+ vlapic->isrvec_stk_top++;
+
+ stk_top = vlapic->isrvec_stk_top;
+ if (stk_top >= ISRVEC_STK_SIZE)
+ panic("isrvec_stk_top overflow %d", stk_top);
+
+ vlapic->isrvec_stk[stk_top] = vector;
+ vlapic_update_ppr(vlapic);
+}
+
+int
+vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data)
+{
+ struct vlapic *vlapic = (struct vlapic*)dev;
+ struct LAPIC *lapic = &vlapic->apic;
+	uint64_t offset = gpa & (PAGE_SIZE - 1);	/* offset into the APIC page */
+ uint32_t *reg;
+ int i;
+
+ if (offset > sizeof(*lapic)) {
+ *data = 0;
+ return 0;
+ }
+
+ offset &= ~3;
+	switch (offset) {
+ case APIC_OFFSET_ID:
+ if (x2apic(vlapic))
+ *data = vlapic->vcpuid;
+ else
+ *data = vlapic->vcpuid << 24;
+ break;
+ case APIC_OFFSET_VER:
+ *data = lapic->version;
+ break;
+ case APIC_OFFSET_TPR:
+ *data = lapic->tpr;
+ break;
+ case APIC_OFFSET_APR:
+ *data = lapic->apr;
+ break;
+ case APIC_OFFSET_PPR:
+ *data = lapic->ppr;
+ break;
+ case APIC_OFFSET_EOI:
+ *data = lapic->eoi;
+ break;
+ case APIC_OFFSET_LDR:
+ *data = lapic->ldr;
+ break;
+ case APIC_OFFSET_DFR:
+ *data = lapic->dfr;
+ break;
+ case APIC_OFFSET_SVR:
+ *data = lapic->svr;
+ break;
+ case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
+ i = (offset - APIC_OFFSET_ISR0) >> 2;
+ reg = &lapic->isr0;
+ *data = *(reg + i);
+ break;
+ case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
+ i = (offset - APIC_OFFSET_TMR0) >> 2;
+ reg = &lapic->tmr0;
+ *data = *(reg + i);
+ break;
+ case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
+ i = (offset - APIC_OFFSET_IRR0) >> 2;
+ reg = &lapic->irr0;
+ *data = atomic_load_acq_int(reg + i);
+ break;
+ case APIC_OFFSET_ESR:
+ *data = lapic->esr;
+ break;
+ case APIC_OFFSET_ICR_LOW:
+ *data = lapic->icr_lo;
+ break;
+ case APIC_OFFSET_ICR_HI:
+ *data = lapic->icr_hi;
+ break;
+ case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+ reg = vlapic_get_lvt(vlapic, offset);
+ *data = *(reg);
+ break;
+ case APIC_OFFSET_ICR:
+ *data = lapic->icr_timer;
+ break;
+ case APIC_OFFSET_CCR:
+ *data = vlapic_get_ccr(vlapic);
+ break;
+ case APIC_OFFSET_DCR:
+ *data = lapic->dcr_timer;
+ break;
+ case APIC_OFFSET_RRR:
+ default:
+ *data = 0;
+ break;
+ }
+ return 0;
+}
+
+int
+vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data)
+{
+ struct vlapic *vlapic = (struct vlapic*)dev;
+ struct LAPIC *lapic = &vlapic->apic;
+	uint64_t offset = gpa & (PAGE_SIZE - 1);	/* offset into the APIC page */
+ uint32_t *reg;
+ int retval;
+
+ if (offset > sizeof(*lapic)) {
+ return 0;
+ }
+
+ retval = 0;
+ offset &= ~3;
+	switch (offset) {
+ case APIC_OFFSET_ID:
+ break;
+ case APIC_OFFSET_TPR:
+ lapic->tpr = data & 0xff;
+ vlapic_update_ppr(vlapic);
+ break;
+ case APIC_OFFSET_EOI:
+ vlapic_process_eoi(vlapic);
+ break;
+ case APIC_OFFSET_LDR:
+ break;
+ case APIC_OFFSET_DFR:
+ break;
+ case APIC_OFFSET_SVR:
+ lapic->svr = data;
+ break;
+ case APIC_OFFSET_ICR_LOW:
+ if (!x2apic(vlapic)) {
+ data &= 0xffffffff;
+ data |= (uint64_t)lapic->icr_hi << 32;
+ }
+ retval = lapic_process_icr(vlapic, data);
+ break;
+ case APIC_OFFSET_ICR_HI:
+ if (!x2apic(vlapic)) {
+ retval = 0;
+ lapic->icr_hi = data;
+ }
+ break;
+ case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+ reg = vlapic_get_lvt(vlapic, offset);
+ if (!(lapic->svr & APIC_SVR_ENABLE)) {
+ data |= APIC_LVT_M;
+ }
+ *reg = data;
+ // vlapic_dump_lvt(offset, reg);
+ break;
+ case APIC_OFFSET_ICR:
+ lapic->icr_timer = data;
+ vlapic_start_timer(vlapic, 0);
+ break;
+
+ case APIC_OFFSET_DCR:
+ lapic->dcr_timer = data;
+ vlapic->divisor = vlapic_timer_divisor(data);
+ break;
+
+ case APIC_OFFSET_ESR:
+ vlapic_update_errors(vlapic);
+ break;
+ case APIC_OFFSET_VER:
+ case APIC_OFFSET_APR:
+ case APIC_OFFSET_PPR:
+ case APIC_OFFSET_RRR:
+ case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
+ case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
+ case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
+ case APIC_OFFSET_CCR:
+ default:
+ // Read only.
+ break;
+ }
+
+ return (retval);
+}
+
+int
+vlapic_timer_tick(struct vlapic *vlapic)
+{
+ int curticks, delta, periodic, fired;
+ uint32_t ccr;
+ uint32_t decrement, leftover;
+
+restart:
+ curticks = ticks;
+ delta = curticks - vlapic->ccr_ticks;
+
+ /* Local APIC timer is disabled */
+ if (vlapic->apic.icr_timer == 0)
+ return (-1);
+
+ /* One-shot mode and timer has already counted down to zero */
+ periodic = vlapic_periodic_timer(vlapic);
+ if (!periodic && vlapic->apic.ccr_timer == 0)
+ return (-1);
+
+	/*
+	 * If 'curticks' and 'ccr_ticks' are out of sync by more than
+	 * 2^31 ticks then 'delta' will be negative; we deal with this
+	 * by restarting the timer.
+	 */
+ if (delta < 0) {
+ vlapic_start_timer(vlapic, 0);
+ goto restart;
+ }
+
+ fired = 0;
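+	/* Timer counts consumed per host clock tick at the current divisor. */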
+ decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;
+
+ vlapic->ccr_ticks = curticks;
+ ccr = vlapic->apic.ccr_timer;
+
+ while (delta-- > 0) {
+ if (ccr > decrement) {
+ ccr -= decrement;
+ continue;
+ }
+
+ /* Trigger the local apic timer interrupt */
+ vlapic_fire_timer(vlapic);
+ if (periodic) {
+ leftover = decrement - ccr;
+ vlapic_start_timer(vlapic, leftover);
+ ccr = vlapic->apic.ccr_timer;
+ } else {
+ /*
+ * One-shot timer has counted down to zero.
+ */
+ ccr = 0;
+ }
+ fired = 1;
+ break;
+ }
+
+ vlapic->apic.ccr_timer = ccr;
+
+ if (!fired)
+ return ((ccr / decrement) + 1);
+ else
+ return (0);
+}
+
+struct vdev_ops vlapic_dev_ops = {
+ .name = "vlapic",
+ .init = vlapic_op_init,
+ .reset = vlapic_op_reset,
+ .halt = vlapic_op_halt,
+ .memread = vlapic_op_mem_read,
+ .memwrite = vlapic_op_mem_write,
+};
+
+static struct io_region vlapic_mmio[VM_MAXCPU];
+
+struct vlapic *
+vlapic_init(struct vm *vm, int vcpuid)
+{
+ struct vlapic *vlapic;
+
+ vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
+ vlapic->vm = vm;
+ vlapic->vcpuid = vcpuid;
+
+ vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
+
+ if (vcpuid == 0)
+ vlapic->msr_apicbase |= APICBASE_BSP;
+
+ vlapic->ops = &vlapic_dev_ops;
+
+ vlapic->mmio = vlapic_mmio + vcpuid;
+ vlapic->mmio->base = DEFAULT_APIC_BASE;
+ vlapic->mmio->len = PAGE_SIZE;
+ vlapic->mmio->attr = MMIO_READ|MMIO_WRITE;
+ vlapic->mmio->vcpu = vcpuid;
+
+ vdev_register(&vlapic_dev_ops, vlapic);
+
+ vlapic_op_init(vlapic);
+
+ return (vlapic);
+}
+
+void
+vlapic_cleanup(struct vlapic *vlapic)
+{
+ vlapic_op_halt(vlapic);
+ vdev_unregister(vlapic);
+ free(vlapic, M_VLAPIC);
+}
+
+uint64_t
+vlapic_get_apicbase(struct vlapic *vlapic)
+{
+
+ return (vlapic->msr_apicbase);
+}
+
+void
+vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
+{
+ int err;
+ enum x2apic_state state;
+
+ err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
+ if (err)
+ panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
+
+ if (state == X2APIC_DISABLED)
+ val &= ~APICBASE_X2APIC;
+
+ vlapic->msr_apicbase = val;
+}
+
+void
+vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
+{
+ struct vlapic *vlapic;
+
+ vlapic = vm_lapic(vm, vcpuid);
+
+ if (state == X2APIC_DISABLED)
+ vlapic->msr_apicbase &= ~APICBASE_X2APIC;
+}
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
new file mode 100644
index 0000000..00de019
--- /dev/null
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VLAPIC_H_
+#define _VLAPIC_H_
+
+#include "vdev.h"
+
+struct vm;
+
+/*
+ * Map of APIC Registers: Offset Description Access
+ */
+#define APIC_OFFSET_ID 0x20 // Local APIC ID R/W
+#define APIC_OFFSET_VER 0x30 // Local APIC Version R
+#define APIC_OFFSET_TPR 0x80 // Task Priority Register R/W
+#define APIC_OFFSET_APR 0x90 // Arbitration Priority Register R
+#define APIC_OFFSET_PPR 0xA0 // Processor Priority Register R
+#define APIC_OFFSET_EOI 0xB0 // EOI Register W
+#define APIC_OFFSET_RRR 0xC0 // Remote read R
+#define APIC_OFFSET_LDR 0xD0 // Logical Destination R/W
+#define APIC_OFFSET_DFR 0xE0 // Destination Format Register 0..27 R; 28..31 R/W
+#define APIC_OFFSET_SVR 0xF0 // Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W
+#define APIC_OFFSET_ISR0 0x100 // ISR 000-031 R
+#define APIC_OFFSET_ISR1 0x110 // ISR 032-063 R
+#define APIC_OFFSET_ISR2 0x120 // ISR 064-095 R
+#define APIC_OFFSET_ISR3 0x130 // ISR 096-127 R
+#define APIC_OFFSET_ISR4 0x140 // ISR 128-159 R
+#define APIC_OFFSET_ISR5 0x150 // ISR 160-191 R
+#define APIC_OFFSET_ISR6 0x160 // ISR 192-223 R
+#define APIC_OFFSET_ISR7 0x170 // ISR 224-255 R
+#define APIC_OFFSET_TMR0 0x180 // TMR 000-031 R
+#define APIC_OFFSET_TMR1 0x190 // TMR 032-063 R
+#define APIC_OFFSET_TMR2 0x1A0 // TMR 064-095 R
+#define APIC_OFFSET_TMR3 0x1B0 // TMR 096-127 R
+#define APIC_OFFSET_TMR4 0x1C0 // TMR 128-159 R
+#define APIC_OFFSET_TMR5 0x1D0 // TMR 160-191 R
+#define APIC_OFFSET_TMR6 0x1E0 // TMR 192-223 R
+#define APIC_OFFSET_TMR7 0x1F0 // TMR 224-255 R
+#define APIC_OFFSET_IRR0 0x200 // IRR 000-031 R
+#define APIC_OFFSET_IRR1 0x210 // IRR 032-063 R
+#define APIC_OFFSET_IRR2 0x220 // IRR 064-095 R
+#define APIC_OFFSET_IRR3 0x230 // IRR 096-127 R
+#define APIC_OFFSET_IRR4 0x240 // IRR 128-159 R
+#define APIC_OFFSET_IRR5 0x250 // IRR 160-191 R
+#define APIC_OFFSET_IRR6 0x260 // IRR 192-223 R
+#define APIC_OFFSET_IRR7 0x270 // IRR 224-255 R
+#define APIC_OFFSET_ESR 0x280 // Error Status Register R
+#define APIC_OFFSET_ICR_LOW 0x300 // Interrupt Command Reg. (0-31) R/W
+#define APIC_OFFSET_ICR_HI 0x310 // Interrupt Command Reg. (32-63) R/W
+#define APIC_OFFSET_TIMER_LVT 0x320 // Local Vector Table (Timer) R/W
+#define APIC_OFFSET_THERM_LVT 0x330 // Local Vector Table (Thermal) R/W (PIV+)
+#define APIC_OFFSET_PERF_LVT 0x340 // Local Vector Table (Performance) R/W (P6+)
+#define APIC_OFFSET_LINT0_LVT 0x350 // Local Vector Table (LINT0) R/W
+#define APIC_OFFSET_LINT1_LVT 0x360 // Local Vector Table (LINT1) R/W
+#define APIC_OFFSET_ERROR_LVT 0x370 // Local Vector Table (ERROR) R/W
+#define APIC_OFFSET_ICR 0x380 // Initial Count Reg. for Timer R/W
+#define APIC_OFFSET_CCR 0x390 // Current Count of Timer R
+#define APIC_OFFSET_DCR 0x3E0 // Timer Divide Configuration Reg. R/W
+
+/*
+ * 16 priority levels with at most one vector injected per level.
+ */
+#define ISRVEC_STK_SIZE (16 + 1)
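+/* The extra slot is index 0, which always holds 0 ("no vector in service"). */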
+
+enum x2apic_state;
+
+struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
+void vlapic_cleanup(struct vlapic *vlapic);
+
+int vlapic_op_mem_write(void* dev, uint64_t gpa,
+ opsize_t size, uint64_t data);
+
+int vlapic_op_mem_read(void* dev, uint64_t gpa,
+ opsize_t size, uint64_t *data);
+
+int vlapic_pending_intr(struct vlapic *vlapic);
+void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
+void vlapic_set_intr_ready(struct vlapic *vlapic, int vector);
+int vlapic_timer_tick(struct vlapic *vlapic);
+
+uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
+void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
+void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
+
+#endif /* _VLAPIC_H_ */
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
new file mode 100644
index 0000000..f21bddd
--- /dev/null
+++ b/sys/amd64/vmm/vmm.c
@@ -0,0 +1,992 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+
+#include <machine/vm.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <x86/apicreg.h>
+
+#include <machine/vmm.h>
+#include "vmm_host.h"
+#include "vmm_mem.h"
+#include "vmm_util.h"
+#include <machine/vmm_dev.h>
+#include "vlapic.h"
+#include "vmm_msr.h"
+#include "vmm_ipi.h"
+#include "vmm_stat.h"
+#include "vmm_lapic.h"
+
+#include "io/ppt.h"
+#include "io/iommu.h"
+
+struct vlapic;
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ uint64_t guest_msrs[VMM_MSR_NUM];
+ struct vlapic *vlapic;
+ int vcpuid;
+ struct savefpu *guestfpu; /* guest fpu state */
+ void *stats;
+ struct vm_exit exitinfo;
+ enum x2apic_state x2apic_state;
+ int nmi_pending;
+};
+
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+
+#define VM_MAX_MEMORY_SEGMENTS 2
+
+struct vm {
+ void *cookie; /* processor-specific data */
+ void *iommu; /* iommu-specific data */
+ struct vcpu vcpu[VM_MAXCPU];
+ int num_mem_segs;
+ struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
+ char name[VM_MAX_NAMELEN];
+
+ /*
+ * Set of active vcpus.
+ * An active vcpu is one that has been started implicitly (BSP) or
+ * explicitly (AP) by sending it a startup ipi.
+ */
+ cpuset_t active_cpus;
+};
+
+static int vmm_initialized;
+
+static struct vmm_ops *ops;
+#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0)
+#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
+
+#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL)
+#define VMRUN(vmi, vcpu, rip) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
+#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
+#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \
+ (ops != NULL ? \
+ (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \
+ ENXIO)
+#define VMMMAP_GET(vmi, gpa) \
+ (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
+#define VMGETREG(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETREG(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
+#define VMGETDESC(vmi, vcpu, num, desc) \
+ (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
+#define VMSETDESC(vmi, vcpu, num, desc) \
+ (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
+#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
+ (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
+#define VMGETCAP(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETCAP(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+
+#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
+#define fpu_stop_emulating() clts()
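+/* CR0.TS makes the next FPU access trap, enabling lazy guest/host FPU switching. */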
+
+static MALLOC_DEFINE(M_VM, "vm", "vm");
+CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+static void
+vcpu_cleanup(struct vcpu *vcpu)
+{
+ vlapic_cleanup(vcpu->vlapic);
+ vmm_stat_free(vcpu->stats);
+ fpu_save_area_free(vcpu->guestfpu);
+}
+
+static void
+vcpu_init(struct vm *vm, uint32_t vcpu_id)
+{
+ struct vcpu *vcpu;
+
+ vcpu = &vm->vcpu[vcpu_id];
+
+ vcpu_lock_init(vcpu);
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ vcpu->vlapic = vlapic_init(vm, vcpu_id);
+ vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
+ vcpu->guestfpu = fpu_save_area_alloc();
+ fpu_save_area_reset(vcpu->guestfpu);
+ vcpu->stats = vmm_stat_alloc();
+}
+
+struct vm_exit *
+vm_exitinfo(struct vm *vm, int cpuid)
+{
+ struct vcpu *vcpu;
+
+ if (cpuid < 0 || cpuid >= VM_MAXCPU)
+ panic("vm_exitinfo: invalid cpuid %d", cpuid);
+
+ vcpu = &vm->vcpu[cpuid];
+
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+ int error;
+
+ vmm_host_state_init();
+ vmm_ipi_init();
+
+ error = vmm_mem_init();
+ if (error)
+ return (error);
+
+ if (vmm_is_intel())
+ ops = &vmm_ops_intel;
+ else if (vmm_is_amd())
+ ops = &vmm_ops_amd;
+ else
+ return (ENXIO);
+
+ vmm_msr_init();
+
+ return (VMM_INIT());
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ vmmdev_init();
+ iommu_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = 1;
+ break;
+ case MOD_UNLOAD:
+ error = vmmdev_cleanup();
+ if (error == 0) {
+ iommu_cleanup();
+ vmm_ipi_cleanup();
+ error = VMM_CLEANUP();
+ }
+ vmm_initialized = 0;
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - iommu initialization must happen after the pci passthru driver has had
+ * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
+ *
+ * - VT-x initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ int i;
+ struct vm *vm;
+ vm_paddr_t maxaddr;
+
+ const int BSP = 0;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
+ vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->cookie = VMINIT(vm);
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu_init(vm, i);
+ guest_msrs_init(vm, i);
+ }
+
+ maxaddr = vmm_mem_maxaddr();
+ vm->iommu = iommu_create_domain(maxaddr);
+ vm_activate_cpu(vm, BSP);
+
+ *retvm = vm;
+ return (0);
+}
+
+static void
+vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
+{
+ size_t len;
+ vm_paddr_t hpa;
+ void *host_domain;
+
+ host_domain = iommu_host_domain();
+
+ len = 0;
+ while (len < seg->len) {
+ hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
+ if (hpa == (vm_paddr_t)-1) {
+			panic("vm_free_mem_seg: cannot free hpa "
+ "associated with gpa 0x%016lx", seg->gpa + len);
+ }
+
+ /*
+ * Remove the 'gpa' to 'hpa' mapping in VMs domain.
+ * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
+ */
+ iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
+ iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);
+
+ vmm_mem_free(hpa, PAGE_SIZE);
+
+ len += PAGE_SIZE;
+ }
+
+ /*
+ * Invalidate cached translations associated with 'vm->iommu' since
+ * we have now moved some pages from it.
+ */
+ iommu_invalidate_tlb(vm->iommu);
+
+ bzero(seg, sizeof(struct vm_memory_segment));
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+ int i;
+
+ ppt_unassign_all(vm);
+
+ for (i = 0; i < vm->num_mem_segs; i++)
+ vm_free_mem_seg(vm, &vm->mem_segs[i]);
+
+ vm->num_mem_segs = 0;
+
+ for (i = 0; i < VM_MAXCPU; i++)
+ vcpu_cleanup(&vm->vcpu[i]);
+
+ iommu_destroy_domain(vm->iommu);
+
+ VMCLEANUP(vm->cookie);
+
+ free(vm, M_VM);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+int
+vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
+{
+ const boolean_t spok = TRUE; /* superpage mappings are ok */
+
+ return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
+ VM_PROT_RW, spok));
+}
+
+int
+vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ const boolean_t spok = TRUE; /* superpage mappings are ok */
+
+ return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
+ VM_PROT_NONE, spok));
+}
+
+/*
+ * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
+ */
+static boolean_t
+vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
+{
+ int i;
+ vm_paddr_t gpabase, gpalimit;
+
+ if (gpa & PAGE_MASK)
+ panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
+
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ gpabase = vm->mem_segs[i].gpa;
+ gpalimit = gpabase + vm->mem_segs[i].len;
+ if (gpa >= gpabase && gpa < gpalimit)
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
+int
+vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ int error, available, allocated;
+ struct vm_memory_segment *seg;
+ vm_paddr_t g, hpa;
+ void *host_domain;
+
+ const boolean_t spok = TRUE; /* superpage mappings are ok */
+
+ if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
+ return (EINVAL);
+
+ available = allocated = 0;
+ g = gpa;
+ while (g < gpa + len) {
+ if (vm_gpa_available(vm, g))
+ available++;
+ else
+ allocated++;
+
+ g += PAGE_SIZE;
+ }
+
+ /*
+ * If there are some allocated and some available pages in the address
+ * range then it is an error.
+ */
+ if (allocated && available)
+ return (EINVAL);
+
+ /*
+ * If the entire address range being requested has already been
+ * allocated then there isn't anything more to do.
+ */
+ if (allocated && available == 0)
+ return (0);
+
+ if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
+ return (E2BIG);
+
+ host_domain = iommu_host_domain();
+
+ seg = &vm->mem_segs[vm->num_mem_segs];
+
+ error = 0;
+ seg->gpa = gpa;
+ seg->len = 0;
+ while (seg->len < len) {
+ hpa = vmm_mem_alloc(PAGE_SIZE);
+ if (hpa == 0) {
+ error = ENOMEM;
+ break;
+ }
+
+ error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
+ VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
+ if (error)
+ break;
+
+ /*
+ * Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
+ * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain.
+ */
+ iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
+ iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);
+
+ seg->len += PAGE_SIZE;
+ }
+
+ if (error) {
+ vm_free_mem_seg(vm, seg);
+ return (error);
+ }
+
+ /*
+ * Invalidate cached translations associated with 'host_domain' since
+ * we have now moved some pages from it.
+ */
+ iommu_invalidate_tlb(host_domain);
+
+ vm->num_mem_segs++;
+
+ return (0);
+}
+
+vm_paddr_t
+vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ vm_paddr_t nextpage;
+
+ nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
+ if (len > nextpage - gpa)
+ panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+ return (VMMMAP_GET(vm->cookie, gpa));
+}
+
+int
+vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
+ struct vm_memory_segment *seg)
+{
+ int i;
+
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ if (gpabase == vm->mem_segs[i].gpa) {
+ *seg = vm->mem_segs[i];
+ return (0);
+ }
+ }
+ return (-1);
+}
+
+int
+vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+{
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (VMGETREG(vm->cookie, vcpu, reg, retval));
+}
+
+int
+vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
+{
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (VMSETREG(vm->cookie, vcpu, reg, val));
+}
+
+static boolean_t
+is_descriptor_table(int reg)
+{
+
+ switch (reg) {
+ case VM_REG_GUEST_IDTR:
+ case VM_REG_GUEST_GDTR:
+ return (TRUE);
+ default:
+ return (FALSE);
+ }
+}
+
+static boolean_t
+is_segment_register(int reg)
+{
+
+ switch (reg) {
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_TR:
+ case VM_REG_GUEST_LDTR:
+ return (TRUE);
+ default:
+ return (FALSE);
+ }
+}
+
+int
+vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
+ struct seg_desc *desc)
+{
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (!is_segment_register(reg) && !is_descriptor_table(reg))
+ return (EINVAL);
+
+ return (VMGETDESC(vm->cookie, vcpu, reg, desc));
+}
+
+int
+vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
+ struct seg_desc *desc)
+{
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (!is_segment_register(reg) && !is_descriptor_table(reg))
+ return (EINVAL);
+
+ return (VMSETDESC(vm->cookie, vcpu, reg, desc));
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* flush host state to the pcb */
+ fpuexit(curthread);
+
+ /* restore guest FPU state */
+ fpu_stop_emulating();
+ fpurestore(vcpu->guestfpu);
+
+ /*
+ * The FPU is now "dirty" with the guest's state so turn on emulation
+ * to trap any access to the FPU by the host.
+ */
+ fpu_start_emulating();
+}
+
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+
+ if ((rcr0() & CR0_TS) == 0)
+ panic("fpu emulation not enabled in host!");
+
+ /* save guest FPU state */
+ fpu_stop_emulating();
+ fpusave(vcpu->guestfpu);
+ fpu_start_emulating();
+}
+
+static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
+
+int
+vm_run(struct vm *vm, struct vm_run *vmrun)
+{
+ int error, vcpuid, sleepticks, t;
+ struct vcpu *vcpu;
+ struct pcb *pcb;
+ uint64_t tscval, rip;
+ struct vm_exit *vme;
+
+ vcpuid = vmrun->cpuid;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vme = &vmrun->vm_exit;
+ rip = vmrun->rip;
+restart:
+ critical_enter();
+
+ tscval = rdtsc();
+
+ pcb = PCPU_GET(curpcb);
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+
+ restore_guest_msrs(vm, vcpuid);
+ restore_guest_fpustate(vcpu);
+
+ vcpu->hostcpu = curcpu;
+ error = VMRUN(vm->cookie, vcpuid, rip);
+ vcpu->hostcpu = NOCPU;
+
+ save_guest_fpustate(vcpu);
+ restore_host_msrs(vm, vcpuid);
+
+ vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
+
+ /* copy the exit information */
+ bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));
+
+ critical_exit();
+
+ /*
+ * Oblige the guest's desire to 'hlt' by sleeping until the vcpu
+ * is ready to run.
+ */
+ if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
+ vcpu_lock(vcpu);
+
+ /*
+ * Figure out the number of host ticks until the next apic
+ * timer interrupt in the guest.
+ */
+ sleepticks = lapic_timer_tick(vm, vcpuid);
+
+ /*
+ * If the guest local apic timer is disabled then sleep for
+ * a long time but not forever.
+ */
+ if (sleepticks < 0)
+ sleepticks = hz;
+
+ /*
+ * Do a final check for pending NMI or interrupts before
+ * really putting this thread to sleep.
+ *
+ * These interrupts could have happened any time after we
+ * returned from VMRUN() and before we grabbed the vcpu lock.
+ */
+ if (!vm_nmi_pending(vm, vcpuid) &&
+ lapic_pending_intr(vm, vcpuid) < 0) {
+ if (sleepticks <= 0)
+ panic("invalid sleepticks %d", sleepticks);
+ t = ticks;
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
+ vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
+ }
+
+ vcpu_unlock(vcpu);
+
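+		/* Resume the guest at the instruction following the 'hlt'. */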
+ rip = vme->rip + vme->inst_length;
+ goto restart;
+ }
+
+ return (error);
+}
+
+int
+vm_inject_event(struct vm *vm, int vcpuid, int type,
+ int vector, uint32_t code, int code_valid)
+{
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+	if (type <= VM_EVENT_NONE || type >= VM_EVENT_MAX)
+ return (EINVAL);
+
+ if (vector < 0 || vector > 255)
+ return (EINVAL);
+
+ return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
+}
+
+static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
+
+int
+vm_inject_nmi(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu->nmi_pending = 1;
+ vm_interrupt_hostcpu(vm, vcpuid);
+ return (0);
+}
+
+int
+vm_nmi_pending(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ return (vcpu->nmi_pending);
+}
+
+void
+vm_nmi_clear(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ if (vcpu->nmi_pending == 0)
+ panic("vm_nmi_clear: inconsistent nmi_pending state");
+
+ vcpu->nmi_pending = 0;
+ vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
+}
+
+int
+vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
+{
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (VMGETCAP(vm->cookie, vcpu, type, retval));
+}
+
+int
+vm_set_capability(struct vm *vm, int vcpu, int type, int val)
+{
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (VMSETCAP(vm->cookie, vcpu, type, val));
+}
+
+uint64_t *
+vm_guest_msrs(struct vm *vm, int cpu)
+{
+ return (vm->vcpu[cpu].guest_msrs);
+}
+
+struct vlapic *
+vm_lapic(struct vm *vm, int cpu)
+{
+ return (vm->vcpu[cpu].vlapic);
+}
+
+boolean_t
+vmm_is_pptdev(int bus, int slot, int func)
+{
+ int found, i, n;
+ int b, s, f;
+ char *val, *cp, *cp2;
+
+ /*
+ * XXX
+	 * The length of an environment variable is limited to 128 bytes, which
+ * puts an upper limit on the number of passthru devices that may be
+ * specified using a single environment variable.
+ *
+ * Work around this by scanning multiple environment variable
+ * names instead of a single one - yuck!
+ */
+ const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
+
+ /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
+ found = 0;
+ for (i = 0; names[i] != NULL && !found; i++) {
+ cp = val = getenv(names[i]);
+ while (cp != NULL && *cp != '\0') {
+ if ((cp2 = strchr(cp, ' ')) != NULL)
+ *cp2 = '\0';
+
+ n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
+ if (n == 3 && bus == b && slot == s && func == f) {
+ found = 1;
+ break;
+ }
+
+ if (cp2 != NULL)
+ *cp2++ = ' ';
+
+ cp = cp2;
+ }
+ freeenv(val);
+ }
+ return (found);
+}
+
+void *
+vm_iommu_domain(struct vm *vm)
+{
+
+ return (vm->iommu);
+}
+
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
+{
+ int error;
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> RUNNING -> IDLE
+ * IDLE -> CANNOT_RUN -> IDLE
+ */
+ if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
+ (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
+ error = 0;
+ vcpu->state = state;
+ } else {
+ error = EBUSY;
+ }
+
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+ enum vcpu_state state;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+void
+vm_activate_cpu(struct vm *vm, int vcpuid)
+{
+
+ if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
+ CPU_SET(vcpuid, &vm->active_cpus);
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+void *
+vcpu_stats(struct vm *vm, int vcpuid)
+{
+
+ return (vm->vcpu[vcpuid].stats);
+}
+
+int
+vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
+{
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ *state = vm->vcpu[vcpuid].x2apic_state;
+
+ return (0);
+}
+
+int
+vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
+{
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (state >= X2APIC_STATE_LAST)
+ return (EINVAL);
+
+ vm->vcpu[vcpuid].x2apic_state = state;
+
+ vlapic_set_x2apic_state(vm, vcpuid, state);
+
+ return (0);
+}
+
+void
+vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
+{
+ int hostcpu;
+ struct vcpu *vcpu;
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ hostcpu = vcpu->hostcpu;
+ if (hostcpu == NOCPU) {
+ /*
+ * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
+ * the host thread must be sleeping waiting for an event to
+ * kick the vcpu out of 'hlt'.
+ *
+ * XXX this is racy because the condition exists right before
+ * and after calling VMRUN() in vm_run(). The wakeup() is
+ * benign in this case.
+ */
+ if (vcpu->state == VCPU_RUNNING)
+ wakeup_one(vcpu);
+ } else {
+ if (vcpu->state != VCPU_RUNNING)
+ panic("invalid vcpu state %d", vcpu->state);
+ if (hostcpu != curcpu)
+ ipi_cpu(hostcpu, vmm_ipinum);
+ }
+ vcpu_unlock(vcpu);
+}
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
new file mode 100644
index 0000000..7608d5c
--- /dev/null
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -0,0 +1,526 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+
+#include <machine/vmm.h>
+#include "vmm_lapic.h"
+#include "vmm_stat.h"
+#include "vmm_mem.h"
+#include "io/ppt.h"
+#include <machine/vmm_dev.h>
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ SLIST_ENTRY(vmmdev_softc) link;
+};
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static struct mtx vmmdev_mtx;
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error, off, c;
+ vm_paddr_t hpa, gpa;
+ struct vmmdev_softc *sc;
+
+ static char zerobuf[PAGE_SIZE];
+
+ error = 0;
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ error = ENXIO;
+
+ while (uio->uio_resid > 0 && error == 0) {
+ gpa = uio->uio_offset;
+ off = gpa & PAGE_MASK;
+ c = min(uio->uio_resid, PAGE_SIZE - off);
+
+ /*
+ * The VM has a hole in its physical memory map. If we want to
+ * use 'dd' to inspect memory beyond the hole we need to
+ * provide bogus data for memory that lies in the hole.
+ *
+ * Since this device does not support lseek(2), dd(1) will
+ * read(2) blocks of data to simulate the lseek(2).
+ */
+ hpa = vm_gpa2hpa(sc->vm, gpa, c);
+ if (hpa == (vm_paddr_t)-1) {
+ if (uio->uio_rw == UIO_READ)
+ error = uiomove(zerobuf, c, uio);
+ else
+ error = EFAULT;
+ } else
+ error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio);
+ }
+
+ mtx_unlock(&vmmdev_mtx);
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpu, state_changed;
+ enum vcpu_state new_state;
+ struct vmmdev_softc *sc;
+ struct vm_memory_segment *seg;
+ struct vm_register *vmreg;
+ struct vm_seg_desc* vmsegdesc;
+ struct vm_run *vmrun;
+ struct vm_event *vmevent;
+ struct vm_lapic_irq *vmirq;
+ struct vm_capability *vmcap;
+ struct vm_pptdev *pptdev;
+ struct vm_pptdev_mmio *pptmmio;
+ struct vm_pptdev_msi *pptmsi;
+ struct vm_pptdev_msix *pptmsix;
+ struct vm_nmi *vmnmi;
+ struct vm_stats *vmstats;
+ struct vm_stat_desc *statdesc;
+ struct vm_x2apic *x2apic;
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
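+	/* state_changed: 1 => a single vcpu was frozen, 2 => all vcpus were */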
+ vcpu = -1;
+ state_changed = 0;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ case VM_GET_SEGMENT_DESCRIPTOR:
+ case VM_SET_SEGMENT_DESCRIPTOR:
+ case VM_INJECT_EVENT:
+ case VM_GET_CAPABILITY:
+ case VM_SET_CAPABILITY:
+ case VM_PPTDEV_MSI:
+ case VM_PPTDEV_MSIX:
+ case VM_SET_X2APIC_STATE:
+ /*
+ * XXX fragile, handle with care
+ * Assumes that the first field of the ioctl data is the vcpu.
+ */
+ vcpu = *(int *)data;
+ if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (cmd == VM_RUN)
+ new_state = VCPU_RUNNING;
+ else
+ new_state = VCPU_CANNOT_RUN;
+
+ error = vcpu_set_state(sc->vm, vcpu, new_state);
+ if (error)
+ goto done;
+
+ state_changed = 1;
+ break;
+
+ case VM_MAP_PPTDEV_MMIO:
+ case VM_BIND_PPTDEV:
+ case VM_UNBIND_PPTDEV:
+ case VM_MAP_MEMORY:
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = 0;
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_CANNOT_RUN);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ while (--vcpu >= 0)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ goto done;
+ }
+
+ state_changed = 2;
+ break;
+
+ default:
+ break;
+ }
+
+ switch(cmd) {
+ case VM_RUN:
+ vmrun = (struct vm_run *)data;
+ error = vm_run(sc->vm, vmrun);
+ break;
+ case VM_STAT_DESC: {
+ const char *desc;
+ statdesc = (struct vm_stat_desc *)data;
+ desc = vmm_stat_desc(statdesc->index);
+ if (desc != NULL) {
+ error = 0;
+ strlcpy(statdesc->desc, desc, sizeof(statdesc->desc));
+ } else
+ error = EINVAL;
+ break;
+ }
+ case VM_STATS: {
+ CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES);
+ vmstats = (struct vm_stats *)data;
+ getmicrotime(&vmstats->tv);
+ error = vmm_stat_copy(sc->vm, vmstats->cpuid,
+ &vmstats->num_entries, vmstats->statbuf);
+ break;
+ }
+ case VM_PPTDEV_MSI:
+ pptmsi = (struct vm_pptdev_msi *)data;
+ error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
+ pptmsi->bus, pptmsi->slot, pptmsi->func,
+ pptmsi->destcpu, pptmsi->vector,
+ pptmsi->numvec);
+ break;
+ case VM_PPTDEV_MSIX:
+ pptmsix = (struct vm_pptdev_msix *)data;
+ error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
+ pptmsix->bus, pptmsix->slot,
+ pptmsix->func, pptmsix->idx,
+ pptmsix->msg, pptmsix->vector_control,
+ pptmsix->addr);
+ break;
+ case VM_MAP_PPTDEV_MMIO:
+ pptmmio = (struct vm_pptdev_mmio *)data;
+ error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
+ pptmmio->func, pptmmio->gpa, pptmmio->len,
+ pptmmio->hpa);
+ break;
+ case VM_BIND_PPTDEV:
+ pptdev = (struct vm_pptdev *)data;
+ error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot,
+ pptdev->func);
+ break;
+ case VM_UNBIND_PPTDEV:
+ pptdev = (struct vm_pptdev *)data;
+ error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot,
+ pptdev->func);
+ break;
+ case VM_INJECT_EVENT:
+ vmevent = (struct vm_event *)data;
+ error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type,
+ vmevent->vector,
+ vmevent->error_code,
+ vmevent->error_code_valid);
+ break;
+ case VM_INJECT_NMI:
+ vmnmi = (struct vm_nmi *)data;
+ error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
+ break;
+ case VM_LAPIC_IRQ:
+ vmirq = (struct vm_lapic_irq *)data;
+ error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
+ break;
+ case VM_MAP_MEMORY:
+ seg = (struct vm_memory_segment *)data;
+ error = vm_malloc(sc->vm, seg->gpa, seg->len);
+ break;
+ case VM_GET_MEMORY_SEG:
+ seg = (struct vm_memory_segment *)data;
+ seg->len = 0;
+ (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
+ error = 0;
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+ &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+ vmreg->regval);
+ break;
+ case VM_SET_SEGMENT_DESCRIPTOR:
+ vmsegdesc = (struct vm_seg_desc *)data;
+ error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
+ vmsegdesc->regnum,
+ &vmsegdesc->desc);
+ break;
+ case VM_GET_SEGMENT_DESCRIPTOR:
+ vmsegdesc = (struct vm_seg_desc *)data;
+ error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
+ vmsegdesc->regnum,
+ &vmsegdesc->desc);
+ break;
+ case VM_GET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_get_capability(sc->vm, vmcap->cpuid,
+ vmcap->captype,
+ &vmcap->capval);
+ break;
+ case VM_SET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_set_capability(sc->vm, vmcap->cpuid,
+ vmcap->captype,
+ vmcap->capval);
+ break;
+ case VM_SET_X2APIC_STATE:
+ x2apic = (struct vm_x2apic *)data;
+ error = vm_set_x2apic_state(sc->vm,
+ x2apic->cpuid, x2apic->state);
+ break;
+ case VM_GET_X2APIC_STATE:
+ x2apic = (struct vm_x2apic *)data;
+ error = vm_get_x2apic_state(sc->vm,
+ x2apic->cpuid, &x2apic->state);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+ if (state_changed == 1) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ } else if (state_changed == 2) {
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ }
+
+done:
+ return (error);
+}
+
+static int
+vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int nprot, vm_memattr_t *memattr)
+{
+ int error;
+ struct vmmdev_softc *sc;
+
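+	/* A return value of -1 tells the caller that no valid mapping exists. */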
+ error = -1;
+ mtx_lock(&vmmdev_mtx);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc != NULL && (nprot & PROT_EXEC) == 0) {
+ *paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
+ if (*paddr != (vm_paddr_t)-1)
+ error = 0;
+ }
+
+ mtx_unlock(&vmmdev_mtx);
+
+ return (error);
+}
+
+static void
+vmmdev_destroy(struct vmmdev_softc *sc, boolean_t unlink)
+{
+
+ /*
+	 * XXX must stop virtual machine instances that may still be
+	 * running and clean up their state.
+ */
+ if (sc->cdev)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm)
+ vm_destroy(sc->vm);
+
+ if (unlink) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ char buf[VM_MAX_NAMELEN];
+ struct vmmdev_softc *sc;
+
+ strlcpy(buf, "beavis", sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ /*
+ * XXX TODO if any process has this device open then fail
+ */
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ return (EINVAL);
+ }
+
+ sc->cdev->si_drv1 = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ vmmdev_destroy(sc, TRUE);
+
+ return (0);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_vmm_destroy, "A", NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap = vmmdev_mmap,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ struct vm *vm;
+ struct vmmdev_softc *sc, *sc2;
+ char buf[VM_MAX_NAMELEN];
+
+ strlcpy(buf, "beavis", sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL)
+ return (EEXIST);
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ return (error);
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->vm = vm;
+
+ /*
+	 * Look up the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL)
+ SLIST_INSERT_HEAD(&head, sc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc, FALSE);
+ return (EEXIST);
+ }
+
+ sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
+ "vmm/%s", buf);
+ sc->cdev->si_drv1 = sc;
+
+ return (0);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_vmm_create, "A", NULL);
+
+void
+vmmdev_init(void)
+{
+ mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
diff --git a/sys/amd64/vmm/vmm_host.c b/sys/amd64/vmm/vmm_host.c
new file mode 100644
index 0000000..8dfef73
--- /dev/null
+++ b/sys/amd64/vmm/vmm_host.c
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pcpu.h>
+
+#include <machine/cpufunc.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+
+#include "vmm_host.h"
+
+static uint64_t vmm_host_efer, vmm_host_pat, vmm_host_cr0, vmm_host_cr4;
+
+void
+vmm_host_state_init(void)
+{
+
+ vmm_host_efer = rdmsr(MSR_EFER);
+ vmm_host_pat = rdmsr(MSR_PAT);
+
+ /*
+ * We always want CR0.TS to be set when the processor does a VM exit.
+ *
+ * With emulation turned on unconditionally after a VM exit, we are
+ * able to trap inadvertent use of the FPU until the guest FPU state
+ * has been safely squirreled away.
+ */
+ vmm_host_cr0 = rcr0() | CR0_TS;
+
+ vmm_host_cr4 = rcr4();
+}
+
+uint64_t
+vmm_get_host_pat(void)
+{
+
+ return (vmm_host_pat);
+}
+
+uint64_t
+vmm_get_host_efer(void)
+{
+
+ return (vmm_host_efer);
+}
+
+uint64_t
+vmm_get_host_cr0(void)
+{
+
+ return (vmm_host_cr0);
+}
+
+uint64_t
+vmm_get_host_cr4(void)
+{
+
+ return (vmm_host_cr4);
+}
+
+uint64_t
+vmm_get_host_datasel(void)
+{
+
+	return (GSEL(GDATA_SEL, SEL_KPL));
+}
+
+uint64_t
+vmm_get_host_codesel(void)
+{
+
+ return (GSEL(GCODE_SEL, SEL_KPL));
+}
+
+uint64_t
+vmm_get_host_tsssel(void)
+{
+
+ return (GSEL(GPROC0_SEL, SEL_KPL));
+}
+
+uint64_t
+vmm_get_host_fsbase(void)
+{
+
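+	/* The host kernel does not use a non-zero %fs base. */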
+ return (0);
+}
+
+uint64_t
+vmm_get_host_idtrbase(void)
+{
+
+ return (r_idt.rd_base);
+}
diff --git a/sys/amd64/vmm/vmm_host.h b/sys/amd64/vmm/vmm_host.h
new file mode 100644
index 0000000..839f54a
--- /dev/null
+++ b/sys/amd64/vmm/vmm_host.h
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_HOST_H_
+#define _VMM_HOST_H_
+
+#ifndef _KERNEL
+#error "no user-servicable parts inside"
+#endif
+
+void vmm_host_state_init(void);
+
+uint64_t vmm_get_host_pat(void);
+uint64_t vmm_get_host_efer(void);
+uint64_t vmm_get_host_cr0(void);
+uint64_t vmm_get_host_cr4(void);
+uint64_t vmm_get_host_datasel(void);
+uint64_t vmm_get_host_codesel(void);
+uint64_t vmm_get_host_tsssel(void);
+uint64_t vmm_get_host_fsbase(void);
+uint64_t vmm_get_host_idtrbase(void);
+
+/*
+ * Inline access to host state that is used on every VM entry
+ */
+static __inline uint64_t
+vmm_get_host_trbase(void)
+{
+
+ return ((uint64_t)PCPU_GET(tssp));
+}
+
+static __inline uint64_t
+vmm_get_host_gdtrbase(void)
+{
+
+ return ((uint64_t)&gdt[NGDT * curcpu]);
+}
+
+struct pcpu;
+extern struct pcpu __pcpu[];
+
+static __inline uint64_t
+vmm_get_host_gsbase(void)
+{
+
+ return ((uint64_t)&__pcpu[curcpu]);
+}
+
+#endif
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
new file mode 100644
index 0000000..7b480bd
--- /dev/null
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,867 @@
+/*-
+ * Copyright (c) 2012 Sandvine, Inc.
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#else /* !_KERNEL */
+#include <sys/types.h>
+#include <sys/errno.h>
+
+#include <machine/vmm.h>
+
+#include <vmmapi.h>
+#endif /* _KERNEL */
+
+/* struct vie_op.op_type */
+enum {
+ VIE_OP_TYPE_NONE = 0,
+ VIE_OP_TYPE_MOV,
+ VIE_OP_TYPE_AND,
+ VIE_OP_TYPE_LAST
+};
+
+/* struct vie_op.op_flags */
+#define VIE_OP_F_IMM (1 << 0) /* immediate operand present */
+#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
+
+static const struct vie_op one_byte_opcodes[256] = {
+ [0x88] = {
+ .op_byte = 0x88,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0x89] = {
+ .op_byte = 0x89,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0x8B] = {
+ .op_byte = 0x8B,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0xC7] = {
+ .op_byte = 0xC7,
+ .op_type = VIE_OP_TYPE_MOV,
+ .op_flags = VIE_OP_F_IMM,
+ },
+ [0x23] = {
+ .op_byte = 0x23,
+ .op_type = VIE_OP_TYPE_AND,
+ },
+ [0x81] = {
+ /* XXX Group 1 extended opcode - not just AND */
+ .op_byte = 0x81,
+ .op_type = VIE_OP_TYPE_AND,
+ .op_flags = VIE_OP_F_IMM,
+ }
+};
+
+/* struct vie.mod */
+#define VIE_MOD_INDIRECT 0
+#define VIE_MOD_INDIRECT_DISP8 1
+#define VIE_MOD_INDIRECT_DISP32 2
+#define VIE_MOD_DIRECT 3
+
+/* struct vie.rm */
+#define VIE_RM_SIB 4
+#define VIE_RM_DISP32 5
+
+#define GB (1024 * 1024 * 1024)
+
+static enum vm_reg_name gpr_map[16] = {
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RDX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15
+};
+
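+/* Mask of significant bits for each operand size, indexed by size in bytes. */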
+static uint64_t size2mask[] = {
+ [1] = 0xff,
+ [2] = 0xffff,
+ [4] = 0xffffffff,
+ [8] = 0xffffffffffffffff,
+};
+
+static int
+vie_valid_register(enum vm_reg_name reg)
+{
+#ifdef _KERNEL
+ /*
+ * XXX
+ * The operand register in which we store the result of the
+ * read must be a GPR that we can modify even if the vcpu
+ * is "running". All the GPRs qualify except for %rsp.
+ *
+ * This is a limitation of the vm_set_register() API
+ * and can be fixed if necessary.
+ */
+ if (reg == VM_REG_GUEST_RSP)
+ return (0);
+#endif
+ return (1);
+}
+
+static int
+vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
+{
+ int error;
+
+ if (!vie_valid_register(reg))
+ return (EINVAL);
+
+ error = vm_get_register(vm, vcpuid, reg, rval);
+
+ return (error);
+}
+
+static int
+vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
+{
+ uint64_t val;
+ int error, rshift;
+ enum vm_reg_name reg;
+
+ rshift = 0;
+ reg = gpr_map[vie->reg];
+
+ /*
+ * 64-bit mode imposes limitations on accessing legacy byte registers.
+ *
+ * The legacy high-byte registers cannot be addressed if the REX
+ * prefix is present. In this case the values 4, 5, 6 and 7 of the
+ * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
+ *
+ * If the REX prefix is not present then the values 4, 5, 6 and 7
+ * of the 'ModRM:reg' field address the legacy high-byte registers,
+ * %ah, %ch, %dh and %bh respectively.
+ */
+ if (!vie->rex_present) {
+ if (vie->reg & 0x4) {
+ /*
+ * Obtain the value of %ah by reading %rax and shifting
+ * right by 8 bits (same for %bh, %ch and %dh).
+ */
+ rshift = 8;
+ reg = gpr_map[vie->reg & 0x3];
+ }
+ }
+
+ if (!vie_valid_register(reg))
+ return (EINVAL);
+
+ error = vm_get_register(vm, vcpuid, reg, &val);
+ *rval = val >> rshift;
+ return (error);
+}
+
+static int
+vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+ uint64_t val, int size)
+{
+ int error;
+ uint64_t origval;
+
+ if (!vie_valid_register(reg))
+ return (EINVAL);
+
+ switch (size) {
+ case 1:
+ case 2:
+ error = vie_read_register(vm, vcpuid, reg, &origval);
+ if (error)
+ return (error);
+ val &= size2mask[size];
+ val |= origval & ~size2mask[size];
+ break;
+ case 4:
+ val &= 0xffffffffUL;
+ break;
+ case 8:
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ error = vm_set_register(vm, vcpuid, reg, val);
+ return (error);
+}
+
+/*
+ * The following simplifying assumptions are made during emulation:
+ *
+ * - guest is in 64-bit mode
+ *   - default address size is 64-bits
+ *   - default operand size is 32-bits
+ *
+ * - operand size override is not supported
+ * - address size override is not supported
+ */
+static int
+emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ enum vm_reg_name reg;
+ uint8_t byte;
+ uint64_t val;
+
+ size = 4;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0x88:
+ /*
+ * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
+ * 88/r: mov r/m8, r8
+ * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
+ */
+ size = 1;
+ error = vie_read_bytereg(vm, vcpuid, vie, &byte);
+ if (error == 0)
+ error = memwrite(vm, vcpuid, gpa, byte, size, arg);
+ break;
+ case 0x89:
+ /*
+ * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
+ * 89/r: mov r/m32, r32
+ * REX.W + 89/r mov r/m64, r64
+ */
+ if (vie->rex_w)
+ size = 8;
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val);
+ if (error == 0) {
+ val &= size2mask[size];
+ error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ }
+ break;
+ case 0x8B:
+ /*
+ * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
+ * 8B/r: mov r32, r/m32
+ * REX.W 8B/r: mov r64, r/m64
+ */
+ if (vie->rex_w)
+ size = 8;
+ error = memread(vm, vcpuid, gpa, &val, size, arg);
+ if (error == 0) {
+ reg = gpr_map[vie->reg];
+ error = vie_update_register(vm, vcpuid, reg, val, size);
+ }
+ break;
+ case 0xC7:
+ /*
+ * MOV from imm32 to mem (ModRM:r/m)
+ * C7/0 mov r/m32, imm32
+ * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
+ */
+ val = vie->immediate; /* already sign-extended */
+
+ if (vie->rex_w)
+ size = 8;
+
+ if (size != 8)
+ val &= size2mask[size];
+
+ error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ break;
+ default:
+ break;
+ }
+
+ return (error);
+}
+
+static int
+emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ enum vm_reg_name reg;
+ uint64_t val1, val2;
+
+ size = 4;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0x23:
+ /*
+ * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
+ * result in reg.
+ *
+ * 23/r and r32, r/m32
+ * REX.W + 23/r and r64, r/m64
+ */
+ if (vie->rex_w)
+ size = 8;
+
+ /* get the first operand */
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val1);
+ if (error)
+ break;
+
+ /* get the second operand */
+ error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ if (error)
+ break;
+
+ /* perform the operation and write the result */
+ val1 &= val2;
+ error = vie_update_register(vm, vcpuid, reg, val1, size);
+ break;
+ case 0x81:
+ /*
+		 * AND mem (ModRM:r/m) with immediate and store the
+		 * result in mem.
+		 *
+		 * 81 /4		and r/m32, imm32
+		 * REX.W + 81 /4	and r/m64, imm32 sign-extended to 64
+ *
+ * Currently, only the AND operation of the 0x81 opcode
+ * is implemented (ModRM:reg = b100).
+ */
+ if ((vie->reg & 7) != 4)
+ break;
+
+ if (vie->rex_w)
+ size = 8;
+
+ /* get the first operand */
+ error = memread(vm, vcpuid, gpa, &val1, size, arg);
+ if (error)
+ break;
+
+ /*
+ * perform the operation with the pre-fetched immediate
+ * operand and write the result
+ */
+ val1 &= vie->immediate;
+ error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ break;
+ default:
+ break;
+ }
+ return (error);
+}
+
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite,
+ void *memarg)
+{
+ int error;
+
+ if (!vie->decoded)
+ return (EINVAL);
+
+ switch (vie->op.op_type) {
+ case VIE_OP_TYPE_MOV:
+ error = emulate_mov(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ case VIE_OP_TYPE_AND:
+ error = emulate_and(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+#ifdef _KERNEL
+static void
+vie_init(struct vie *vie)
+{
+
+ bzero(vie, sizeof(struct vie));
+
+ vie->base_register = VM_REG_LAST;
+ vie->index_register = VM_REG_LAST;
+}
+
+static int
+gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
+ uint64_t *gpa, uint64_t *gpaend)
+{
+ vm_paddr_t hpa;
+ int nlevels, ptpshift, ptpindex;
+ uint64_t *ptpbase, pte, pgsize;
+
+ /*
+ * XXX assumes 64-bit guest with 4 page walk levels
+ */
+ nlevels = 4;
+ while (--nlevels >= 0) {
+ /* Zero out the lower 12 bits and the upper 12 bits */
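+		/* (equivalent to ptpphys &= 0x000ffffffffff000UL) */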
+ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
+
+ hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE);
+ if (hpa == -1)
+ goto error;
+
+ ptpbase = (uint64_t *)PHYS_TO_DMAP(hpa);
+
+ ptpshift = PAGE_SHIFT + nlevels * 9;
+ ptpindex = (gla >> ptpshift) & 0x1FF;
+ pgsize = 1UL << ptpshift;
+
+ pte = ptpbase[ptpindex];
+
+ if ((pte & PG_V) == 0)
+ goto error;
+
+ if (pte & PG_PS) {
+ if (pgsize > 1 * GB)
+ goto error;
+ else
+ break;
+ }
+
+ ptpphys = pte;
+ }
+
+ /* Zero out the lower 'ptpshift' bits and the upper 12 bits */
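+	/* (i.e. keep only bits [51:ptpshift] of the pte) */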
+ pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
+ *gpa = pte | (gla & (pgsize - 1));
+ *gpaend = pte + pgsize;
+ return (0);
+
+error:
+ return (-1);
+}
+
+int
+vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
+ uint64_t cr3, struct vie *vie)
+{
+ int n, err;
+ uint64_t hpa, gpa, gpaend, off;
+
+ /*
+ * XXX cache previously fetched instructions using 'rip' as the tag
+ */
+
+ if (inst_length > VIE_INST_SIZE)
+ panic("vmm_fetch_instruction: invalid length %d", inst_length);
+
+ vie_init(vie);
+
+ /* Copy the instruction into 'vie' */
+ while (vie->num_valid < inst_length) {
+ err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
+ if (err)
+ break;
+
+ off = gpa & PAGE_MASK;
+ n = min(inst_length - vie->num_valid, PAGE_SIZE - off);
+
+ hpa = vm_gpa2hpa(vm, gpa, n);
+ if (hpa == -1)
+ break;
+
+ bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n);
+
+ rip += n;
+ vie->num_valid += n;
+ }
+
+ if (vie->num_valid == inst_length)
+ return (0);
+ else
+ return (-1);
+}
+
+static int
+vie_peek(struct vie *vie, uint8_t *x)
+{
+
+ if (vie->num_processed < vie->num_valid) {
+ *x = vie->inst[vie->num_processed];
+ return (0);
+ } else
+ return (-1);
+}
+
+static void
+vie_advance(struct vie *vie)
+{
+
+ vie->num_processed++;
+}
+
+static int
+decode_rex(struct vie *vie)
+{
+ uint8_t x;
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
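+	/* REX prefix byte: 0100WRXB (0x40 - 0x4F) */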
+ if (x >= 0x40 && x <= 0x4F) {
+ vie->rex_present = 1;
+
+ vie->rex_w = x & 0x8 ? 1 : 0;
+ vie->rex_r = x & 0x4 ? 1 : 0;
+ vie->rex_x = x & 0x2 ? 1 : 0;
+ vie->rex_b = x & 0x1 ? 1 : 0;
+
+ vie_advance(vie);
+ }
+
+ return (0);
+}
+
+static int
+decode_opcode(struct vie *vie)
+{
+ uint8_t x;
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ vie->op = one_byte_opcodes[x];
+
+ if (vie->op.op_type == VIE_OP_TYPE_NONE)
+ return (-1);
+
+ vie_advance(vie);
+ return (0);
+}
+
+/*
+ * XXX assuming 32-bit or 64-bit guest
+ */
+static int
+decode_modrm(struct vie *vie)
+{
+ uint8_t x;
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
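+	/* ModRM byte layout: mod[7:6], reg[5:3], r/m[2:0] */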
+ vie->mod = (x >> 6) & 0x3;
+ vie->rm = (x >> 0) & 0x7;
+ vie->reg = (x >> 3) & 0x7;
+
+ /*
+ * A direct addressing mode makes no sense in the context of an EPT
+ * fault. There has to be a memory access involved to cause the
+ * EPT fault.
+ */
+ if (vie->mod == VIE_MOD_DIRECT)
+ return (-1);
+
+ if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
+ (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
+ /*
+ * Table 2-5: Special Cases of REX Encodings
+ *
+ * mod=0, r/m=5 is used in the compatibility mode to
+ * indicate a disp32 without a base register.
+ *
+ * mod!=3, r/m=4 is used in the compatibility mode to
+ * indicate that the SIB byte is present.
+ *
+ * The 'b' bit in the REX prefix is don't care in
+ * this case.
+ */
+ } else {
+ vie->rm |= (vie->rex_b << 3);
+ }
+
+ vie->reg |= (vie->rex_r << 3);
+
+ /* SIB */
+ if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
+ goto done;
+
+ vie->base_register = gpr_map[vie->rm];
+
+ switch (vie->mod) {
+ case VIE_MOD_INDIRECT_DISP8:
+ vie->disp_bytes = 1;
+ break;
+ case VIE_MOD_INDIRECT_DISP32:
+ vie->disp_bytes = 4;
+ break;
+ case VIE_MOD_INDIRECT:
+ if (vie->rm == VIE_RM_DISP32) {
+ vie->disp_bytes = 4;
+ vie->base_register = VM_REG_LAST; /* no base */
+ }
+ break;
+ }
+
+ /* Figure out immediate operand size (if any) */
+ if (vie->op.op_flags & VIE_OP_F_IMM)
+ vie->imm_bytes = 4;
+ else if (vie->op.op_flags & VIE_OP_F_IMM8)
+ vie->imm_bytes = 1;
+
+done:
+ vie_advance(vie);
+
+ return (0);
+}
+
+static int
+decode_sib(struct vie *vie)
+{
+ uint8_t x;
+
+ /* Proceed only if SIB byte is present */
+ if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
+ return (0);
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ /* De-construct the SIB byte */
+ vie->ss = (x >> 6) & 0x3;
+ vie->index = (x >> 3) & 0x7;
+ vie->base = (x >> 0) & 0x7;
+
+ /* Apply the REX prefix modifiers */
+ vie->index |= vie->rex_x << 3;
+ vie->base |= vie->rex_b << 3;
+
+ switch (vie->mod) {
+ case VIE_MOD_INDIRECT_DISP8:
+ vie->disp_bytes = 1;
+ break;
+ case VIE_MOD_INDIRECT_DISP32:
+ vie->disp_bytes = 4;
+ break;
+ }
+
+ if (vie->mod == VIE_MOD_INDIRECT &&
+ (vie->base == 5 || vie->base == 13)) {
+ /*
+		 * Special case: the base register is unused when mod = 0
+		 * and base = %rbp or %r13.
+ *
+ * Documented in:
+ * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+ * Table 2-5: Special Cases of REX Encodings
+ */
+ vie->disp_bytes = 4;
+ } else {
+ vie->base_register = gpr_map[vie->base];
+ }
+
+ /*
+ * All encodings of 'index' are valid except for %rsp (4).
+ *
+ * Documented in:
+ * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+ * Table 2-5: Special Cases of REX Encodings
+ */
+ if (vie->index != 4)
+ vie->index_register = gpr_map[vie->index];
+
+ /* 'scale' makes sense only in the context of an index register */
+ if (vie->index_register < VM_REG_LAST)
+ vie->scale = 1 << vie->ss;
+
+ vie_advance(vie);
+
+ return (0);
+}
+
+static int
+decode_displacement(struct vie *vie)
+{
+ int n, i;
+ uint8_t x;
+
+ union {
+ char buf[4];
+ int8_t signed8;
+ int32_t signed32;
+ } u;
+
+ if ((n = vie->disp_bytes) == 0)
+ return (0);
+
+ if (n != 1 && n != 4)
+ panic("decode_displacement: invalid disp_bytes %d", n);
+
+ for (i = 0; i < n; i++) {
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ u.buf[i] = x;
+ vie_advance(vie);
+ }
+
+ if (n == 1)
+ vie->displacement = u.signed8; /* sign-extended */
+ else
+ vie->displacement = u.signed32; /* sign-extended */
+
+ return (0);
+}
+
+static int
+decode_immediate(struct vie *vie)
+{
+ int i, n;
+ uint8_t x;
+ union {
+ char buf[4];
+ int8_t signed8;
+ int32_t signed32;
+ } u;
+
+ if ((n = vie->imm_bytes) == 0)
+ return (0);
+
+ if (n != 1 && n != 4)
+ panic("decode_immediate: invalid imm_bytes %d", n);
+
+ for (i = 0; i < n; i++) {
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ u.buf[i] = x;
+ vie_advance(vie);
+ }
+
+ if (n == 1)
+ vie->immediate = u.signed8; /* sign-extended */
+ else
+ vie->immediate = u.signed32; /* sign-extended */
+
+ return (0);
+}
+
+/*
+ * Verify that the 'guest linear address' provided as collateral of the nested
+ * page table fault matches our instruction decoding.
+ */
+static int
+verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+{
+ int error;
+ uint64_t base, idx;
+
+ /* Skip 'gla' verification */
+ if (gla == VIE_INVALID_GLA)
+ return (0);
+
+ base = 0;
+ if (vie->base_register != VM_REG_LAST) {
+ error = vm_get_register(vm, cpuid, vie->base_register, &base);
+ if (error) {
+ printf("verify_gla: error %d getting base reg %d\n",
+ error, vie->base_register);
+ return (-1);
+ }
+ }
+
+ idx = 0;
+ if (vie->index_register != VM_REG_LAST) {
+ error = vm_get_register(vm, cpuid, vie->index_register, &idx);
+ if (error) {
+ printf("verify_gla: error %d getting index reg %d\n",
+ error, vie->index_register);
+ return (-1);
+ }
+ }
+
+ if (base + vie->scale * idx + vie->displacement != gla) {
+ printf("verify_gla mismatch: "
+ "base(0x%0lx), scale(%d), index(0x%0lx), "
+ "disp(0x%0lx), gla(0x%0lx)\n",
+ base, vie->scale, idx, vie->displacement, gla);
+ return (-1);
+ }
+
+ return (0);
+}
+
+int
+vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+{
+
+ if (decode_rex(vie))
+ return (-1);
+
+ if (decode_opcode(vie))
+ return (-1);
+
+ if (decode_modrm(vie))
+ return (-1);
+
+ if (decode_sib(vie))
+ return (-1);
+
+ if (decode_displacement(vie))
+ return (-1);
+
+ if (decode_immediate(vie))
+ return (-1);
+
+ if (verify_gla(vm, cpuid, gla, vie))
+ return (-1);
+
+ vie->decoded = 1; /* success */
+
+ return (0);
+}
+#endif /* _KERNEL */
diff --git a/sys/amd64/vmm/vmm_ipi.c b/sys/amd64/vmm/vmm_ipi.c
new file mode 100644
index 0000000..643d326
--- /dev/null
+++ b/sys/amd64/vmm/vmm_ipi.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/segments.h>
+#include <machine/md_var.h>
+
+#include <machine/vmm.h>
+#include "vmm_ipi.h"
+
+extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);
+
+/*
+ * The default is to use the IPI_AST to interrupt a vcpu.
+ */
+int vmm_ipinum = IPI_AST;
+
+CTASSERT(APIC_SPURIOUS_INT == 255);
+
+void
+vmm_ipi_init(void)
+{
+ int idx;
+ uintptr_t func;
+ struct gate_descriptor *ip;
+
+ /*
+ * Search backwards from the highest IDT vector available for use
+ * as our IPI vector. We install the 'justreturn' handler at that
+ * vector and use it to interrupt the vcpus.
+ *
+ * We do this because the IPI_AST is heavyweight and saves all
+ * registers in the trapframe. This is overkill for our use case
+ * which is simply to EOI the interrupt and return.
+ */
+ idx = APIC_SPURIOUS_INT;
+ while (--idx >= APIC_IPI_INTS) {
+ ip = &idt[idx];
+ func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+ if (func == (uintptr_t)&IDTVEC(rsvd)) {
+ vmm_ipinum = idx;
+ setidt(vmm_ipinum, IDTVEC(justreturn), SDT_SYSIGT,
+ SEL_KPL, 0);
+ break;
+ }
+ }
+
+ if (vmm_ipinum != IPI_AST && bootverbose) {
+ printf("vmm_ipi_init: installing ipi handler to interrupt "
+ "vcpus at vector %d\n", vmm_ipinum);
+ }
+}
+
+void
+vmm_ipi_cleanup(void)
+{
+ if (vmm_ipinum != IPI_AST)
+ setidt(vmm_ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+}
diff --git a/sys/amd64/vmm/vmm_ipi.h b/sys/amd64/vmm/vmm_ipi.h
new file mode 100644
index 0000000..91552e3
--- /dev/null
+++ b/sys/amd64/vmm/vmm_ipi.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_IPI_H_
+#define _VMM_IPI_H_
+
+struct vm;
+
+extern int vmm_ipinum;
+
+void vmm_ipi_init(void);
+void vmm_ipi_cleanup(void);
+
+#endif
diff --git a/sys/amd64/vmm/vmm_ktr.h b/sys/amd64/vmm/vmm_ktr.h
new file mode 100644
index 0000000..e691c61
--- /dev/null
+++ b/sys/amd64/vmm/vmm_ktr.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+#define KTR_VMM KTR_GEN
+
+#define VMM_CTR0(vm, vcpuid, format) \
+CTR3(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu)
+
+#define VMM_CTR1(vm, vcpuid, format, p1) \
+CTR4(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
+ (p1))
+
+#define VMM_CTR2(vm, vcpuid, format, p1, p2) \
+CTR5(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
+ (p1), (p2))
+
+#define VMM_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR6(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \
+ (p1), (p2), (p3))
+#endif
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
new file mode 100644
index 0000000..d024b71
--- /dev/null
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -0,0 +1,201 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+
+#include <x86/specialreg.h>
+#include <x86/apicreg.h>
+
+#include <machine/vmm.h>
+#include "vmm_ipi.h"
+#include "vmm_lapic.h"
+#include "vlapic.h"
+
+int
+lapic_pending_intr(struct vm *vm, int cpu)
+{
+ struct vlapic *vlapic;
+
+ vlapic = vm_lapic(vm, cpu);
+
+ return (vlapic_pending_intr(vlapic));
+}
+
+void
+lapic_intr_accepted(struct vm *vm, int cpu, int vector)
+{
+ struct vlapic *vlapic;
+
+ vlapic = vm_lapic(vm, cpu);
+
+ vlapic_intr_accepted(vlapic, vector);
+}
+
+int
+lapic_set_intr(struct vm *vm, int cpu, int vector)
+{
+ struct vlapic *vlapic;
+
+ if (cpu < 0 || cpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (vector < 32 || vector > 255)
+ return (EINVAL);
+
+ vlapic = vm_lapic(vm, cpu);
+ vlapic_set_intr_ready(vlapic, vector);
+
+ vm_interrupt_hostcpu(vm, cpu);
+
+ return (0);
+}
+
+int
+lapic_timer_tick(struct vm *vm, int cpu)
+{
+ struct vlapic *vlapic;
+
+ vlapic = vm_lapic(vm, cpu);
+
+ return (vlapic_timer_tick(vlapic));
+}
+
+static boolean_t
+x2apic_msr(u_int msr)
+{
+ if (msr >= 0x800 && msr <= 0xBFF)
+ return (TRUE);
+ else
+ return (FALSE);
+}
+
+static u_int
+x2apic_msr_to_regoff(u_int msr)
+{
+
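+	/* Each x2APIC MSR maps to a 16-byte aligned xAPIC register offset. */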
+ return ((msr - 0x800) << 4);
+}
+
+boolean_t
+lapic_msr(u_int msr)
+{
+
+ if (x2apic_msr(msr) || (msr == MSR_APICBASE))
+ return (TRUE);
+ else
+ return (FALSE);
+}
+
+int
+lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
+{
+ int error;
+ u_int offset;
+ struct vlapic *vlapic;
+
+ vlapic = vm_lapic(vm, cpu);
+
+ if (msr == MSR_APICBASE) {
+ *rval = vlapic_get_apicbase(vlapic);
+ error = 0;
+ } else {
+ offset = x2apic_msr_to_regoff(msr);
+ error = vlapic_op_mem_read(vlapic, offset, DWORD, rval);
+ }
+
+ return (error);
+}
+
+int
+lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
+{
+ int error;
+ u_int offset;
+ struct vlapic *vlapic;
+
+ vlapic = vm_lapic(vm, cpu);
+
+ if (msr == MSR_APICBASE) {
+ vlapic_set_apicbase(vlapic, val);
+ error = 0;
+ } else {
+ offset = x2apic_msr_to_regoff(msr);
+ error = vlapic_op_mem_write(vlapic, offset, DWORD, val);
+ }
+
+ return (error);
+}
+
+int
+lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
+ void *arg)
+{
+ int error;
+ uint64_t off;
+ struct vlapic *vlapic;
+
+ off = gpa - DEFAULT_APIC_BASE;
+
+ /*
+ * Memory mapped local apic accesses must be 4 bytes wide and
+ * aligned on a 16-byte boundary.
+ */
+ if (size != 4 || off & 0xf)
+ return (EINVAL);
+
+ vlapic = vm_lapic(vm, cpu);
+ error = vlapic_op_mem_write(vlapic, off, DWORD, wval);
+ return (error);
+}
+
+int
+lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
+ void *arg)
+{
+ int error;
+ uint64_t off;
+ struct vlapic *vlapic;
+
+ off = gpa - DEFAULT_APIC_BASE;
+
+ /*
+ * Memory mapped local apic accesses must be 4 bytes wide and
+ * aligned on a 16-byte boundary.
+ */
+ if (size != 4 || off & 0xf)
+ return (EINVAL);
+
+ vlapic = vm_lapic(vm, cpu);
+ error = vlapic_op_mem_read(vlapic, off, DWORD, rval);
+ return (error);
+}
diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h
new file mode 100644
index 0000000..a79912e
--- /dev/null
+++ b/sys/amd64/vmm/vmm_lapic.h
@@ -0,0 +1,71 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_LAPIC_H_
+#define _VMM_LAPIC_H_
+
+struct vm;
+
+boolean_t lapic_msr(u_int num);
+int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
+int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
+
+int lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
+ uint64_t *rval, int size, void *arg);
+int lapic_mmio_write(void *vm, int cpu, uint64_t gpa,
+ uint64_t wval, int size, void *arg);
+
+int lapic_timer_tick(struct vm *vm, int cpu);
+
+/*
+ * Returns a vector between 32 and 255 if an interrupt is pending in the
+ * IRR that can be delivered based on the current state of ISR and TPR.
+ *
+ * Note that the vector does not automatically transition to the ISR as a
+ * result of calling this function.
+ *
+ * Returns -1 if there is no eligible vector that can be delivered to the
+ * guest at this time.
+ */
+int lapic_pending_intr(struct vm *vm, int cpu);
+
+/*
+ * Transition 'vector' from IRR to ISR. This function is called with the
+ * vector returned by 'lapic_pending_intr()' when the guest is able to
+ * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
+ * block interrupt delivery).
+ */
+void lapic_intr_accepted(struct vm *vm, int cpu, int vector);
+
+/*
+ * Signal to the LAPIC that an interrupt at 'vector' needs to be generated
+ * for 'cpu'; the pending state is recorded in the IRR.
+ */
+int lapic_set_intr(struct vm *vm, int cpu, int vector);
+
+#endif
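
The comments above describe a two-step delivery handshake: poll for a
deliverable vector, then acknowledge it only once the guest can actually
take the interrupt. A sketch of that flow, assuming a hypothetical
vcpu_interruptible() predicate for the RFLAGS.IF and interrupt-shadow
checks:

    static void
    vcpu_try_inject(struct vm *vm, int cpu)
    {
            int vector;

            vector = lapic_pending_intr(vm, cpu);
            if (vector < 0)
                    return;                 /* nothing deliverable */

            if (!vcpu_interruptible(vm, cpu))  /* hypothetical predicate */
                    return;                 /* retry on a later exit */

            /* The vector moves from IRR to ISR only on acceptance. */
            lapic_intr_accepted(vm, cpu, vector);
            /* CPU-specific code would now program the event injection. */
    }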
diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem.c
new file mode 100644
index 0000000..04f99b1
--- /dev/null
+++ b/sys/amd64/vmm/vmm_mem.c
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/linker.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#include <machine/md_var.h>
+#include <machine/metadata.h>
+#include <machine/pc/bios.h>
+#include <machine/vmparam.h>
+#include <machine/pmap.h>
+
+#include "vmm_util.h"
+#include "vmm_mem.h"
+
+SYSCTL_DECL(_hw_vmm);
+
+static u_long pages_allocated;
+SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD,
+ &pages_allocated, 0, "4KB pages allocated");
+
+static void
+update_pages_allocated(int howmany)
+{
+ pages_allocated += howmany; /* XXX locking? */
+}
+
+int
+vmm_mem_init(void)
+{
+
+ return (0);
+}
+
+vm_paddr_t
+vmm_mem_alloc(size_t size)
+{
+ int flags;
+ vm_page_t m;
+ vm_paddr_t pa;
+
+ if (size != PAGE_SIZE)
+ panic("vmm_mem_alloc: invalid allocation size %lu", size);
+
+ flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO;
+
+ while (1) {
+ /*
+ * XXX need policy to determine when to back off the allocation
+ */
+ m = vm_page_alloc(NULL, 0, flags);
+ if (m == NULL)
+ VM_WAIT;
+ else
+ break;
+ }
+
+ pa = VM_PAGE_TO_PHYS(m);
+
+ if ((m->flags & PG_ZERO) == 0)
+ pagezero((void *)PHYS_TO_DMAP(pa));
+ m->valid = VM_PAGE_BITS_ALL;
+
+ update_pages_allocated(1);
+
+ return (pa);
+}
+
+void
+vmm_mem_free(vm_paddr_t base, size_t length)
+{
+ vm_page_t m;
+
+ if (base & PAGE_MASK) {
+ panic("vmm_mem_free: base 0x%0lx must be aligned on a "
+ "0x%0x boundary\n", base, PAGE_SIZE);
+ }
+
+ if (length != PAGE_SIZE)
+ panic("vmm_mem_free: invalid length %lu", length);
+
+ m = PHYS_TO_VM_PAGE(base);
+ m->wire_count--;
+ vm_page_free(m);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+
+ update_pages_allocated(-1);
+}
+
+vm_paddr_t
+vmm_mem_maxaddr(void)
+{
+
+ return (ptoa(Maxmem));
+}
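
The "XXX locking?" note above flags a real race: concurrent calls to
vmm_mem_alloc() and vmm_mem_free() perform an unlocked read-modify-write
on pages_allocated. One possible fix, sketched here as an assumption
rather than what this patch does, is the <machine/atomic.h> primitive:

    static void
    update_pages_allocated(int howmany)
    {
            /* atomic RMW; negative deltas wrap correctly modulo 2^64 */
            atomic_add_long(&pages_allocated, (u_long)howmany);
    }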
diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h
new file mode 100644
index 0000000..7d45c74
--- /dev/null
+++ b/sys/amd64/vmm/vmm_mem.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_MEM_H_
+#define _VMM_MEM_H_
+
+int vmm_mem_init(void);
+vm_paddr_t vmm_mem_alloc(size_t size);
+void vmm_mem_free(vm_paddr_t start, size_t size);
+vm_paddr_t vmm_mem_maxaddr(void);
+
+#endif
diff --git a/sys/amd64/vmm/vmm_msr.c b/sys/amd64/vmm/vmm_msr.c
new file mode 100644
index 0000000..d97c819
--- /dev/null
+++ b/sys/amd64/vmm/vmm_msr.c
@@ -0,0 +1,254 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+
+#include <machine/specialreg.h>
+
+#include <machine/vmm.h>
+#include "vmm_lapic.h"
+#include "vmm_msr.h"
+
+#define VMM_MSR_F_EMULATE 0x01
+#define VMM_MSR_F_READONLY 0x02
+#define VMM_MSR_F_INVALID 0x04 /* guest_msr_valid() can override this */
+
+struct vmm_msr {
+ int num;
+ int flags;
+ uint64_t hostval;
+};
+
+static struct vmm_msr vmm_msr[] = {
+ { MSR_LSTAR, 0 },
+ { MSR_CSTAR, 0 },
+ { MSR_STAR, 0 },
+ { MSR_SF_MASK, 0 },
+ { MSR_PAT, VMM_MSR_F_EMULATE | VMM_MSR_F_INVALID },
+ { MSR_BIOS_SIGN, VMM_MSR_F_EMULATE },
+ { MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
+};
+
+#define vmm_msr_num (sizeof(vmm_msr) / sizeof(vmm_msr[0]))
+CTASSERT(VMM_MSR_NUM >= vmm_msr_num);
+
+#define readonly_msr(idx) \
+ ((vmm_msr[(idx)].flags & VMM_MSR_F_READONLY) != 0)
+
+#define emulated_msr(idx) \
+ ((vmm_msr[(idx)].flags & VMM_MSR_F_EMULATE) != 0)
+
+#define invalid_msr(idx) \
+ ((vmm_msr[(idx)].flags & VMM_MSR_F_INVALID) != 0)
+
+void
+vmm_msr_init(void)
+{
+ int i;
+
+ for (i = 0; i < vmm_msr_num; i++) {
+ if (emulated_msr(i))
+ continue;
+ /*
+ * XXX this assumes that the value of the host msr does not
+ * change after we have cached it.
+ */
+ vmm_msr[i].hostval = rdmsr(vmm_msr[i].num);
+ }
+}
+
+void
+guest_msrs_init(struct vm *vm, int cpu)
+{
+ int i;
+ uint64_t *guest_msrs;
+
+ guest_msrs = vm_guest_msrs(vm, cpu);
+
+ for (i = 0; i < vmm_msr_num; i++) {
+ switch (vmm_msr[i].num) {
+ case MSR_LSTAR:
+ case MSR_CSTAR:
+ case MSR_STAR:
+ case MSR_SF_MASK:
+ case MSR_BIOS_SIGN:
+ case MSR_MCG_CAP:
+ guest_msrs[i] = 0;
+ break;
+ case MSR_PAT:
+ guest_msrs[i] = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+ break;
+ default:
+ panic("guest_msrs_init: missing initialization for msr "
+ "0x%0x", vmm_msr[i].num);
+ }
+ }
+}
+
+static int
+msr_num_to_idx(u_int num)
+{
+ int i;
+
+ for (i = 0; i < vmm_msr_num; i++)
+ if (vmm_msr[i].num == num)
+ return (i);
+
+ return (-1);
+}
+
+int
+emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
+{
+ int idx;
+ uint64_t *guest_msrs;
+
+ if (lapic_msr(num))
+ return (lapic_wrmsr(vm, cpu, num, val));
+
+ idx = msr_num_to_idx(num);
+ if (idx < 0 || invalid_msr(idx))
+ return (EINVAL);
+
+ if (!readonly_msr(idx)) {
+ guest_msrs = vm_guest_msrs(vm, cpu);
+
+ /* Stash the value */
+ guest_msrs[idx] = val;
+
+ /* Update processor state for non-emulated MSRs */
+ if (!emulated_msr(idx))
+ wrmsr(vmm_msr[idx].num, val);
+ }
+
+ return (0);
+}
+
+int
+emulate_rdmsr(struct vm *vm, int cpu, u_int num)
+{
+ int error, idx;
+ uint32_t eax, edx;
+ uint64_t result, *guest_msrs;
+
+ if (lapic_msr(num)) {
+ error = lapic_rdmsr(vm, cpu, num, &result);
+ goto done;
+ }
+
+ idx = msr_num_to_idx(num);
+ if (idx < 0 || invalid_msr(idx)) {
+ error = EINVAL;
+ goto done;
+ }
+
+ guest_msrs = vm_guest_msrs(vm, cpu);
+ result = guest_msrs[idx];
+
+ /*
+ * If this is not an emulated MSR, make sure that the processor
+ * state matches our cached state.
+ */
+ if (!emulated_msr(idx) && (rdmsr(num) != result)) {
+ panic("emulate_rdmsr: msr 0x%0x has inconsistent cached "
+ "(0x%016lx) and actual (0x%016lx) values", num,
+ result, rdmsr(num));
+ }
+
+ error = 0;
+
+done:
+ if (error == 0) {
+ eax = result;
+ edx = result >> 32;
+ error = vm_set_register(vm, cpu, VM_REG_GUEST_RAX, eax);
+ if (error)
+ panic("vm_set_register(rax) error %d", error);
+ error = vm_set_register(vm, cpu, VM_REG_GUEST_RDX, edx);
+ if (error)
+ panic("vm_set_register(rdx) error %d", error);
+ }
+ return (error);
+}
+
+void
+restore_guest_msrs(struct vm *vm, int cpu)
+{
+ int i;
+ uint64_t *guest_msrs;
+
+ guest_msrs = vm_guest_msrs(vm, cpu);
+
+ for (i = 0; i < vmm_msr_num; i++) {
+ if (emulated_msr(i))
+ continue;
+ wrmsr(vmm_msr[i].num, guest_msrs[i]);
+ }
+}
+
+void
+restore_host_msrs(struct vm *vm, int cpu)
+{
+ int i;
+
+ for (i = 0; i < vmm_msr_num; i++) {
+ if (emulated_msr(i))
+ continue;
+ wrmsr(vmm_msr[i].num, vmm_msr[i].hostval);
+ }
+}
+
+/*
+ * Must be called by the CPU-specific code before any guests are
+ * created.
+ */
+void
+guest_msr_valid(int msr)
+{
+ int i;
+
+ for (i = 0; i < vmm_msr_num; i++) {
+ if (vmm_msr[i].num == msr && invalid_msr(i)) {
+ vmm_msr[i].flags &= ~VMM_MSR_F_INVALID;
+ }
+ }
+}
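
restore_guest_msrs() and restore_host_msrs() bracket guest execution:
guest values are loaded into the hardware MSRs before entry and the
cached host values are put back after exit, while emulated MSRs such as
MSR_PAT never reach the hardware at all. A hedged sketch of the expected
calling sequence; hw_vmentry() stands in for the CPU-specific VMX/SVM
entry path and is not a function defined by this patch:

    static int
    vcpu_run_once(struct vm *vm, int cpu)
    {
            int error;

            restore_guest_msrs(vm, cpu);    /* hardware holds guest MSRs */
            error = hw_vmentry(vm, cpu);    /* hypothetical guest entry */
            restore_host_msrs(vm, cpu);     /* cached host values back */

            return (error);
    }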
diff --git a/sys/amd64/vmm/vmm_msr.h b/sys/amd64/vmm/vmm_msr.h
new file mode 100644
index 0000000..8a1fda3
--- /dev/null
+++ b/sys/amd64/vmm/vmm_msr.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_MSR_H_
+#define _VMM_MSR_H_
+
+#define VMM_MSR_NUM 16
+struct vm;
+
+void vmm_msr_init(void);
+int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
+int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
+void guest_msrs_init(struct vm *vm, int cpu);
+void guest_msr_valid(int msr);
+void restore_host_msrs(struct vm *vm, int cpu);
+void restore_guest_msrs(struct vm *vm, int cpu);
+
+#endif
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
new file mode 100644
index 0000000..2143d25
--- /dev/null
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/smp.h>
+
+#include <machine/vmm.h>
+#include "vmm_util.h"
+#include "vmm_stat.h"
+
+static int vstnum;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_TYPES];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+void
+vmm_stat_init(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst->scope == VMM_STAT_SCOPE_INTEL && !vmm_is_intel())
+ return;
+
+ if (vst->scope == VMM_STAT_SCOPE_AMD && !vmm_is_amd())
+ return;
+
+ if (vstnum >= MAX_VMM_STAT_TYPES) {
+ printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vstnum;
+ vsttab[vstnum++] = vst;
+}
+
+int
+vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
+{
+ int i;
+ uint64_t *stats;
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ stats = vcpu_stats(vm, vcpu);
+ for (i = 0; i < vstnum; i++)
+ buf[i] = stats[i];
+ *num_stats = vstnum;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+ u_long size;
+
+ size = vstnum * sizeof(uint64_t);
+
+ return (malloc(size, M_VMM_STAT, M_ZERO | M_WAITOK));
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+const char *
+vmm_stat_desc(int index)
+{
+
+ if (index >= 0 && index < vstnum)
+ return (vsttab[index]->desc);
+ else
+ return (NULL);
+}
+
+/* global statistics */
+VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt");
+VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted");
+VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted");
+VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted");
+VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted");
+VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits");
+VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted");
+VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening");
+VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
+VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
+VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
+VMM_STAT(VMEXIT_EPT_FAULT, "vm exits due to nested page fault");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
+VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
diff --git a/sys/amd64/vmm/vmm_stat.h b/sys/amd64/vmm/vmm_stat.h
new file mode 100644
index 0000000..93c7e87
--- /dev/null
+++ b/sys/amd64/vmm/vmm_stat.h
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_TYPES 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+ VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */
+ VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */
+};
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ const char *desc; /* description of statistic */
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_init(void *arg);
+
+#define VMM_STAT_DEFINE(type, desc, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, desc, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_init, type)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, desc, VMM_STAT_SCOPE_ANY)
+#define VMM_STAT_INTEL(type, desc) \
+ VMM_STAT_DEFINE(type, desc, VMM_STAT_SCOPE_INTEL)
+#define VMM_STAT_AMD(type, desc) \
+ VMM_STAT_DEFINE(type, desc, VMM_STAT_SCOPE_AMD)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_free(void *vp);
+
+/*
+ * 'buf' must be large enough to hold 'MAX_VMM_STAT_TYPES' entries
+ */
+int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
+const char *vmm_stat_desc(int index);
+
+static __inline void
+vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats = vcpu_stats(vm, vcpu);
+ if (vst->index >= 0)
+ stats[vst->index] += x;
+#endif
+}
+
+VMM_STAT_DECLARE(VCPU_MIGRATIONS);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_EXTINT);
+VMM_STAT_DECLARE(VMEXIT_HLT);
+VMM_STAT_DECLARE(VMEXIT_CR_ACCESS);
+VMM_STAT_DECLARE(VMEXIT_RDMSR);
+VMM_STAT_DECLARE(VMEXIT_WRMSR);
+VMM_STAT_DECLARE(VMEXIT_MTRAP);
+VMM_STAT_DECLARE(VMEXIT_PAUSE);
+VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_INOUT);
+VMM_STAT_DECLARE(VMEXIT_CPUID);
+VMM_STAT_DECLARE(VMEXIT_EPT_FAULT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
+VMM_STAT_DECLARE(VMEXIT_USERSPACE);
+#endif
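
Putting the pieces together: VMM_STAT() both defines a counter and
registers it through SYSINIT at module load, after which vmm_stat_incr()
indexes the per-vcpu buffer by the position assigned in vmm_stat_init().
A short usage sketch; VMEXIT_EXAMPLE is a made-up name, and the increment
compiles to nothing unless VMM_KEEP_STATS is defined:

    VMM_STAT(VMEXIT_EXAMPLE, "illustrative exit counter");

    static void
    count_example_exit(struct vm *vm, int vcpu)
    {
            /* no-op unless the kernel is built with VMM_KEEP_STATS */
            vmm_stat_incr(vm, vcpu, VMEXIT_EXAMPLE, 1);
    }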
diff --git a/sys/amd64/vmm/vmm_support.S b/sys/amd64/vmm/vmm_support.S
new file mode 100644
index 0000000..2afc608
--- /dev/null
+++ b/sys/amd64/vmm/vmm_support.S
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define LOCORE
+
+#include <machine/asmacros.h>
+
+#define LA_EOI 0xB0
+
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(justreturn)
+ pushq %rax
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax)
+ popq %rax
+ iretq
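
The justreturn vector lets a host CPU be kicked out of guest mode by an
IPI whose handler does nothing beyond acknowledging itself. Its body is
equivalent to the following C sketch, assuming 'lapic' points at the
memory-mapped local APIC page and LA_EOI is the 0xB0 EOI register offset:

    static void
    justreturn_eoi(volatile char *lapic_base)
    {
            /* any value written to the EOI register acks the interrupt */
            *(volatile uint32_t *)(lapic_base + LA_EOI) = 0;
    }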
diff --git a/sys/amd64/vmm/vmm_util.c b/sys/amd64/vmm/vmm_util.c
new file mode 100644
index 0000000..f245f92
--- /dev/null
+++ b/sys/amd64/vmm/vmm_util.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+
+#include <machine/md_var.h>
+
+#include "vmm_util.h"
+
+boolean_t
+vmm_is_intel(void)
+{
+
+ if (strcmp(cpu_vendor, "GenuineIntel") == 0)
+ return (TRUE);
+ else
+ return (FALSE);
+}
+
+boolean_t
+vmm_is_amd(void)
+{
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
+ return (TRUE);
+ else
+ return (FALSE);
+}
+
+boolean_t
+vmm_supports_1G_pages(void)
+{
+ unsigned int regs[4];
+
+ /*
+ * CPUID.80000001:EDX[bit 26] = 1 indicates support for 1GB pages
+ *
+ * Both Intel and AMD support this bit.
+ */
+ if (cpu_exthigh >= 0x80000001) {
+ do_cpuid(0x80000001, regs);
+ if (regs[3] & (1 << 26))
+ return (TRUE);
+ }
+ return (FALSE);
+}
+
+#include <sys/proc.h>
+#include <machine/frame.h>
+#define DUMP_REG(x) printf(#x "\t\t0x%016lx\n", (long)(tf->tf_ ## x))
+#define DUMP_SEG(x) printf(#x "\t\t0x%04x\n", (unsigned)(tf->tf_ ## x))
+void
+dump_trapframe(struct trapframe *tf)
+{
+ DUMP_REG(rdi);
+ DUMP_REG(rsi);
+ DUMP_REG(rdx);
+ DUMP_REG(rcx);
+ DUMP_REG(r8);
+ DUMP_REG(r9);
+ DUMP_REG(rax);
+ DUMP_REG(rbx);
+ DUMP_REG(rbp);
+ DUMP_REG(r10);
+ DUMP_REG(r11);
+ DUMP_REG(r12);
+ DUMP_REG(r13);
+ DUMP_REG(r14);
+ DUMP_REG(r15);
+ DUMP_REG(trapno);
+ DUMP_REG(addr);
+ DUMP_REG(flags);
+ DUMP_REG(err);
+ DUMP_REG(rip);
+ DUMP_REG(rflags);
+ DUMP_REG(rsp);
+ DUMP_SEG(cs);
+ DUMP_SEG(ss);
+ DUMP_SEG(fs);
+ DUMP_SEG(gs);
+ DUMP_SEG(es);
+ DUMP_SEG(ds);
+}
diff --git a/sys/amd64/vmm/vmm_util.h b/sys/amd64/vmm/vmm_util.h
new file mode 100644
index 0000000..7f82332
--- /dev/null
+++ b/sys/amd64/vmm/vmm_util.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_UTIL_H_
+#define _VMM_UTIL_H_
+
+struct trapframe;
+
+boolean_t vmm_is_intel(void);
+boolean_t vmm_is_amd(void);
+boolean_t vmm_supports_1G_pages(void);
+
+void dump_trapframe(struct trapframe *tf);
+
+#endif
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
new file mode 100644
index 0000000..fa2eabc
--- /dev/null
+++ b/sys/amd64/vmm/x86.c
@@ -0,0 +1,219 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+
+#include <machine/clock.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#include <machine/vmm.h>
+
+#include "x86.h"
+
+#define CPUID_VM_HIGH 0x40000000
+
+static const char bhyve_id[12] = "BHyVE BHyVE ";
+
+int
+x86_emulate_cpuid(struct vm *vm, int vcpu_id,
+ uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+ int error;
+ unsigned int func, regs[4];
+ enum x2apic_state x2apic_state;
+
+ /*
+ * Requests for invalid CPUID levels should map to the highest
+ * available level instead.
+ */
+ if (cpu_exthigh != 0 && *eax >= 0x80000000) {
+ if (*eax > cpu_exthigh)
+ *eax = cpu_exthigh;
+ } else if (*eax >= 0x40000000) {
+ if (*eax > CPUID_VM_HIGH)
+ *eax = CPUID_VM_HIGH;
+ } else if (*eax > cpu_high) {
+ *eax = cpu_high;
+ }
+
+ func = *eax;
+
+ /*
+ * The general approach for CPU topology is to advertise a
+ * flat topology in which each vCPU is its own package, with
+ * no multi-core or SMT.
+ */
+ switch (func) {
+ case CPUID_0000_0000:
+ case CPUID_0000_0002:
+ case CPUID_0000_0003:
+ case CPUID_0000_000A:
+ cpuid_count(*eax, *ecx, regs);
+ break;
+
+ case CPUID_8000_0000:
+ case CPUID_8000_0001:
+ case CPUID_8000_0002:
+ case CPUID_8000_0003:
+ case CPUID_8000_0004:
+ case CPUID_8000_0006:
+ case CPUID_8000_0008:
+ cpuid_count(*eax, *ecx, regs);
+ break;
+
+ case CPUID_8000_0007:
+ cpuid_count(*eax, *ecx, regs);
+ /*
+ * If the host TSCs are not synchronized across
+ * physical cpus then we cannot advertise an
+ * invariant tsc to a vcpu.
+ *
+ * XXX This still falls short because the vcpu
+ * can observe the TSC moving backwards as it
+ * migrates across physical cpus. But at least
+ * it should discourage the guest from using the
+ * TSC to keep track of time.
+ */
+ if (!smp_tsc)
+ regs[3] &= ~AMDPM_TSC_INVARIANT;
+ break;
+
+ case CPUID_0000_0001:
+ do_cpuid(1, regs);
+
+ error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
+ if (error) {
+ panic("x86_emulate_cpuid: error %d "
+ "fetching x2apic state", error);
+ }
+
+ /*
+ * Override the APIC ID only in ebx
+ */
+ regs[1] &= ~(CPUID_LOCAL_APIC_ID);
+ regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
+
+ /*
+ * Don't expose VMX, SpeedStep or TME capability.
+ * Advertise x2APIC capability and Hypervisor guest.
+ */
+ regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
+
+ regs[2] |= CPUID2_HV;
+
+ if (x2apic_state != X2APIC_DISABLED)
+ regs[2] |= CPUID2_X2APIC;
+
+ /*
+ * Hide xsave/osxsave/avx until the FPU save/restore
+ * issues are resolved
+ */
+ regs[2] &= ~(CPUID2_XSAVE | CPUID2_OSXSAVE |
+ CPUID2_AVX);
+
+ /*
+ * Hide monitor/mwait until we know how to deal with
+ * these instructions.
+ */
+ regs[2] &= ~CPUID2_MON;
+
+ /*
+ * Hide thermal monitoring
+ */
+ regs[3] &= ~(CPUID_ACPI | CPUID_TM);
+
+ /*
+ * Machine check handling is done in the host.
+ * Hide MTRR capability.
+ */
+ regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
+
+ /*
+ * Disable multi-core.
+ */
+ regs[1] &= ~CPUID_HTT_CORES;
+ regs[3] &= ~CPUID_HTT;
+ break;
+
+ case CPUID_0000_0004:
+ do_cpuid(4, regs);
+
+ /*
+ * Do not expose topology.
+ */
+ regs[0] &= 0xffff8000;
+ regs[0] |= 0x04008000;
+ break;
+
+ case CPUID_0000_0006:
+ case CPUID_0000_0007:
+ /*
+ * Handle the access, but report 0 for
+ * all options
+ */
+ regs[0] = 0;
+ regs[1] = 0;
+ regs[2] = 0;
+ regs[3] = 0;
+ break;
+
+ case CPUID_0000_000B:
+ /*
+ * Processor topology enumeration
+ */
+ regs[0] = 0;
+ regs[1] = 0;
+ regs[2] = *ecx & 0xff;
+ regs[3] = vcpu_id;
+ break;
+
+ case 0x40000000:
+ regs[0] = CPUID_VM_HIGH;
+ bcopy(bhyve_id, &regs[1], 4);
+ bcopy(bhyve_id, &regs[2], 4);
+ bcopy(bhyve_id, &regs[3], 4);
+ break;
+ default:
+ /* XXX: Leaf 5? */
+ return (0);
+ }
+
+ *eax = regs[0];
+ *ebx = regs[1];
+ *ecx = regs[2];
+ *edx = regs[3];
+ return (1);
+}
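
A caller is expected to pull the guest's %eax and %ecx into the four
register slots, let x86_emulate_cpuid() rewrite them, and push the
results back before resuming the guest. A sketch assuming hypothetical
guest_get_reg()/guest_set_reg() accessors (the real register plumbing
lives in the CPU-specific backends):

    static void
    handle_cpuid_exit(struct vm *vm, int vcpu)
    {
            uint32_t eax, ebx, ecx, edx;

            ebx = edx = 0;          /* outputs only; inputs are eax/ecx */
            eax = guest_get_reg(vm, vcpu, VM_REG_GUEST_RAX);
            ecx = guest_get_reg(vm, vcpu, VM_REG_GUEST_RCX);

            (void)x86_emulate_cpuid(vm, vcpu, &eax, &ebx, &ecx, &edx);

            guest_set_reg(vm, vcpu, VM_REG_GUEST_RAX, eax);
            guest_set_reg(vm, vcpu, VM_REG_GUEST_RBX, ebx);
            guest_set_reg(vm, vcpu, VM_REG_GUEST_RCX, ecx);
            guest_set_reg(vm, vcpu, VM_REG_GUEST_RDX, edx);
    }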
diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h
new file mode 100644
index 0000000..368e967
--- /dev/null
+++ b/sys/amd64/vmm/x86.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _X86_H_
+#define _X86_H_
+
+#define CPUID_0000_0000 (0x0)
+#define CPUID_0000_0001 (0x1)
+#define CPUID_0000_0002 (0x2)
+#define CPUID_0000_0003 (0x3)
+#define CPUID_0000_0004 (0x4)
+#define CPUID_0000_0006 (0x6)
+#define CPUID_0000_0007 (0x7)
+#define CPUID_0000_000A (0xA)
+#define CPUID_0000_000B (0xB)
+#define CPUID_8000_0000 (0x80000000)
+#define CPUID_8000_0001 (0x80000001)
+#define CPUID_8000_0002 (0x80000002)
+#define CPUID_8000_0003 (0x80000003)
+#define CPUID_8000_0004 (0x80000004)
+#define CPUID_8000_0006 (0x80000006)
+#define CPUID_8000_0007 (0x80000007)
+#define CPUID_8000_0008 (0x80000008)
+
+/*
+ * CPUID instruction Fn0000_0001:
+ */
+#define CPUID_0000_0001_APICID_MASK (0xff<<24)
+#define CPUID_0000_0001_APICID_SHIFT 24
+
+/*
+ * CPUID instruction Fn0000_0001 ECX
+ */
+#define CPUID_0000_0001_FEAT0_VMX (1<<5)
+
+int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx);
+
+#endif