summaryrefslogtreecommitdiffstats
path: root/sys/amd64
diff options
context:
space:
mode:
authorneel <neel@FreeBSD.org>2012-11-11 03:26:14 +0000
committerneel <neel@FreeBSD.org>2012-11-11 03:26:14 +0000
commitbc4be3dff1bc1b0cdc3ea30df0fd3e83998cf9eb (patch)
treeb6b271fb331d43e30e10d824f2042de2c063f2eb /sys/amd64
parent263c4acf84c3be71025f3484c0378a83cd668e15 (diff)
parentde6ea8b20e870490db809a8d8a965bd784981d81 (diff)
downloadFreeBSD-src-bc4be3dff1bc1b0cdc3ea30df0fd3e83998cf9eb.zip
FreeBSD-src-bc4be3dff1bc1b0cdc3ea30df0fd3e83998cf9eb.tar.gz
IFC @ r242684
Diffstat (limited to 'sys/amd64')
-rw-r--r--sys/amd64/amd64/cpu_switch.S3
-rw-r--r--sys/amd64/amd64/elf_machdep.c1
-rw-r--r--sys/amd64/amd64/fpu.c153
-rw-r--r--sys/amd64/amd64/identcpu.c21
-rw-r--r--sys/amd64/amd64/initcpu.c26
-rw-r--r--sys/amd64/amd64/intr_machdep.c566
-rw-r--r--sys/amd64/amd64/legacy.c332
-rw-r--r--sys/amd64/amd64/machdep.c7
-rw-r--r--sys/amd64/amd64/mp_machdep.c20
-rw-r--r--sys/amd64/amd64/pmap.c231
-rw-r--r--sys/amd64/amd64/ptrace_machdep.c7
-rw-r--r--sys/amd64/amd64/trap.c42
-rw-r--r--sys/amd64/amd64/vm_machdep.c4
-rw-r--r--sys/amd64/conf/GENERIC5
-rw-r--r--sys/amd64/conf/NOTES14
-rw-r--r--sys/amd64/conf/XENHVM2
-rw-r--r--sys/amd64/ia32/ia32_sigtramp.S31
-rw-r--r--sys/amd64/ia32/ia32_syscall.c2
-rw-r--r--sys/amd64/include/atomic.h4
-rw-r--r--sys/amd64/include/cpufunc.h7
-rw-r--r--sys/amd64/include/fpu.h3
-rw-r--r--sys/amd64/include/intr_machdep.h2
-rw-r--r--sys/amd64/include/md_var.h2
-rw-r--r--sys/amd64/include/param.h8
-rw-r--r--sys/amd64/include/pc/bios.h52
-rw-r--r--sys/amd64/include/pcpu.h22
-rw-r--r--sys/amd64/pci/pci_cfgreg.c25
27 files changed, 458 insertions, 1134 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 1254f3f..ed1ccb5 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -122,6 +122,9 @@ done_store_dr:
1: movq %rdx,%rcx
movl xsave_mask,%eax
movl xsave_mask+4,%edx
+ .globl ctx_switch_xsave
+ctx_switch_xsave:
+ /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
xsave (%r8)
movq %rcx,%rdx
2: smsw %ax
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index 38a11c68..fdc4d56 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sysent.h>
#include <sys/imgact_elf.h>
#include <sys/syscall.h>
-#include <sys/sysent.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 7d76b58..17d2694 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr))
+#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
static __inline void
xrstor(char *addr, uint64_t mask)
@@ -106,6 +107,7 @@ void fnstsw(caddr_t addr);
void fxsave(caddr_t addr);
void fxrstor(caddr_t addr);
void ldmxcsr(u_int csr);
+void stmxcsr(u_int *csr);
void xrstor(char *addr, uint64_t mask);
void xsave(char *addr, uint64_t mask);
@@ -114,9 +116,6 @@ void xsave(char *addr, uint64_t mask);
#define start_emulating() load_cr0(rcr0() | CR0_TS)
#define stop_emulating() clts()
-#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
-#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
-
CTASSERT(sizeof(struct savefpu) == 512);
CTASSERT(sizeof(struct xstate_hdr) == 64);
CTASSERT(sizeof(struct savefpu_ymm) == 832);
@@ -133,10 +132,16 @@ static void fpu_clean_state(void);
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
NULL, 1, "Floating point instructions executed in hardware");
+static int use_xsaveopt;
int use_xsave; /* non-static for cpu_switch.S */
uint64_t xsave_mask; /* the same */
static struct savefpu *fpu_initialstate;
+struct xsave_area_elm_descr {
+ u_int offset;
+ u_int size;
+} *xsave_area_desc;
+
void
fpusave(void *addr)
{
@@ -183,6 +188,17 @@ fpuinit_bsp1(void)
TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
xsave_mask &= xsave_mask_user;
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+ /*
+ * Patch the XSAVE instruction in the cpu_switch code
+ * to XSAVEOPT. We assume that XSAVE encoding used
+ * REX byte, and set the bit 4 of the r/m byte.
+ */
+ ctx_switch_xsave[3] |= 0x10;
+ use_xsaveopt = 1;
+ }
}
/*
@@ -253,6 +269,7 @@ static void
fpuinitstate(void *arg __unused)
{
register_t saveintr;
+ int cp[4], i, max_ext_n;
fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -274,6 +291,28 @@ fpuinitstate(void *arg __unused)
*/
bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+ /*
+ * Create a table describing the layout of the CPU Extended
+ * Save Area.
+ */
+ if (use_xsaveopt) {
+ max_ext_n = flsl(xsave_mask);
+ xsave_area_desc = malloc(max_ext_n * sizeof(struct
+ xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+ /* x87 state */
+ xsave_area_desc[0].offset = 0;
+ xsave_area_desc[0].size = 160;
+ /* XMM */
+ xsave_area_desc[1].offset = 160;
+ xsave_area_desc[1].size = 288 - 160;
+
+ for (i = 2; i < max_ext_n; i++) {
+ cpuid_count(0xd, i, cp);
+ xsave_area_desc[i].offset = cp[1];
+ xsave_area_desc[i].size = cp[0];
+ }
+ }
+
start_emulating();
intr_restore(saveintr);
}
@@ -289,7 +328,7 @@ fpuexit(struct thread *td)
critical_enter();
if (curthread == PCPU_GET(fpcurthread)) {
stop_emulating();
- fpusave(PCPU_GET(curpcb)->pcb_save);
+ fpusave(curpcb->pcb_save);
start_emulating();
PCPU_SET(fpcurthread, 0);
}
@@ -475,25 +514,26 @@ static char fpetable[128] = {
};
/*
- * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
+ * Read the FP status and control words, then generate si_code value
+ * for SIGFPE. The error code chosen will be one of the
+ * FPE_... macros. It will be sent as the second argument to old
+ * BSD-style signal handlers and as "siginfo_t->si_code" (second
+ * argument) to SA_SIGINFO signal handlers.
*
- * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now
- * depend on longjmp() restoring a usable state. Restoring the state
- * or examining it might fail if we didn't clear exceptions.
+ * Some time ago, we cleared the x87 exceptions with FNCLEX there.
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
+ * usermode code which understands the FPU hardware enough to enable
+ * the exceptions, can also handle clearing the exception state in the
+ * handler. The only consequence of not clearing the exception is the
+ * rethrow of the SIGFPE on return from the signal handler and
+ * reexecution of the corresponding instruction.
*
- * The error code chosen will be one of the FPE_... macros. It will be
- * sent as the second argument to old BSD-style signal handlers and as
- * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
- *
- * XXX the FP state is not preserved across signal handlers. So signal
- * handlers cannot afford to do FP unless they preserve the state or
- * longjmp() out. Both preserving the state and longjmp()ing may be
- * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable
- * solution for signals other than SIGFPE.
+ * For XMM traps, the exceptions were never cleared.
*/
int
-fputrap()
+fputrap_x87(void)
{
+ struct savefpu *pcb_save;
u_short control, status;
critical_enter();
@@ -504,19 +544,32 @@ fputrap()
* wherever they are.
*/
if (PCPU_GET(fpcurthread) != curthread) {
- control = GET_FPU_CW(curthread);
- status = GET_FPU_SW(curthread);
+ pcb_save = curpcb->pcb_save;
+ control = pcb_save->sv_env.en_cw;
+ status = pcb_save->sv_env.en_sw;
} else {
fnstcw(&control);
fnstsw(&status);
}
- if (PCPU_GET(fpcurthread) == curthread)
- fnclex();
critical_exit();
return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
+int
+fputrap_sse(void)
+{
+ u_int mxcsr;
+
+ critical_enter();
+ if (PCPU_GET(fpcurthread) != curthread)
+ mxcsr = curpcb->pcb_save->sv_env.en_mxcsr;
+ else
+ stmxcsr(&mxcsr);
+ critical_exit();
+ return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
+}
+
/*
* Implement device not available (DNA) exception
*
@@ -530,7 +583,6 @@ static int err_count = 0;
void
fpudna(void)
{
- struct pcb *pcb;
critical_enter();
if (PCPU_GET(fpcurthread) == curthread) {
@@ -552,26 +604,31 @@ fpudna(void)
* Record new context early in case frstor causes a trap.
*/
PCPU_SET(fpcurthread, curthread);
- pcb = PCPU_GET(curpcb);
fpu_clean_state();
- if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+ if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
/*
* This is the first time this thread has used the FPU or
* the PCB doesn't contain a clean FPU state. Explicitly
* load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in PCB instead of directly loading from
+ * fpu_initialstate, to ignite the XSAVEOPT
+ * tracking engine.
*/
- fpurestore(fpu_initialstate);
- if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
- fldcw(pcb->pcb_initial_fpucw);
- if (PCB_USER_FPU(pcb))
- set_pcb_flags(pcb,
+ bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
+ fpurestore(curpcb->pcb_save);
+ if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+ fldcw(curpcb->pcb_initial_fpucw);
+ if (PCB_USER_FPU(curpcb))
+ set_pcb_flags(curpcb,
PCB_FPUINITDONE | PCB_USERFPUINITDONE);
else
- set_pcb_flags(pcb, PCB_FPUINITDONE);
+ set_pcb_flags(curpcb, PCB_FPUINITDONE);
} else
- fpurestore(pcb->pcb_save);
+ fpurestore(curpcb->pcb_save);
critical_exit();
}
@@ -597,6 +654,9 @@ int
fpugetregs(struct thread *td)
{
struct pcb *pcb;
+ uint64_t *xstate_bv, bit;
+ char *sa;
+ int max_ext_n, i;
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -614,6 +674,25 @@ fpugetregs(struct thread *td)
return (_MC_FPOWNED_FPU);
} else {
critical_exit();
+ if (use_xsaveopt) {
+ /*
+ * Handle partially saved state.
+ */
+ sa = (char *)get_pcb_user_save_pcb(pcb);
+ xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+ offsetof(struct xstate_hdr, xstate_bv));
+ max_ext_n = flsl(xsave_mask);
+ for (i = 0; i < max_ext_n; i++) {
+ bit = 1 << i;
+ if ((*xstate_bv & bit) != 0)
+ continue;
+ bcopy((char *)fpu_initialstate +
+ xsave_area_desc[i].offset,
+ sa + xsave_area_desc[i].offset,
+ xsave_area_desc[i].size);
+ *xstate_bv |= bit;
+ }
+ }
return (_MC_FPOWNED_PCB);
}
}
@@ -883,16 +962,14 @@ fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
int
fpu_kern_thread(u_int flags)
{
- struct pcb *pcb;
- pcb = PCPU_GET(curpcb);
KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
("Only kthread may use fpu_kern_thread"));
- KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb),
+ KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
("mangled pcb_save"));
- KASSERT(PCB_USER_FPU(pcb), ("recursive call"));
+ KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
- set_pcb_flags(pcb, PCB_KERNFPU);
+ set_pcb_flags(curpcb, PCB_KERNFPU);
return (0);
}
@@ -902,7 +979,7 @@ is_fpu_kern_thread(u_int flags)
if ((curthread->td_pflags & TDP_KTHREAD) == 0)
return (0);
- return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0);
+ return ((curpcb->pcb_flags & PCB_KERNFPU) != 0);
}
/*
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
index 62db86b..465316a 100644
--- a/sys/amd64/amd64/identcpu.c
+++ b/sys/amd64/amd64/identcpu.c
@@ -213,8 +213,8 @@ printcpuinfo(void)
if (cpu_vendor_id == CPU_VENDOR_INTEL ||
cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_CENTAUR) {
- printf(" Family = %x", CPUID_TO_FAMILY(cpu_id));
- printf(" Model = %x", CPUID_TO_MODEL(cpu_id));
+ printf(" Family = 0x%x", CPUID_TO_FAMILY(cpu_id));
+ printf(" Model = 0x%x", CPUID_TO_MODEL(cpu_id));
printf(" Stepping = %u", cpu_id & CPUID_STEPPING);
/*
@@ -384,6 +384,18 @@ printcpuinfo(void)
);
}
+ if (cpu_stdext_feature != 0) {
+ printf("\n Standard Extended Features=0x%b",
+ cpu_stdext_feature,
+ "\020"
+ "\001GSFSBASE"
+ "\002TSCADJ"
+ "\010SMEP"
+ "\012ENHMOVSB"
+ "\013INVPCID"
+ );
+ }
+
if (via_feature_rng != 0 || via_feature_xcrypt != 0)
print_via_padlock_info();
@@ -501,6 +513,11 @@ identify_cpu(void)
}
}
+ if (cpu_high >= 7) {
+ cpuid_count(7, 0, regs);
+ cpu_stdext_feature = regs[1];
+ }
+
if (cpu_vendor_id == CPU_VENDOR_INTEL ||
cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_CENTAUR) {
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 3890551..4abed4c 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
@@ -72,6 +73,7 @@ u_int cpu_vendor_id; /* CPU vendor ID */
u_int cpu_fxsr; /* SSE enabled */
u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
u_int cpu_clflush_line_size = 32;
+u_int cpu_stdext_feature;
u_int cpu_max_ext_state_size;
SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
@@ -91,11 +93,17 @@ init_amd(void)
*
* http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
* http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
+ *
+ * Hypervisors do not provide access to the errata MSR,
+ * causing #GP exception on attempt to apply the errata. The
+ * MSR write shall be done on host and persist globally
+ * anyway, so do not try to do it when under virtualization.
*/
switch (CPUID_TO_FAMILY(cpu_id)) {
case 0x10:
case 0x12:
- wrmsr(0xc0011029, rdmsr(0xc0011029) | 1);
+ if ((cpu_feature2 & CPUID2_HV) == 0)
+ wrmsr(0xc0011029, rdmsr(0xc0011029) | 1);
break;
}
}
@@ -146,11 +154,25 @@ void
initializecpu(void)
{
uint64_t msr;
+ uint32_t cr4;
+ cr4 = rcr4();
if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
- load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
+ cr4 |= CR4_FXSR | CR4_XMM;
cpu_fxsr = hw_instruction_sse = 1;
}
+ if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
+ cr4 |= CR4_FSGSBASE;
+
+ /*
+ * Postpone enabling the SMEP on the boot CPU until the page
+ * tables are switched from the boot loader identity mapping
+ * to the kernel tables. The boot loader enables the U bit in
+ * its tables.
+ */
+ if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
+ cr4 |= CR4_SMEP;
+ load_cr4(cr4);
if ((amd_feature & AMDID_NX) != 0) {
msr = rdmsr(MSR_EFER) | EFER_NXE;
wrmsr(MSR_EFER, msr);
diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c
deleted file mode 100644
index e64f89d..0000000
--- a/sys/amd64/amd64/intr_machdep.c
+++ /dev/null
@@ -1,566 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-/*
- * Machine dependent interrupt code for amd64. For amd64, we have to
- * deal with different PICs. Thus, we use the passed in vector to lookup
- * an interrupt source associated with that vector. The interrupt source
- * describes which PIC the source belongs to and includes methods to handle
- * that source.
- */
-
-#include "opt_atpic.h"
-#include "opt_ddb.h"
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/interrupt.h>
-#include <sys/ktr.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-#include <sys/syslog.h>
-#include <sys/systm.h>
-#include <machine/clock.h>
-#include <machine/intr_machdep.h>
-#include <machine/smp.h>
-#ifdef DDB
-#include <ddb/ddb.h>
-#endif
-
-#ifndef DEV_ATPIC
-#include <machine/segments.h>
-#include <machine/frame.h>
-#include <dev/ic/i8259.h>
-#include <x86/isa/icu.h>
-#include <x86/isa/isa.h>
-#endif
-
-#define MAX_STRAY_LOG 5
-
-typedef void (*mask_fn)(void *);
-
-static int intrcnt_index;
-static struct intsrc *interrupt_sources[NUM_IO_INTS];
-static struct mtx intr_table_lock;
-static struct mtx intrcnt_lock;
-static STAILQ_HEAD(, pic) pics;
-
-#ifdef SMP
-static int assign_cpu;
-static int round_robin_interrupts = 1;
-TUNABLE_INT("round_robin_interrupts", &round_robin_interrupts);
-#endif
-
-u_long intrcnt[INTRCNT_COUNT];
-char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)];
-size_t sintrcnt = sizeof(intrcnt);
-size_t sintrnames = sizeof(intrnames);
-
-static int intr_assign_cpu(void *arg, u_char cpu);
-static void intr_disable_src(void *arg);
-static void intr_init(void *__dummy);
-static int intr_pic_registered(struct pic *pic);
-static void intrcnt_setname(const char *name, int index);
-static void intrcnt_updatename(struct intsrc *is);
-static void intrcnt_register(struct intsrc *is);
-
-static int
-intr_pic_registered(struct pic *pic)
-{
- struct pic *p;
-
- STAILQ_FOREACH(p, &pics, pics) {
- if (p == pic)
- return (1);
- }
- return (0);
-}
-
-/*
- * Register a new interrupt controller (PIC). This is to support suspend
- * and resume where we suspend/resume controllers rather than individual
- * sources. This also allows controllers with no active sources (such as
- * 8259As in a system using the APICs) to participate in suspend and resume.
- */
-int
-intr_register_pic(struct pic *pic)
-{
- int error;
-
- mtx_lock(&intr_table_lock);
- if (intr_pic_registered(pic))
- error = EBUSY;
- else {
- STAILQ_INSERT_TAIL(&pics, pic, pics);
- error = 0;
- }
- mtx_unlock(&intr_table_lock);
- return (error);
-}
-
-/*
- * Register a new interrupt source with the global interrupt system.
- * The global interrupts need to be disabled when this function is
- * called.
- */
-int
-intr_register_source(struct intsrc *isrc)
-{
- int error, vector;
-
- KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
- vector = isrc->is_pic->pic_vector(isrc);
- if (interrupt_sources[vector] != NULL)
- return (EEXIST);
- error = intr_event_create(&isrc->is_event, isrc, 0, vector,
- intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
- (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
- vector);
- if (error)
- return (error);
- mtx_lock(&intr_table_lock);
- if (interrupt_sources[vector] != NULL) {
- mtx_unlock(&intr_table_lock);
- intr_event_destroy(isrc->is_event);
- return (EEXIST);
- }
- intrcnt_register(isrc);
- interrupt_sources[vector] = isrc;
- isrc->is_handlers = 0;
- mtx_unlock(&intr_table_lock);
- return (0);
-}
-
-struct intsrc *
-intr_lookup_source(int vector)
-{
-
- return (interrupt_sources[vector]);
-}
-
-int
-intr_add_handler(const char *name, int vector, driver_filter_t filter,
- driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
-{
- struct intsrc *isrc;
- int error;
-
- isrc = intr_lookup_source(vector);
- if (isrc == NULL)
- return (EINVAL);
- error = intr_event_add_handler(isrc->is_event, name, filter, handler,
- arg, intr_priority(flags), flags, cookiep);
- if (error == 0) {
- mtx_lock(&intr_table_lock);
- intrcnt_updatename(isrc);
- isrc->is_handlers++;
- if (isrc->is_handlers == 1) {
- isrc->is_pic->pic_enable_intr(isrc);
- isrc->is_pic->pic_enable_source(isrc);
- }
- mtx_unlock(&intr_table_lock);
- }
- return (error);
-}
-
-int
-intr_remove_handler(void *cookie)
-{
- struct intsrc *isrc;
- int error;
-
- isrc = intr_handler_source(cookie);
- error = intr_event_remove_handler(cookie);
- if (error == 0) {
- mtx_lock(&intr_table_lock);
- isrc->is_handlers--;
- if (isrc->is_handlers == 0) {
- isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
- isrc->is_pic->pic_disable_intr(isrc);
- }
- intrcnt_updatename(isrc);
- mtx_unlock(&intr_table_lock);
- }
- return (error);
-}
-
-int
-intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
-{
- struct intsrc *isrc;
-
- isrc = intr_lookup_source(vector);
- if (isrc == NULL)
- return (EINVAL);
- return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
-}
-
-static void
-intr_disable_src(void *arg)
-{
- struct intsrc *isrc;
-
- isrc = arg;
- isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
-}
-
-void
-intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
-{
- struct intr_event *ie;
- int vector;
-
- /*
- * We count software interrupts when we process them. The
- * code here follows previous practice, but there's an
- * argument for counting hardware interrupts when they're
- * processed too.
- */
- (*isrc->is_count)++;
- PCPU_INC(cnt.v_intr);
-
- ie = isrc->is_event;
-
- /*
- * XXX: We assume that IRQ 0 is only used for the ISA timer
- * device (clk).
- */
- vector = isrc->is_pic->pic_vector(isrc);
- if (vector == 0)
- clkintr_pending = 1;
-
- /*
- * For stray interrupts, mask and EOI the source, bump the
- * stray count, and log the condition.
- */
- if (intr_event_handle(ie, frame) != 0) {
- isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
- (*isrc->is_straycount)++;
- if (*isrc->is_straycount < MAX_STRAY_LOG)
- log(LOG_ERR, "stray irq%d\n", vector);
- else if (*isrc->is_straycount == MAX_STRAY_LOG)
- log(LOG_CRIT,
- "too many stray irq %d's: not logging anymore\n",
- vector);
- }
-}
-
-void
-intr_resume(void)
-{
- struct pic *pic;
-
-#ifndef DEV_ATPIC
- atpic_reset();
-#endif
- mtx_lock(&intr_table_lock);
- STAILQ_FOREACH(pic, &pics, pics) {
- if (pic->pic_resume != NULL)
- pic->pic_resume(pic);
- }
- mtx_unlock(&intr_table_lock);
-}
-
-void
-intr_suspend(void)
-{
- struct pic *pic;
-
- mtx_lock(&intr_table_lock);
- STAILQ_FOREACH(pic, &pics, pics) {
- if (pic->pic_suspend != NULL)
- pic->pic_suspend(pic);
- }
- mtx_unlock(&intr_table_lock);
-}
-
-static int
-intr_assign_cpu(void *arg, u_char cpu)
-{
-#ifdef SMP
- struct intsrc *isrc;
- int error;
-
- /*
- * Don't do anything during early boot. We will pick up the
- * assignment once the APs are started.
- */
- if (assign_cpu && cpu != NOCPU) {
- isrc = arg;
- mtx_lock(&intr_table_lock);
- error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
- mtx_unlock(&intr_table_lock);
- } else
- error = 0;
- return (error);
-#else
- return (EOPNOTSUPP);
-#endif
-}
-
-static void
-intrcnt_setname(const char *name, int index)
-{
-
- snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
- MAXCOMLEN, name);
-}
-
-static void
-intrcnt_updatename(struct intsrc *is)
-{
-
- intrcnt_setname(is->is_event->ie_fullname, is->is_index);
-}
-
-static void
-intrcnt_register(struct intsrc *is)
-{
- char straystr[MAXCOMLEN + 1];
-
- KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
- mtx_lock_spin(&intrcnt_lock);
- is->is_index = intrcnt_index;
- intrcnt_index += 2;
- snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
- is->is_pic->pic_vector(is));
- intrcnt_updatename(is);
- is->is_count = &intrcnt[is->is_index];
- intrcnt_setname(straystr, is->is_index + 1);
- is->is_straycount = &intrcnt[is->is_index + 1];
- mtx_unlock_spin(&intrcnt_lock);
-}
-
-void
-intrcnt_add(const char *name, u_long **countp)
-{
-
- mtx_lock_spin(&intrcnt_lock);
- *countp = &intrcnt[intrcnt_index];
- intrcnt_setname(name, intrcnt_index);
- intrcnt_index++;
- mtx_unlock_spin(&intrcnt_lock);
-}
-
-static void
-intr_init(void *dummy __unused)
-{
-
- intrcnt_setname("???", 0);
- intrcnt_index = 1;
- STAILQ_INIT(&pics);
- mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF);
- mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
-}
-SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
-
-#ifndef DEV_ATPIC
-/* Initialize the two 8259A's to a known-good shutdown state. */
-void
-atpic_reset(void)
-{
-
- outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
- outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
- outb(IO_ICU1 + ICU_IMR_OFFSET, 1 << 2);
- outb(IO_ICU1 + ICU_IMR_OFFSET, ICW4_8086);
- outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
- outb(IO_ICU1, OCW3_SEL | OCW3_RR);
-
- outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
- outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
- outb(IO_ICU2 + ICU_IMR_OFFSET, 2);
- outb(IO_ICU2 + ICU_IMR_OFFSET, ICW4_8086);
- outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
- outb(IO_ICU2, OCW3_SEL | OCW3_RR);
-}
-#endif
-
-/* Add a description to an active interrupt handler. */
-int
-intr_describe(u_int vector, void *ih, const char *descr)
-{
- struct intsrc *isrc;
- int error;
-
- isrc = intr_lookup_source(vector);
- if (isrc == NULL)
- return (EINVAL);
- error = intr_event_describe_handler(isrc->is_event, ih, descr);
- if (error)
- return (error);
- intrcnt_updatename(isrc);
- return (0);
-}
-
-#ifdef DDB
-/*
- * Dump data about interrupt handlers
- */
-DB_SHOW_COMMAND(irqs, db_show_irqs)
-{
- struct intsrc **isrc;
- int i, verbose;
-
- if (strcmp(modif, "v") == 0)
- verbose = 1;
- else
- verbose = 0;
- isrc = interrupt_sources;
- for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++)
- if (*isrc != NULL)
- db_dump_intr_event((*isrc)->is_event, verbose);
-}
-#endif
-
-#ifdef SMP
-/*
- * Support for balancing interrupt sources across CPUs. For now we just
- * allocate CPUs round-robin.
- */
-
-static cpuset_t intr_cpus;
-static int current_cpu;
-
-/*
- * Return the CPU that the next interrupt source should use. For now
- * this just returns the next local APIC according to round-robin.
- */
-u_int
-intr_next_cpu(void)
-{
- u_int apic_id;
-
- /* Leave all interrupts on the BSP during boot. */
- if (!assign_cpu)
- return (PCPU_GET(apic_id));
-
- /* All interrupts go to the BSP if not allowed to round robin */
- if (!round_robin_interrupts)
- return (cpu_apic_ids[0]);
-
- mtx_lock_spin(&icu_lock);
- apic_id = cpu_apic_ids[current_cpu];
- do {
- current_cpu++;
- if (current_cpu > mp_maxid)
- current_cpu = 0;
- } while (!CPU_ISSET(current_cpu, &intr_cpus));
- mtx_unlock_spin(&icu_lock);
- return (apic_id);
-}
-
-/* Attempt to bind the specified IRQ to the specified CPU. */
-int
-intr_bind(u_int vector, u_char cpu)
-{
- struct intsrc *isrc;
-
- isrc = intr_lookup_source(vector);
- if (isrc == NULL)
- return (EINVAL);
- return (intr_event_bind(isrc->is_event, cpu));
-}
-
-/*
- * Add a CPU to our mask of valid CPUs that can be destinations of
- * interrupts.
- */
-void
-intr_add_cpu(u_int cpu)
-{
-
- if (cpu >= MAXCPU)
- panic("%s: Invalid CPU ID", __func__);
- if (bootverbose)
- printf("INTR: Adding local APIC %d as a target\n",
- cpu_apic_ids[cpu]);
-
- CPU_SET(cpu, &intr_cpus);
-}
-
-/*
- * Distribute all the interrupt sources among the available CPUs once the
- * AP's have been launched.
- */
-static void
-intr_shuffle_irqs(void *arg __unused)
-{
- struct intsrc *isrc;
- int i;
-
- /* The BSP is always a valid target. */
- CPU_SETOF(0, &intr_cpus);
-
- /* Don't bother on UP. */
- if (mp_ncpus == 1)
- return;
-
- /* Round-robin assign a CPU to each enabled source. */
- mtx_lock(&intr_table_lock);
- assign_cpu = 1;
- for (i = 0; i < NUM_IO_INTS; i++) {
- isrc = interrupt_sources[i];
- if (isrc != NULL && isrc->is_handlers > 0) {
- /*
- * If this event is already bound to a CPU,
- * then assign the source to that CPU instead
- * of picking one via round-robin. Note that
- * this is careful to only advance the
- * round-robin if the CPU assignment succeeds.
- */
- if (isrc->is_event->ie_cpu != NOCPU)
- (void)isrc->is_pic->pic_assign_cpu(isrc,
- cpu_apic_ids[isrc->is_event->ie_cpu]);
- else if (isrc->is_pic->pic_assign_cpu(isrc,
- cpu_apic_ids[current_cpu]) == 0)
- (void)intr_next_cpu();
-
- }
- }
- mtx_unlock(&intr_table_lock);
-}
-SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
- NULL);
-#else
-/*
- * Always route interrupts to the current processor in the UP case.
- */
-u_int
-intr_next_cpu(void)
-{
-
- return (PCPU_GET(apic_id));
-}
-#endif
diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c
deleted file mode 100644
index 40da8b6..0000000
--- a/sys/amd64/amd64/legacy.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/*-
- * Copyright 1998 Massachusetts Institute of Technology
- *
- * Permission to use, copy, modify, and distribute this software and
- * its documentation for any purpose and without fee is hereby
- * granted, provided that both the above copyright notice and this
- * permission notice appear in all copies, that both the above
- * copyright notice and this permission notice appear in all
- * supporting documentation, and that the name of M.I.T. not be used
- * in advertising or publicity pertaining to distribution of the
- * software without specific, written prior permission. M.I.T. makes
- * no representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied
- * warranty.
- *
- * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
- * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
- * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * This code implements a system driver for legacy systems that do not
- * support ACPI or when ACPI support is not present in the kernel.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/cpu.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <machine/bus.h>
-#include <sys/pcpu.h>
-#include <sys/rman.h>
-#include <sys/smp.h>
-
-#include <machine/clock.h>
-#include <machine/legacyvar.h>
-#include <machine/resource.h>
-
-static MALLOC_DEFINE(M_LEGACYDEV, "legacydrv", "legacy system device");
-struct legacy_device {
- int lg_pcibus;
-};
-
-#define DEVTOAT(dev) ((struct legacy_device *)device_get_ivars(dev))
-
-static int legacy_probe(device_t);
-static int legacy_attach(device_t);
-static int legacy_print_child(device_t, device_t);
-static device_t legacy_add_child(device_t bus, u_int order, const char *name,
- int unit);
-static int legacy_read_ivar(device_t, device_t, int, uintptr_t *);
-static int legacy_write_ivar(device_t, device_t, int, uintptr_t);
-
-static device_method_t legacy_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, legacy_probe),
- DEVMETHOD(device_attach, legacy_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_print_child, legacy_print_child),
- DEVMETHOD(bus_add_child, legacy_add_child),
- DEVMETHOD(bus_read_ivar, legacy_read_ivar),
- DEVMETHOD(bus_write_ivar, legacy_write_ivar),
- DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource),
- DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
-
- { 0, 0 }
-};
-
-static driver_t legacy_driver = {
- "legacy",
- legacy_methods,
- 1, /* no softc */
-};
-static devclass_t legacy_devclass;
-
-DRIVER_MODULE(legacy, nexus, legacy_driver, legacy_devclass, 0, 0);
-
-static int
-legacy_probe(device_t dev)
-{
-
- device_set_desc(dev, "legacy system");
- device_quiet(dev);
- return (0);
-}
-
-static int
-legacy_attach(device_t dev)
-{
- device_t child;
-
- /*
- * Let our child drivers identify any child devices that they
- * can find. Once that is done attach any devices that we
- * found.
- */
- bus_generic_probe(dev);
- bus_generic_attach(dev);
-
- /*
- * If we didn't see ISA on a pci bridge, create some
- * connection points now so it shows up "on motherboard".
- */
- if (!devclass_get_device(devclass_find("isa"), 0)) {
- child = BUS_ADD_CHILD(dev, 0, "isa", 0);
- if (child == NULL)
- panic("legacy_attach isa");
- device_probe_and_attach(child);
- }
-
- return 0;
-}
-
-static int
-legacy_print_child(device_t bus, device_t child)
-{
- struct legacy_device *atdev = DEVTOAT(child);
- int retval = 0;
-
- retval += bus_print_child_header(bus, child);
- if (atdev->lg_pcibus != -1)
- retval += printf(" pcibus %d", atdev->lg_pcibus);
- retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */
-
- return (retval);
-}
-
-static device_t
-legacy_add_child(device_t bus, u_int order, const char *name, int unit)
-{
- device_t child;
- struct legacy_device *atdev;
-
- atdev = malloc(sizeof(struct legacy_device), M_LEGACYDEV,
- M_NOWAIT | M_ZERO);
- if (atdev == NULL)
- return(NULL);
- atdev->lg_pcibus = -1;
-
- child = device_add_child_ordered(bus, order, name, unit);
- if (child == NULL)
- free(atdev, M_LEGACYDEV);
- else
- /* should we free this in legacy_child_detached? */
- device_set_ivars(child, atdev);
-
- return (child);
-}
-
-static int
-legacy_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
-{
- struct legacy_device *atdev = DEVTOAT(child);
-
- switch (which) {
- case LEGACY_IVAR_PCIDOMAIN:
- *result = 0;
- break;
- case LEGACY_IVAR_PCIBUS:
- *result = atdev->lg_pcibus;
- break;
- default:
- return ENOENT;
- }
- return 0;
-}
-
-
-static int
-legacy_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
-{
- struct legacy_device *atdev = DEVTOAT(child);
-
- switch (which) {
- case LEGACY_IVAR_PCIDOMAIN:
- return EINVAL;
- case LEGACY_IVAR_PCIBUS:
- atdev->lg_pcibus = value;
- break;
- default:
- return ENOENT;
- }
- return 0;
-}
-
-/*
- * Legacy CPU attachment when ACPI is not available. Drivers like
- * cpufreq(4) hang off this.
- */
-static void cpu_identify(driver_t *driver, device_t parent);
-static int cpu_read_ivar(device_t dev, device_t child, int index,
- uintptr_t *result);
-static device_t cpu_add_child(device_t bus, u_int order, const char *name,
- int unit);
-static struct resource_list *cpu_get_rlist(device_t dev, device_t child);
-
-struct cpu_device {
- struct resource_list cd_rl;
- struct pcpu *cd_pcpu;
-};
-
-static device_method_t cpu_methods[] = {
- /* Device interface */
- DEVMETHOD(device_identify, cpu_identify),
- DEVMETHOD(device_probe, bus_generic_probe),
- DEVMETHOD(device_attach, bus_generic_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_add_child, cpu_add_child),
- DEVMETHOD(bus_read_ivar, cpu_read_ivar),
- DEVMETHOD(bus_get_resource_list, cpu_get_rlist),
- DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource),
- DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource),
- DEVMETHOD(bus_alloc_resource, bus_generic_rl_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_rl_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
-
- DEVMETHOD_END
-};
-
-static driver_t cpu_driver = {
- "cpu",
- cpu_methods,
- 1, /* no softc */
-};
-static devclass_t cpu_devclass;
-DRIVER_MODULE(cpu, legacy, cpu_driver, cpu_devclass, 0, 0);
-
-static void
-cpu_identify(driver_t *driver, device_t parent)
-{
- device_t child;
- int i;
-
- /*
- * Attach a cpuX device for each CPU. We use an order of 150
- * so that these devices are attached after the Host-PCI
- * bridges (which are added at order 100).
- */
- CPU_FOREACH(i) {
- child = BUS_ADD_CHILD(parent, 150, "cpu", i);
- if (child == NULL)
- panic("legacy_attach cpu");
- }
-}
-
-static device_t
-cpu_add_child(device_t bus, u_int order, const char *name, int unit)
-{
- struct cpu_device *cd;
- device_t child;
- struct pcpu *pc;
-
- if ((cd = malloc(sizeof(*cd), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
- return (NULL);
-
- resource_list_init(&cd->cd_rl);
- pc = pcpu_find(device_get_unit(bus));
- cd->cd_pcpu = pc;
-
- child = device_add_child_ordered(bus, order, name, unit);
- if (child != NULL) {
- pc->pc_device = child;
- device_set_ivars(child, cd);
- } else
- free(cd, M_DEVBUF);
- return (child);
-}
-
-static struct resource_list *
-cpu_get_rlist(device_t dev, device_t child)
-{
- struct cpu_device *cpdev;
-
- cpdev = device_get_ivars(child);
- return (&cpdev->cd_rl);
-}
-
-static int
-cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
-{
- struct cpu_device *cpdev;
-
- switch (index) {
- case CPU_IVAR_PCPU:
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
- break;
- case CPU_IVAR_NOMINAL_MHZ:
- if (tsc_is_invariant) {
- *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
- 1000000);
- break;
- }
- /* FALLTHROUGH */
- default:
- return (ENOENT);
- }
- return (0);
-}
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 8044fe5..abce826 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -298,11 +298,6 @@ cpu_startup(dummy)
vm_pager_bufferinit();
cpu_setregs();
-
- /*
- * Add BSP as an interrupt target.
- */
- intr_add_cpu(0);
}
/*
@@ -996,7 +991,7 @@ exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
pcb->pcb_dr3 = 0;
pcb->pcb_dr6 = 0;
pcb->pcb_dr7 = 0;
- if (pcb == PCPU_GET(curpcb)) {
+ if (pcb == curpcb) {
/*
* Clear the debug registers on the running
* CPU, otherwise they will end up affecting
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 86057e5..b4a0be4 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -804,6 +804,8 @@ init_secondary(void)
* We tell the I/O APIC code about all the CPUs we want to receive
* interrupts. If we don't want certain CPUs to receive IRQs we
* can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
*/
static void
set_interrupt_apic_ids(void)
@@ -814,6 +816,8 @@ set_interrupt_apic_ids(void)
apic_id = cpu_apic_ids[i];
if (apic_id == -1)
continue;
+ if (cpu_info[apic_id].cpu_bsp)
+ continue;
if (cpu_info[apic_id].cpu_disabled)
continue;
@@ -1079,26 +1083,15 @@ ipi_startup(int apic_id, int vector)
{
/*
- * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
+ * first we do an INIT IPI: this INIT IPI might be run, resetting
* and running the target CPU. OR this INIT IPI might be latched (P5
* bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
* ignored.
*/
-
- /* do an INIT IPI: assert RESET */
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-
- /* wait for pending status end */
lapic_ipi_wait(-1);
-
- /* do an INIT IPI: deassert RESET */
- lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
-
- /* wait for pending status end */
DELAY(10000); /* wait ~10mS */
- lapic_ipi_wait(-1);
/*
* next we do a STARTUP IPI: the previous INIT IPI might still be
@@ -1108,8 +1101,6 @@ ipi_startup(int apic_id, int vector)
* run. OR the previous INIT IPI was ignored. and this STARTUP IPI
* will run.
*/
-
- /* do a STARTUP IPI */
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
@@ -1122,7 +1113,6 @@ ipi_startup(int apic_id, int vector)
* this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
* recognized after hardware RESET or INIT IPI.
*/
-
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 9e1ae3d..06b45b2 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -82,13 +82,6 @@ __FBSDID("$FreeBSD$");
/*
* Manages physical address maps.
*
- * In addition to hardware address maps, this
- * module is called upon to provide software-use-only
- * maps which may or may not be stored in the same
- * form as hardware maps. These pseudo-maps are
- * used to store intermediate results from copy
- * operations to and from address spaces.
- *
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
@@ -232,16 +225,7 @@ u_int64_t KPML4phys; /* phys addr of kernel level 4 */
static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
-/*
- * Isolate the global pv list lock from data and other locks to prevent false
- * sharing within the cache.
- */
-static struct {
- struct rwlock lock;
- char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
-} pvh_global __aligned(CACHE_LINE_SIZE);
-
-#define pvh_global_lock pvh_global.lock
+static struct rwlock_padalign pvh_global_lock;
/*
* Data for the pv entry allocation mechanism
@@ -323,8 +307,8 @@ static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va,
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
struct rwlock **lockp);
-static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_page_t* free);
+static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_page_t *free);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
@@ -629,6 +613,8 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/* XXX do %cr0 as well */
load_cr4(rcr4() | CR4_PGE | CR4_PSE);
load_cr3(KPML4phys);
+ if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
+ load_cr4(rcr4() | CR4_SMEP);
/*
* Initialize the kernel pmap (which is statically allocated).
@@ -1557,23 +1543,25 @@ pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
}
/*
- * This routine unholds page table pages, and if the hold count
- * drops to zero, then it decrements the wire count.
+ * Decrements a page table page's wire count, which is used to record the
+ * number of valid page table entries within the page. If the wire count
+ * drops to zero, then the page table page is unmapped. Returns TRUE if the
+ * page table page was unmapped and FALSE otherwise.
*/
-static __inline int
-pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free)
+static inline boolean_t
+pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free)
{
--m->wire_count;
- if (m->wire_count == 0)
- return (_pmap_unwire_pte_hold(pmap, va, m, free));
- else
- return (0);
+ if (m->wire_count == 0) {
+ _pmap_unwire_ptp(pmap, va, m, free);
+ return (TRUE);
+ } else
+ return (FALSE);
}
-static int
-_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_page_t *free)
+static void
+_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -1602,14 +1590,14 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_page_t pdpg;
pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
- pmap_unwire_pte_hold(pmap, va, pdpg, free);
+ pmap_unwire_ptp(pmap, va, pdpg, free);
}
if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
/* We just released a PD, unhold the matching PDP */
vm_page_t pdppg;
pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
- pmap_unwire_pte_hold(pmap, va, pdppg, free);
+ pmap_unwire_ptp(pmap, va, pdppg, free);
}
/*
@@ -1624,8 +1612,6 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
* *ALL* TLB shootdown is done
*/
pmap_add_delayed_free_list(m, free, TRUE);
-
- return (1);
}
/*
@@ -1641,7 +1627,7 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, vm_page_t *free)
return (0);
KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return (pmap_unwire_pte_hold(pmap, va, mpte, free));
+ return (pmap_unwire_ptp(pmap, va, mpte, free));
}
void
@@ -3439,7 +3425,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
pv_entry_t pv;
vm_paddr_t opa, pa;
vm_page_t mpte, om;
- boolean_t invlva;
va = trunc_page(va);
KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
@@ -3453,11 +3438,13 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
VM_OBJECT_LOCKED(m->object),
("pmap_enter: page %p is not busy", m));
pa = VM_PAGE_TO_PHYS(m);
- newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
- if ((m->oflags & VPO_UNMANAGED) == 0)
- newpte |= PG_MANAGED;
+ newpte = (pt_entry_t)(pa | PG_A | PG_V);
+ if ((access & VM_PROT_WRITE) != 0)
+ newpte |= PG_M;
if ((prot & VM_PROT_WRITE) != 0)
newpte |= PG_RW;
+ KASSERT((newpte & (PG_M | PG_RW)) != PG_M,
+ ("pmap_enter: access includes VM_PROT_WRITE but prot doesn't"));
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
if (wired)
@@ -3466,8 +3453,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
newpte |= PG_U;
if (pmap == kernel_pmap)
newpte |= PG_G;
+ newpte |= pmap_cache_bits(m->md.pat_mode, 0);
- mpte = om = NULL;
+ mpte = NULL;
lock = NULL;
rw_rlock(&pvh_global_lock);
@@ -3477,109 +3465,106 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
* In the case that a page table page is not
* resident, we are creating it here.
*/
- if (va < VM_MAXUSER_ADDRESS)
- mpte = pmap_allocpte(pmap, va, &lock);
-
+retry:
pde = pmap_pde(pmap, va);
- if (pde != NULL && (*pde & PG_V) != 0) {
- if ((*pde & PG_PS) != 0)
- panic("pmap_enter: attempted pmap_enter on 2MB page");
+ if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 ||
+ pmap_demote_pde_locked(pmap, pde, va, &lock))) {
pte = pmap_pde_to_pte(pde, va);
+ if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
+ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+ mpte->wire_count++;
+ }
+ } else if (va < VM_MAXUSER_ADDRESS) {
+ /*
+ * Here if the pte page isn't mapped, or if it has been
+ * deallocated.
+ */
+ mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), &lock);
+ goto retry;
} else
panic("pmap_enter: invalid page directory va=%#lx", va);
origpte = *pte;
- opa = origpte & PG_FRAME;
/*
- * Mapping has not changed, must be protection or wiring change.
+ * Is the specified virtual address already mapped?
*/
- if (origpte && (opa == pa)) {
+ if ((origpte & PG_V) != 0) {
/*
* Wiring change, just update stats. We don't worry about
* wiring PT pages as they remain resident as long as there
* are valid mappings in them. Hence, if a user page is wired,
* the PT page will be also.
*/
- if (wired && ((origpte & PG_W) == 0))
+ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0)
pmap->pm_stats.wired_count++;
- else if (!wired && (origpte & PG_W))
+ else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0)
pmap->pm_stats.wired_count--;
/*
- * Remove extra pte reference
+ * Remove the extra PT page reference.
*/
- if (mpte)
- mpte->wire_count--;
-
- if ((origpte & PG_MANAGED) != 0)
- om = m;
- goto validate;
- }
-
- /*
- * Mapping has changed, invalidate old range and fall through to
- * handle validating new mapping.
- */
- if (opa) {
- if (origpte & PG_W)
- pmap->pm_stats.wired_count--;
- if ((origpte & PG_MANAGED) != 0)
- om = PHYS_TO_VM_PAGE(opa);
if (mpte != NULL) {
mpte->wire_count--;
KASSERT(mpte->wire_count > 0,
("pmap_enter: missing reference to page table page,"
" va: 0x%lx", va));
}
- } else
- pmap_resident_count_inc(pmap, 1);
- /*
- * Increment the counters.
- */
- if (wired)
- pmap->pm_stats.wired_count++;
+ /*
+ * Has the physical page changed?
+ */
+ opa = origpte & PG_FRAME;
+ if (opa == pa) {
+ /*
+ * No, might be a protection or wiring change.
+ */
+ if ((origpte & PG_MANAGED) != 0) {
+ newpte |= PG_MANAGED;
+ if ((newpte & PG_RW) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ }
+ if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0)
+ goto unchanged;
+ goto validate;
+ }
+ } else {
+ /*
+ * Increment the counters.
+ */
+ if ((newpte & PG_W) != 0)
+ pmap->pm_stats.wired_count++;
+ pmap_resident_count_inc(pmap, 1);
+ }
/*
* Enter on the PV list if part of our managed memory.
*/
- if ((newpte & PG_MANAGED) != 0) {
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ newpte |= PG_MANAGED;
pv = get_pv_entry(pmap, &lock);
pv->pv_va = va;
CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ if ((newpte & PG_RW) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
}
-validate:
-
/*
- * Update the PTE only if the mapping or protection/wiring bits are
- * different.
+ * Update the PTE.
*/
- if ((origpte & ~(PG_M | PG_A)) != newpte) {
- newpte |= PG_A;
- if ((access & VM_PROT_WRITE) != 0)
- newpte |= PG_M;
- if ((newpte & (PG_MANAGED | PG_RW)) == (PG_MANAGED | PG_RW))
- vm_page_aflag_set(m, PGA_WRITEABLE);
- if (origpte & PG_V) {
- invlva = FALSE;
- origpte = pte_load_store(pte, newpte);
- if (origpte & PG_A) {
- if (origpte & PG_MANAGED)
- vm_page_aflag_set(om, PGA_REFERENCED);
- if (opa != pa || ((origpte & PG_NX) == 0 &&
- (newpte & PG_NX) != 0))
- invlva = TRUE;
- }
- if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if ((origpte & PG_MANAGED) != 0)
+ if ((origpte & PG_V) != 0) {
+validate:
+ origpte = pte_load_store(pte, newpte);
+ opa = origpte & PG_FRAME;
+ if (opa != pa) {
+ if ((origpte & PG_MANAGED) != 0) {
+ om = PHYS_TO_VM_PAGE(opa);
+ if ((origpte & (PG_M | PG_RW)) == (PG_M |
+ PG_RW))
vm_page_dirty(om);
- if ((newpte & PG_RW) == 0)
- invlva = TRUE;
- }
- if (opa != pa && (origpte & PG_MANAGED) != 0) {
+ if ((origpte & PG_A) != 0)
+ vm_page_aflag_set(om, PGA_REFERENCED);
CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
pmap_pvh_free(&om->md, pmap, va);
if ((om->aflags & PGA_WRITEABLE) != 0 &&
@@ -3588,11 +3573,28 @@ validate:
TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
vm_page_aflag_clear(om, PGA_WRITEABLE);
}
- if (invlva)
- pmap_invalidate_page(pmap, va);
- } else
- pte_store(pte, newpte);
- }
+ } else if ((newpte & PG_M) == 0 && (origpte & (PG_M |
+ PG_RW)) == (PG_M | PG_RW)) {
+ if ((origpte & PG_MANAGED) != 0)
+ vm_page_dirty(m);
+
+ /*
+ * Although the PTE may still have PG_RW set, TLB
+ * invalidation may nonetheless be required because
+ * the PTE no longer has PG_M set.
+ */
+ } else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) {
+ /*
+ * This PTE change does not require TLB invalidation.
+ */
+ goto unchanged;
+ }
+ if ((origpte & PG_A) != 0)
+ pmap_invalidate_page(pmap, va);
+ } else
+ pte_store(pte, newpte);
+
+unchanged:
/*
* If both the page table page and the reservation are fully
@@ -3650,7 +3652,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m),
lockp)) {
free = NULL;
- if (pmap_unwire_pte_hold(pmap, va, mpde, &free)) {
+ if (pmap_unwire_ptp(pmap, va, mpde, &free)) {
pmap_invalidate_page(pmap, va);
pmap_free_zero_pages(free);
}
@@ -3826,7 +3828,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
!pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
if (mpte != NULL) {
free = NULL;
- if (pmap_unwire_pte_hold(pmap, va, mpte, &free)) {
+ if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
pmap_invalidate_page(pmap, va);
pmap_free_zero_pages(free);
}
@@ -4133,8 +4135,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
pmap_resident_count_inc(dst_pmap, 1);
} else {
free = NULL;
- if (pmap_unwire_pte_hold(dst_pmap,
- addr, dstmpte, &free)) {
+ if (pmap_unwire_ptp(dst_pmap, addr,
+ dstmpte, &free)) {
pmap_invalidate_page(dst_pmap,
addr);
pmap_free_zero_pages(free);
@@ -4982,7 +4984,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
- vm_offset_t base, offset, tmpva;
+ vm_offset_t base, offset;
/* If we gave a direct map region in pmap_mapdev, do nothing */
if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
@@ -4990,9 +4992,6 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size)
base = trunc_page(va);
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
- for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
- pmap_kremove(tmpva);
- pmap_invalidate_range(kernel_pmap, va, tmpva);
kmem_free(kernel_map, base, size);
}
diff --git a/sys/amd64/amd64/ptrace_machdep.c b/sys/amd64/amd64/ptrace_machdep.c
index 8236321..9fa1917 100644
--- a/sys/amd64/amd64/ptrace_machdep.c
+++ b/sys/amd64/amd64/ptrace_machdep.c
@@ -50,6 +50,7 @@ cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
switch (req) {
case PT_GETXSTATE:
+ fpugetregs(td);
savefpu = (char *)(get_pcb_user_save_td(td) + 1);
error = copyout(savefpu, addr,
cpu_max_ext_state_size - sizeof(struct savefpu));
@@ -62,8 +63,10 @@ cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
}
savefpu = malloc(data, M_TEMP, M_WAITOK);
error = copyin(addr, savefpu, data);
- if (error == 0)
+ if (error == 0) {
+ fpugetregs(td);
error = fpusetxstate(td, savefpu, data);
+ }
free(savefpu, M_TEMP);
break;
@@ -89,11 +92,13 @@ cpu32_ptrace(struct thread *td, int req, void *addr, int data)
switch (req) {
case PT_I386_GETXMMREGS:
+ fpugetregs(td);
error = copyout(get_pcb_user_save_td(td), addr,
sizeof(*fpstate));
break;
case PT_I386_SETXMMREGS:
+ fpugetregs(td);
fpstate = get_pcb_user_save_td(td);
error = copyin(addr, fpstate, sizeof(*fpstate));
fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 75e15e0..6fcca81 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -176,9 +176,14 @@ static int panic_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
TUNABLE_INT("machdep.panic_on_nmi", &panic_on_nmi);
-static int prot_fault_translation = 0;
+static int prot_fault_translation;
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
- &prot_fault_translation, 0, "Select signal to deliver on protection fault");
+ &prot_fault_translation, 0,
+ "Select signal to deliver on protection fault");
+static int uprintf_signal;
+SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
+ &uprintf_signal, 0,
+ "Print debugging information on trap signal to ctty");
/*
* Exception, fault, and trap interface to the FreeBSD kernel.
@@ -328,7 +333,7 @@ trap(struct trapframe *frame)
break;
case T_ARITHTRAP: /* arithmetic trap */
- ucode = fputrap();
+ ucode = fputrap_x87();
if (ucode == -1)
goto userout;
i = SIGFPE;
@@ -442,7 +447,9 @@ trap(struct trapframe *frame)
break;
case T_XMMFLT: /* SIMD floating-point exception */
- ucode = 0; /* XXX */
+ ucode = fputrap_sse();
+ if (ucode == -1)
+ goto userout;
i = SIGFPE;
break;
}
@@ -518,9 +525,8 @@ trap(struct trapframe *frame)
frame->tf_rip = (long)fsbase_load_fault;
goto out;
}
- if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame->tf_rip =
- (long)PCPU_GET(curpcb)->pcb_onfault;
+ if (curpcb->pcb_onfault != NULL) {
+ frame->tf_rip = (long)curpcb->pcb_onfault;
goto out;
}
break;
@@ -609,11 +615,25 @@ trap(struct trapframe *frame)
ksi.ksi_code = ucode;
ksi.ksi_trapno = type;
ksi.ksi_addr = (void *)addr;
+ if (uprintf_signal) {
+ uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
+ "addr 0x%lx rip 0x%lx "
+ "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
+ p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
+ frame->tf_rip,
+ fubyte((void *)(frame->tf_rip + 0)),
+ fubyte((void *)(frame->tf_rip + 1)),
+ fubyte((void *)(frame->tf_rip + 2)),
+ fubyte((void *)(frame->tf_rip + 3)),
+ fubyte((void *)(frame->tf_rip + 4)),
+ fubyte((void *)(frame->tf_rip + 5)),
+ fubyte((void *)(frame->tf_rip + 6)),
+ fubyte((void *)(frame->tf_rip + 7)));
+ }
trapsignal(td, &ksi);
user:
userret(td, frame);
- mtx_assert(&Giant, MA_NOTOWNED);
KASSERT(PCB_USER_FPU(td->td_pcb),
("Return from trap with kernel FPU ctx leaked"));
userout:
@@ -706,7 +726,7 @@ trap_pfault(frame, usermode)
* it normally, and panic immediately.
*/
if (!usermode && (td->td_intr_nesting_level != 0 ||
- PCPU_GET(curpcb)->pcb_onfault == NULL)) {
+ curpcb->pcb_onfault == NULL)) {
trap_fatal(frame, eva);
return (-1);
}
@@ -762,8 +782,8 @@ trap_pfault(frame, usermode)
nogo:
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
- PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
+ curpcb->pcb_onfault != NULL) {
+ frame->tf_rip = (long)curpcb->pcb_onfault;
return (0);
}
trap_fatal(frame, eva);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 8a7484f..a40eaba 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -90,6 +90,10 @@ static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
+CTASSERT((struct thread **)OFFSETOF_CURTHREAD ==
+ &((struct pcpu *)NULL)->pc_curthread);
+CTASSERT((struct pcb **)OFFSETOF_CURPCB == &((struct pcpu *)NULL)->pc_curpcb);
+
struct savefpu *
get_pcb_user_save_td(struct thread *td)
{
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 271a334..9c72500 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -148,6 +148,7 @@ device ciss # Compaq Smart RAID 5*
device dpt # DPT Smartcache III, IV - See NOTES for options
device hptmv # Highpoint RocketRAID 182x
device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx
+device hpt27xx # Highpoint RocketRAID 27xx
device iir # Intel Integrated RAID
device ips # IBM (Adaptec) ServeRAID
device mly # Mylex AcceleRAID/eXtremeRAID
@@ -270,7 +271,7 @@ device wlan_ccmp # 802.11 CCMP support
device wlan_tkip # 802.11 TKIP support
device wlan_amrr # AMRR transmit rate control algorithm
device an # Aironet 4500/4800 802.11 wireless NICs.
-device ath # Atheros NIC's
+device ath # Atheros NICs
device ath_pci # Atheros pci/cardbus glue
device ath_hal # pci/cardbus chip support
options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors
@@ -291,6 +292,8 @@ device wpi # Intel 3945ABG wireless NICs.
# Pseudo devices.
device loop # Network loopback
device random # Entropy device
+options PADLOCK_RNG # VIA Padlock RNG
+options RDRAND_RNG # Intel Bull Mountain RNG
device ether # Ethernet support
device vlan # 802.1Q VLAN support
device tun # Packet tunnel.
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index c74e5af..6562981 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -98,7 +98,7 @@ options OFED_DEBUG_INIT
options SDP
options SDP_DEBUG
-# IP over Inifiband
+# IP over Infiniband
options IPOIB
options IPOIB_DEBUG
options IPOIB_CM
@@ -306,8 +306,6 @@ options DRM_DEBUG # Include debug printfs (slow)
# mlx4ib: Mellanox ConnectX HCA InfiniBand
# mlxen: Mellanox ConnectX HCA Ethernet
# mthca: Mellanox HCA InfiniBand
-# mwl: Marvell 88W8363 IEEE 802.11 adapter
-# Requires the mwl firmware module
# nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source)
# nve: nVidia nForce MCP on-board Ethernet Networking
# sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters
@@ -324,10 +322,9 @@ device iwn # Intel 4965/1000/5000/6000 wireless NICs.
device mlx4ib # Mellanox ConnectX HCA InfiniBand
device mlxen # Mellanox ConnectX HCA Ethernet
device mthca # Mellanox HCA InfiniBand
-device mwl # Marvell 88W8363 802.11n wireless NICs.
device nfe # nVidia nForce MCP on-board Ethernet
device nve # nVidia nForce MCP on-board Ethernet Networking
-device sfxge
+device sfxge # Solarflare SFC9000 10Gb Ethernet
device wpi # Intel 3945ABG wireless NICs.
# IEEE 802.11 adapter firmware modules
@@ -350,7 +347,6 @@ device wpi # Intel 3945ABG wireless NICs.
# iwn5150fw: Specific module for the 5150 only
# iwn6000fw: Specific module for the 6000 only
# iwn6050fw: Specific module for the 6050 only
-# mwlfw: Marvell 88W8363 firmware
# wpifw: Intel 3945ABG Wireless LAN Controller firmware
device iwifw
@@ -368,7 +364,6 @@ device iwn5000fw
device iwn5150fw
device iwn6000fw
device iwn6050fw
-device mwlfw
device wpifw
#
@@ -433,6 +428,11 @@ device isci
options ISCI_LOGGING # enable debugging in isci HAL
#
+# NVM Express (NVMe) support
+device nvme # base NVMe driver
+device nvd # expose NVMe namespaces as disks, depends on nvme
+
+#
# SafeNet crypto driver: can be moved to the MI NOTES as soon as
# it's tested on a big-endian machine
#
diff --git a/sys/amd64/conf/XENHVM b/sys/amd64/conf/XENHVM
index e5fc049..ee745ec 100644
--- a/sys/amd64/conf/XENHVM
+++ b/sys/amd64/conf/XENHVM
@@ -6,8 +6,6 @@
include GENERIC
ident XENHVM
-makeoptions MODULES_OVERRIDE=""
-
#
# Adaptive locks rely on a lock-free pointer read to determine the run state
# of the thread holding a lock when under contention; under a virtualisation
diff --git a/sys/amd64/ia32/ia32_sigtramp.S b/sys/amd64/ia32/ia32_sigtramp.S
index 710834c..3541988 100644
--- a/sys/amd64/ia32/ia32_sigtramp.S
+++ b/sys/amd64/ia32/ia32_sigtramp.S
@@ -91,8 +91,29 @@ ia32_osigcode:
*/
ALIGN_TEXT
lcall_tramp:
+ .code64
+ /*
+	 * Here we are in 64-bit mode and need to return to 32-bit mode.
+	 * First, convert the call frame from 64-bit to 32-bit format.
+ */
+ pushq %rax
+ movl 16(%rsp),%eax
+ movl %eax,20(%rsp) /* ret %cs */
+ movl 8(%rsp),%eax
+ movl %eax,16(%rsp) /* ret %rip -> %eip */
+ popq %rax
+ addq $8,%rsp
+ /* Now return to 32bit */
+ pushq $0x33 /* _ucode32sel UPL */
+ callq 1f
+1:
+ addq $2f-1b,(%rsp)
+ lretq
+2:
+ /* Back in 32bit mode */
+ .code32
cmpl $SYS_vfork,%eax
- je 2f
+ je 4f
pushl %ebp
movl %esp,%ebp
pushl 0x24(%ebp) /* arg 6 */
@@ -101,19 +122,19 @@ lcall_tramp:
pushl 0x18(%ebp)
pushl 0x14(%ebp)
pushl 0x10(%ebp) /* arg 1 */
- pushl 0xc(%ebp) /* gap */
+ pushl 0xc(%ebp) /* gap */
int $0x80
leavel
-1:
+3:
lretl
-2:
+4:
/*
* vfork handling is special and relies on the libc stub saving
* the return ip in %ecx. If vfork failed, then there is no
* child which can corrupt the frame created by call gate.
*/
int $0x80
- jb 1b
+ jb 3b
addl $8,%esp
jmpl *%ecx
#endif
diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c
index d79272a..0cdec6f 100644
--- a/sys/amd64/ia32/ia32_syscall.c
+++ b/sys/amd64/ia32/ia32_syscall.c
@@ -244,7 +244,7 @@ setup_lcall_gate(void)
bzero(ssd, sizeof(*ssd));
ssd->gd_looffset = lcall_addr;
ssd->gd_hioffset = lcall_addr >> 16;
- ssd->gd_selector = _ucode32sel;
+ ssd->gd_selector = _ucodesel;
ssd->gd_type = SDT_SYSCGT;
ssd->gd_dpl = SEL_UPL;
ssd->gd_p = 1;
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
index 99a94b7..91c33e6 100644
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -226,7 +226,7 @@ atomic_fetchadd_long(volatile u_long *p, u_long v)
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
- __asm __volatile("" : : : "memory"); \
+ __compiler_membar(); \
*p = v; \
} \
struct __hack
@@ -240,7 +240,7 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
u_##TYPE tmp; \
\
tmp = *p; \
- __asm __volatile("" : : : "memory"); \
+ __compiler_membar(); \
return (tmp); \
} \
struct __hack
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 09d04db..7243173 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -290,6 +290,13 @@ popcntq(u_long mask)
}
static __inline void
+lfence(void)
+{
+
+ __asm __volatile("lfence" : : : "memory");
+}
+
+static __inline void
mfence(void)
{
diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h
index 4c39fa6..1ce59d0 100644
--- a/sys/amd64/include/fpu.h
+++ b/sys/amd64/include/fpu.h
@@ -63,7 +63,8 @@ int fpusetregs(struct thread *td, struct savefpu *addr,
char *xfpustate, size_t xfpustate_size);
int fpusetxstate(struct thread *td, char *xfpustate,
size_t xfpustate_size);
-int fputrap(void);
+int fputrap_sse(void);
+int fputrap_x87(void);
void fpuuserinited(struct thread *td);
struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index 9d066b1..700e35f 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -140,7 +140,9 @@ int elcr_probe(void);
enum intr_trigger elcr_read_trigger(u_int irq);
void elcr_resume(void);
void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
+#ifdef SMP
void intr_add_cpu(u_int cpu);
+#endif
int intr_add_handler(const char *name, int vector, driver_filter_t filter,
driver_intr_t handler, void *arg, enum intr_type flags,
void **cookiep);
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index ff11ea1..5d7cb74 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -48,6 +48,7 @@ extern u_int amd_pminfo;
extern u_int via_feature_rng;
extern u_int via_feature_xcrypt;
extern u_int cpu_clflush_line_size;
+extern u_int cpu_stdext_feature;
extern u_int cpu_fxsr;
extern u_int cpu_high;
extern u_int cpu_id;
@@ -57,6 +58,7 @@ extern u_int cpu_procinfo;
extern u_int cpu_procinfo2;
extern char cpu_vendor[];
extern u_int cpu_vendor_id;
+extern char ctx_switch_xsave[];
extern char kstack[];
extern char sigcode[];
extern int szsigcode;
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
index 6dbeb40..9ddcf68 100644
--- a/sys/amd64/include/param.h
+++ b/sys/amd64/include/param.h
@@ -123,14 +123,6 @@
#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */
/*
- * Ceiling on amount of swblock kva space, can be changed via
- * the kern.maxswzone /boot/loader.conf variable.
- */
-#ifndef VM_SWZONE_SIZE_MAX
-#define VM_SWZONE_SIZE_MAX (32 * 1024 * 1024)
-#endif
-
-/*
* Mach derived conversion macros
*/
#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
diff --git a/sys/amd64/include/pc/bios.h b/sys/amd64/include/pc/bios.h
index 364f86c..e7d568e 100644
--- a/sys/amd64/include/pc/bios.h
+++ b/sys/amd64/include/pc/bios.h
@@ -30,16 +30,9 @@
#ifndef _MACHINE_PC_BIOS_H_
#define _MACHINE_PC_BIOS_H_
-extern u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen,
- int paralen, int sigofs);
-
-#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE)
-#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE)
-
/*
* Int 15:E820 'SMAP' structure
*/
-
#define SMAP_SIG 0x534D4150 /* 'SMAP' */
#define SMAP_TYPE_MEMORY 1
@@ -58,22 +51,61 @@ struct bios_smap {
u_int32_t type;
} __packed;
+/*
+ * System Management BIOS
+ */
+#define SMBIOS_START 0xf0000
+#define SMBIOS_STEP 0x10
+#define SMBIOS_OFF 0
+#define SMBIOS_LEN 4
+#define SMBIOS_SIG "_SM_"
+
+struct smbios_eps {
+ uint8_t anchor_string[4]; /* '_SM_' */
+ uint8_t checksum;
+ uint8_t length;
+ uint8_t major_version;
+ uint8_t minor_version;
+ uint16_t maximum_structure_size;
+ uint8_t entry_point_revision;
+ uint8_t formatted_area[5];
+ uint8_t intermediate_anchor_string[5]; /* '_DMI_' */
+ uint8_t intermediate_checksum;
+ uint16_t structure_table_length;
+ uint32_t structure_table_address;
+ uint16_t number_structures;
+ uint8_t BCD_revision;
+};
+
+struct smbios_structure_header {
+ uint8_t type;
+ uint8_t length;
+ uint16_t handle;
+};
+
+#ifdef _KERNEL
+#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE)
+#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE)
+
struct bios_oem_signature {
char * anchor; /* search anchor string in BIOS memory */
size_t offset; /* offset from anchor (may be negative) */
size_t totlen; /* total length of BIOS string to copy */
} __packed;
+
struct bios_oem_range {
u_int from; /* shouldn't be below 0xe0000 */
u_int to; /* shouldn't be above 0xfffff */
} __packed;
+
struct bios_oem {
struct bios_oem_range range;
struct bios_oem_signature signature[];
} __packed;
-extern int
-bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen);
-
+int bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen);
+uint32_t bios_sigsearch(uint32_t start, u_char *sig, int siglen, int paralen,
+ int sigofs);
+#endif
#endif /* _MACHINE_PC_BIOS_H_ */
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index d07dbac..2188442 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -216,16 +216,36 @@ extern struct pcpu *pcpup;
#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
+#define OFFSETOF_CURTHREAD 0
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnull-dereference"
+#endif
static __inline __pure2 struct thread *
__curthread(void)
{
struct thread *td;
- __asm("movq %%gs:0,%0" : "=r" (td));
+ __asm("movq %%gs:%1,%0" : "=r" (td)
+ : "m" (*(char *)OFFSETOF_CURTHREAD));
return (td);
}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
#define curthread (__curthread())
+#define OFFSETOF_CURPCB 32
+static __inline __pure2 struct pcb *
+__curpcb(void)
+{
+ struct pcb *pcb;
+
+ __asm("movq %%gs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB));
+ return (pcb);
+}
+#define curpcb (__curpcb())
+
#define IS_BSP() (PCPU_GET(cpuid) == 0)
#else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */
diff --git a/sys/amd64/pci/pci_cfgreg.c b/sys/amd64/pci/pci_cfgreg.c
index 3e29a58..2714ecb 100644
--- a/sys/amd64/pci/pci_cfgreg.c
+++ b/sys/amd64/pci/pci_cfgreg.c
@@ -295,6 +295,13 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
return (1);
}
+/*
+ * The AMD BIOS and Kernel Developer's Guides for CPU families starting
+ * with 10h require that all accesses to the memory-mapped PCI configuration
+ * space be done using the AX class of registers.
+ * Since other vendors do not currently have any contradicting requirements,
+ * the AMD access pattern is applied universally.
+ */
#define PCIE_VADDR(base, reg, bus, slot, func) \
((base) + \
((((bus) & 0xff) << 20) | \
@@ -317,13 +324,16 @@ pciereg_cfgread(int bus, unsigned slot, unsigned func, unsigned reg,
switch (bytes) {
case 4:
- data = *(volatile uint32_t *)(va);
+ __asm __volatile("mov %1, %%eax" : "=a" (data)
+ : "m" (*(uint32_t *)va));
break;
case 2:
- data = *(volatile uint16_t *)(va);
+ __asm __volatile("movzwl %1, %%eax" : "=a" (data)
+ : "m" (*(uint16_t *)va));
break;
case 1:
- data = *(volatile uint8_t *)(va);
+ __asm __volatile("movzbl %1, %%eax" : "=a" (data)
+ : "m" (*(uint8_t *)va));
break;
}
@@ -344,13 +354,16 @@ pciereg_cfgwrite(int bus, unsigned slot, unsigned func, unsigned reg, int data,
switch (bytes) {
case 4:
- *(volatile uint32_t *)(va) = data;
+ __asm __volatile("mov %%eax, %0" : "=m" (*(uint32_t *)va)
+ : "a" (data));
break;
case 2:
- *(volatile uint16_t *)(va) = data;
+ __asm __volatile("mov %%ax, %0" : "=m" (*(uint16_t *)va)
+ : "a" (data));
break;
case 1:
- *(volatile uint8_t *)(va) = data;
+ __asm __volatile("mov %%al, %0" : "=m" (*(uint8_t *)va)
+ : "a" (data));
break;
}
}
OpenPOWER on IntegriCloud