summaryrefslogtreecommitdiffstats
path: root/sys/amd64/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'sys/amd64/amd64')
-rw-r--r--sys/amd64/amd64/apic_vector.S276
-rw-r--r--sys/amd64/amd64/autoconf.c25
-rw-r--r--sys/amd64/amd64/cpu_switch.S27
-rw-r--r--sys/amd64/amd64/db_interface.c37
-rw-r--r--sys/amd64/amd64/db_trace.c5
-rw-r--r--sys/amd64/amd64/exception.S17
-rw-r--r--sys/amd64/amd64/fpu.c3
-rw-r--r--sys/amd64/amd64/genassym.c20
-rw-r--r--sys/amd64/amd64/identcpu.c3
-rw-r--r--sys/amd64/amd64/io_apic.c5
-rw-r--r--sys/amd64/amd64/local_apic.c26
-rw-r--r--sys/amd64/amd64/machdep.c141
-rw-r--r--sys/amd64/amd64/mem.c9
-rw-r--r--sys/amd64/amd64/mp_machdep.c473
-rw-r--r--sys/amd64/amd64/mpboot.S398
-rw-r--r--sys/amd64/amd64/mptable.c11
-rw-r--r--sys/amd64/amd64/nexus.c12
-rw-r--r--sys/amd64/amd64/pmap.c233
-rw-r--r--sys/amd64/amd64/support.S14
-rw-r--r--sys/amd64/amd64/trap.c17
-rw-r--r--sys/amd64/amd64/tsc.c16
-rw-r--r--sys/amd64/amd64/vm_machdep.c65
22 files changed, 903 insertions, 930 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index ecc4c67..2160dc4 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -42,7 +42,6 @@
#include <machine/asmacros.h>
#include <machine/apicreg.h>
-#include <machine/smptests.h>
#include "assym.s"
@@ -50,19 +49,48 @@
* Macros to create and destroy a trap frame.
*/
#define PUSH_FRAME \
- pushl $0 ; /* dummy error code */ \
- pushl $0 ; /* dummy trap type */ \
- pushal ; /* 8 ints */ \
- pushl %ds ; /* save data and extra segments ... */ \
- pushl %es ; \
- pushl %fs
+ subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* Yes, don't swapgs again */ \
+ swapgs ; \
+1: movq %rdi,TF_RDI(%rsp) ; \
+ movq %rsi,TF_RSI(%rsp) ; \
+ movq %rdx,TF_RDX(%rsp) ; \
+ movq %rcx,TF_RCX(%rsp) ; \
+ movq %r8,TF_R8(%rsp) ; \
+ movq %r9,TF_R9(%rsp) ; \
+ movq %rax,TF_RAX(%rsp) ; \
+ movq %rbx,TF_RBX(%rsp) ; \
+ movq %rbp,TF_RBP(%rsp) ; \
+ movq %r10,TF_R10(%rsp) ; \
+ movq %r11,TF_R11(%rsp) ; \
+ movq %r12,TF_R12(%rsp) ; \
+ movq %r13,TF_R13(%rsp) ; \
+ movq %r14,TF_R14(%rsp) ; \
+ movq %r15,TF_R15(%rsp)
#define POP_FRAME \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp
+ movq TF_RDI(%rsp),%rdi ; \
+ movq TF_RSI(%rsp),%rsi ; \
+ movq TF_RDX(%rsp),%rdx ; \
+ movq TF_RCX(%rsp),%rcx ; \
+ movq TF_R8(%rsp),%r8 ; \
+ movq TF_R9(%rsp),%r9 ; \
+ movq TF_RAX(%rsp),%rax ; \
+ movq TF_RBX(%rsp),%rbx ; \
+ movq TF_RBP(%rsp),%rbp ; \
+ movq TF_R10(%rsp),%r10 ; \
+ movq TF_R11(%rsp),%r11 ; \
+ movq TF_R12(%rsp),%r12 ; \
+ movq TF_R13(%rsp),%r13 ; \
+ movq TF_R14(%rsp),%r14 ; \
+ movq TF_R15(%rsp),%r15 ; \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* keep kernel GS.base */ \
+ cli ; \
+ swapgs ; \
+1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
+
/*
* I/O Interrupt Entry Point. Rather than having one entry point for
@@ -76,21 +104,15 @@
SUPERALIGN_TEXT ; \
IDTVEC(vec_name) ; \
PUSH_FRAME ; \
- movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
- mov %ax, %ds ; \
- mov %ax, %es ; \
- movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
- mov %ax, %fs ; \
- movl lapic, %edx ; /* pointer to local APIC */ \
- movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \
+ movq lapic, %rdx ; /* pointer to local APIC */ \
+ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \
bsrl %eax, %eax ; /* index of highset set bit in ISR */ \
jz 2f ; \
addl $(32 * index),%eax ; \
1: ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid double count */ \
- pushl %eax ; /* pass the IRQ */ \
+ movq %rax, %rdi ; /* pass the IRQ */ \
call lapic_handle_intr ; \
- addl $4, %esp ; /* discard parameter */ \
MEXITCOUNT ; \
jmp doreti ; \
2: movl $-1, %eax ; /* send a vector of -1 */ \
@@ -109,7 +131,7 @@ IDTVEC(spuriousint)
/* No EOI cycle used here */
- iret
+ iretq
MCOUNT_LABEL(bintr2)
ISR_VEC(1, apic_isr1)
@@ -128,32 +150,19 @@ MCOUNT_LABEL(eintr2)
.text
SUPERALIGN_TEXT
IDTVEC(invltlb)
- pushl %eax
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- mov %ax, %ds
-
-#ifdef COUNT_XINVLTLB_HITS
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- mov %ax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
- incl xhits_gbl(,%eax,4)
-#endif /* COUNT_XINVLTLB_HITS */
+ pushq %rax
- movl %cr3, %eax /* invalidate the TLB */
- movl %eax, %cr3
+ movq %cr3, %rax /* invalidate the TLB */
+ movq %rax, %cr3
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
- popl %ds
- popl %eax
- iret
+ popq %rax
+ iretq
/*
* Single page TLB shootdown
@@ -161,32 +170,19 @@ IDTVEC(invltlb)
.text
SUPERALIGN_TEXT
IDTVEC(invlpg)
- pushl %eax
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- mov %ax, %ds
-
-#ifdef COUNT_XINVLTLB_HITS
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- mov %ax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
- incl xhits_pg(,%eax,4)
-#endif /* COUNT_XINVLTLB_HITS */
+ pushq %rax
- movl smp_tlb_addr1, %eax
- invlpg (%eax) /* invalidate single page */
+ movq smp_tlb_addr1, %rax
+ invlpg (%rax) /* invalidate single page */
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
- popl %ds
- popl %eax
- iret
+ popq %rax
+ iretq
/*
* Page range TLB shootdown.
@@ -194,38 +190,25 @@ IDTVEC(invlpg)
.text
SUPERALIGN_TEXT
IDTVEC(invlrng)
- pushl %eax
- pushl %edx
- pushl %ds
- movl $KDSEL, %eax /* Kernel data selector */
- mov %ax, %ds
-
-#ifdef COUNT_XINVLTLB_HITS
- pushl %fs
- movl $KPSEL, %eax /* Private space selector */
- mov %ax, %fs
- movl PCPU(CPUID), %eax
- popl %fs
- incl xhits_rng(,%eax,4)
-#endif /* COUNT_XINVLTLB_HITS */
-
- movl smp_tlb_addr1, %edx
- movl smp_tlb_addr2, %eax
-1: invlpg (%edx) /* invalidate single page */
- addl $PAGE_SIZE, %edx
- cmpl %eax, %edx
+ pushq %rax
+ pushq %rdx
+
+ movq smp_tlb_addr1, %rdx
+ movq smp_tlb_addr2, %rax
+1: invlpg (%rdx) /* invalidate single page */
+ addq $PAGE_SIZE, %rdx
+ cmpq %rax, %rdx
jb 1b
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
- popl %ds
- popl %edx
- popl %eax
- iret
+ popq %rdx
+ popq %rax
+ iretq
/*
* Forward hardclock to another CPU. Pushes a clockframe and calls
@@ -235,18 +218,11 @@ IDTVEC(invlrng)
SUPERALIGN_TEXT
IDTVEC(hardclock)
PUSH_FRAME
- movl $KDSEL, %eax /* reload with kernel's data segment */
- mov %ax, %ds
- mov %ax, %es
- movl $KPSEL, %eax
- mov %ax, %fs
- movl lapic, %edx
- movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
+ movq lapic, %rdx
+ movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */
- pushl $0 /* XXX convert trapframe to clockframe */
call forwarded_hardclock
- addl $4, %esp /* XXX convert clockframe to trapframe */
MEXITCOUNT
jmp doreti
@@ -258,20 +234,13 @@ IDTVEC(hardclock)
SUPERALIGN_TEXT
IDTVEC(statclock)
PUSH_FRAME
- movl $KDSEL, %eax /* reload with kernel's data segment */
- mov %ax, %ds
- mov %ax, %es
- movl $KPSEL, %eax
- mov %ax, %fs
- movl lapic, %edx
- movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
+ movq lapic, %rdx
+ movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */
FAKE_MCOUNT(13*4(%esp))
- pushl $0 /* XXX convert trapframe to clockframe */
call forwarded_statclock
- addl $4, %esp /* XXX convert clockframe to trapframe */
MEXITCOUNT
jmp doreti
@@ -287,14 +256,9 @@ IDTVEC(statclock)
SUPERALIGN_TEXT
IDTVEC(cpuast)
PUSH_FRAME
- movl $KDSEL, %eax
- mov %ax, %ds /* use KERNEL data segment */
- mov %ax, %es
- movl $KPSEL, %eax
- mov %ax, %fs
- movl lapic, %edx
- movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */
+ movq lapic, %rdx
+ movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */
FAKE_MCOUNT(13*4(%esp))
@@ -311,63 +275,41 @@ IDTVEC(cpuast)
.text
SUPERALIGN_TEXT
IDTVEC(cpustop)
- pushl %ebp
- movl %esp, %ebp
- pushl %eax
- pushl %ecx
- pushl %edx
- pushl %ds /* save current data segment */
- pushl %es
- pushl %fs
-
- movl $KDSEL, %eax
- mov %ax, %ds /* use KERNEL data segment */
- mov %ax, %es
- movl $KPSEL, %eax
- mov %ax, %fs
-
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
+ PUSH_FRAME
+
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
movl PCPU(CPUID), %eax
imull $PCB_SIZE, %eax
- leal CNAME(stoppcbs)(%eax), %eax
- pushl %eax
- call CNAME(savectx) /* Save process context */
- addl $4, %esp
+ leaq stoppcbs(%rax), %rdi
+ call savectx /* Save process context */
movl PCPU(CPUID), %eax
lock
- btsl %eax, CNAME(stopped_cpus) /* stopped_cpus |= (1<<id) */
+ btsl %eax, stopped_cpus /* stopped_cpus |= (1<<id) */
1:
- btl %eax, CNAME(started_cpus) /* while (!(started_cpus & (1<<id))) */
+ btl %eax, started_cpus /* while (!(started_cpus & (1<<id))) */
jnc 1b
lock
- btrl %eax, CNAME(started_cpus) /* started_cpus &= ~(1<<id) */
+ btrl %eax, started_cpus /* started_cpus &= ~(1<<id) */
lock
- btrl %eax, CNAME(stopped_cpus) /* stopped_cpus &= ~(1<<id) */
+ btrl %eax, stopped_cpus /* stopped_cpus &= ~(1<<id) */
test %eax, %eax
jnz 2f
- movl CNAME(cpustop_restartfunc), %eax
- test %eax, %eax
+ movq cpustop_restartfunc, %rax
+ testq %rax, %rax
jz 2f
- movl $0, CNAME(cpustop_restartfunc) /* One-shot */
+ movq $0, cpustop_restartfunc /* One-shot */
- call *%eax
+ call *%rax
2:
- popl %fs
- popl %es
- popl %ds /* restore previous data segment */
- popl %edx
- popl %ecx
- popl %eax
- movl %ebp, %esp
- popl %ebp
- iret
+ POP_FRAME
+ iretq
/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
@@ -378,19 +320,13 @@ IDTVEC(cpustop)
SUPERALIGN_TEXT
IDTVEC(rendezvous)
PUSH_FRAME
- movl $KDSEL, %eax
- mov %ax, %ds /* use KERNEL data segment */
- mov %ax, %es
- movl $KPSEL, %eax
- mov %ax, %fs
-
call smp_rendezvous_action
-
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
- POP_FRAME
- iret
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+ POP_FRAME /* Why not doreti? */
+ iretq
+#ifdef LAZY_SWITCH
/*
* Clean up when we lose out on the lazy context switch optimization.
* ie: when we are about to release a PTD but a cpu is still borrowing it.
@@ -398,16 +334,10 @@ IDTVEC(rendezvous)
SUPERALIGN_TEXT
IDTVEC(lazypmap)
PUSH_FRAME
- movl $KDSEL, %eax
- mov %ax, %ds /* use KERNEL data segment */
- mov %ax, %es
- movl $KPSEL, %eax
- mov %ax, %fs
-
call pmap_lazyfix_action
-
- movl lapic, %eax
- movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */
- POP_FRAME
- iret
+ movq lapic, %rax
+ movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
+ POP_FRAME /* Why not doreti? */
+ iretq
+#endif
#endif /* SMP */
diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c
index d6ce6b6..adec2e0 100644
--- a/sys/amd64/amd64/autoconf.c
+++ b/sys/amd64/amd64/autoconf.c
@@ -76,7 +76,6 @@ __FBSDID("$FreeBSD$");
#include <nfsclient/nfsdiskless.h>
#include <machine/md_var.h>
-#include <amd64/isa/icu.h>
#ifdef DEV_ISA
#include <isa/isavar.h>
@@ -109,23 +108,11 @@ configure(dummy)
{
/*
- * Activate the ICU's. Note that we are explicitly at splhigh()
- * at present as we have no way to disable stray PCI level triggered
- * interrupts until the devices have had a driver attached. This
- * is particularly a problem when the interrupts are shared. For
- * example, if IRQ 10 is shared between a disk and network device
- * and the disk device generates an interrupt, if we "activate"
- * IRQ 10 when the network driver is set up, then we will get
- * recursive interrupt 10's as nothing will know how to turn off
- * the disk device's interrupt.
- *
- * Having the ICU's active means we can probe interrupt routing to
- * see if a device causes the corresponding pending bit to be set.
- *
- * This is all rather inconvenient.
+ * Enable interrupts on the processor. The interrupts are still
+ * disabled in the interrupt controllers until interrupt handlers
+ * are registered.
*/
enable_intr();
- INTREN(IRQ_SLAVE);
/* nexus0 is the top of the i386 device tree */
device_add_child(root_bus, "nexus", 0);
@@ -141,12 +128,6 @@ configure(dummy)
if (isa_bus_device)
isa_probe_children(isa_bus_device);
#endif
-
- /*
- * Now we're ready to handle (pending) interrupts.
- * XXX this is slightly misplaced.
- */
- spl0();
}
static void
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 56f0c84..3bfcfc8 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -59,14 +59,16 @@
* %rsi = newtd
*/
ENTRY(cpu_throw)
- xorq %rax, %rax
movl PCPU(CPUID), %eax
testq %rdi,%rdi /* no thread? */
jz 1f
/* release bit from old pm_active */
movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */
movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */
- btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */
+#ifdef SMP
+ lock
+#endif
+ btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */
1:
movq TD_PCB(%rsi),%rdx /* newtd->td_proc */
movq PCB_CR3(%rdx),%rdx
@@ -74,7 +76,10 @@ ENTRY(cpu_throw)
/* set bit in new pm_active */
movq TD_PROC(%rsi),%rdx
movq P_VMSPACE(%rdx), %rdx
- btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */
+#ifdef SMP
+ lock
+#endif
+ btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */
jmp sw1
/*
@@ -143,7 +148,6 @@ ENTRY(cpu_switch)
jz badsw3 /* no, panic */
#endif
movq TD_PCB(%rsi),%r8
- xorq %rax, %rax
movl PCPU(CPUID), %eax
/* switch address space */
@@ -153,12 +157,18 @@ ENTRY(cpu_switch)
/* Release bit from old pmap->pm_active */
movq TD_PROC(%rdi), %rdx /* oldproc */
movq P_VMSPACE(%rdx), %rdx
- btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */
+#ifdef SMP
+ lock
+#endif
+ btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */
/* Set bit in new pmap->pm_active */
movq TD_PROC(%rsi),%rdx /* newproc */
movq P_VMSPACE(%rdx), %rdx
- btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */
+#ifdef SMP
+ lock
+#endif
+ btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */
sw1:
/*
@@ -191,8 +201,11 @@ sw1:
wrmsr
/* Update the TSS_RSP0 pointer for the next interrupt */
+ movq PCPU(TSSP), %rax
+ addq $COMMON_TSS_RSP0, %rax
leaq -16(%r8), %rbx
- movq %rbx, common_tss + COMMON_TSS_RSP0
+ movq %rbx, (%rax)
+ movq %rbx, PCPU(RSP0)
/* Restore context. */
movq PCB_RBX(%r8),%rbx
diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index 3dd6a8a..077c914 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -98,6 +98,22 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs)
ef = read_rflags();
disable_intr();
+#ifdef SMP
+
+#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
+ db_printf("\nCPU%d stopping CPUs: 0x%08x...", PCPU_GET(cpuid),
+ PCPU_GET(other_cpus));
+#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
+
+ /* We stop all CPUs except ourselves (obviously) */
+ stop_cpus(PCPU_GET(other_cpus));
+
+#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
+ db_printf(" stopped.\n");
+#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
+
+#endif /* SMP */
+
switch (type) {
case T_BPTFLT: /* breakpoint */
case T_TRCTRAP: /* debug exception */
@@ -192,6 +208,27 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs)
regs->tf_ds = ddb_regs.tf_ds & 0xffff;
#endif
+#ifdef SMP
+
+#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
+ db_printf("\nCPU%d restarting CPUs: 0x%08x...", PCPU_GET(cpuid),
+ stopped_cpus);
+#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
+
+ /* Restart all the CPUs we previously stopped */
+ if (stopped_cpus != PCPU_GET(other_cpus) && smp_started != 0) {
+ db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n",
+ PCPU_GET(other_cpus), stopped_cpus);
+ panic("stop_cpus() failed");
+ }
+ restart_cpus(stopped_cpus);
+
+#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK)
+ db_printf(" restarted.\n");
+#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
+
+#endif /* SMP */
+
write_rflags(ef);
return (1);
diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
index a05348a..7dba9bb 100644
--- a/sys/amd64/amd64/db_trace.c
+++ b/sys/amd64/amd64/db_trace.c
@@ -245,8 +245,9 @@ db_nextframe(fp, ip, p)
if (strcmp(name, "calltrap") == 0 ||
strcmp(name, "fork_trampoline") == 0)
frame_type = TRAP;
- else if (strncmp(name, "Xintr", 5) == 0 ||
- strncmp(name, "Xfastintr", 9) == 0)
+ else if (strncmp(name, "Xatpic_intr", 11) == 0 ||
+ strncmp(name, "Xatpic_fastintr", 15) == 0 ||
+ strncmp(name, "Xapic_isr", 9) == 0)
frame_type = INTERRUPT;
else if (strcmp(name, "Xfast_syscall") == 0)
frame_type = SYSCALL;
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 3d2eaa6..972f19c 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -35,14 +35,11 @@
*/
#include <machine/asmacros.h>
-#include <sys/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include "assym.s"
-#define SEL_RPL_MASK 0x0003
-
.text
/*****************************************************************************/
@@ -72,8 +69,6 @@
* %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
* must load them with appropriate values for supervisor mode operation.
*/
-#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
- .type __CONCAT(X,name),@function; __CONCAT(X,name):
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
@@ -223,7 +218,7 @@ IDTVEC(page)
IDTVEC(fast_syscall)
swapgs
movq %rsp,PCPU(SCRATCH_RSP)
- movq common_tss+COMMON_TSS_RSP0,%rsp
+ movq PCPU(RSP0),%rsp
/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
subq $TF_SIZE,%rsp
/* defer TF_RSP till we have a spare register */
@@ -297,14 +292,6 @@ ENTRY(fork_trampoline)
call fork_exit
jmp doreti /* Handle any ASTs */
-
-/*
- * Include what was once config+isa-dependent code.
- * XXX it should be in a stand-alone file. It's still icu-dependent and
- * belongs in i386/isa.
- */
-#include "amd64/isa/vector.S"
-
.data
ALIGN_DATA
@@ -406,5 +393,3 @@ doreti_iret_fault:
movq $T_PROTFLT,TF_TRAPNO(%rsp)
movq $0,TF_ERR(%rsp) /* XXX should be the error code */
jmp alltraps_with_regs_pushed
-
-#include "amd64/isa/icu_ipl.S"
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 1e4890c..1acb931 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cputypes.h>
#include <machine/frame.h>
+#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
@@ -63,8 +64,6 @@ __FBSDID("$FreeBSD$");
#include <machine/segments.h>
#include <machine/ucontext.h>
-#include <amd64/isa/intr_machdep.h>
-
/*
* Floating point support.
*/
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 6a017e5..27a1a12 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -69,10 +69,12 @@ __FBSDID("$FreeBSD$");
#include <nfs/rpcv2.h>
#include <nfsclient/nfs.h>
#include <nfsclient/nfsdiskless.h>
+#include <machine/apicreg.h>
#include <machine/cpu.h>
#include <machine/sigframe.h>
#include <machine/proc.h>
#include <machine/specialreg.h>
+#include <machine/segments.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -83,11 +85,6 @@ ASSYM(P_UAREA, offsetof(struct proc, p_uarea));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
-ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
-ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
-ASSYM(TD_MD, offsetof(struct thread, td_md));
-
-ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
@@ -180,6 +177,7 @@ ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_rflags));
ASSYM(ENOENT, ENOENT);
ASSYM(EFAULT, EFAULT);
ASSYM(ENAMETOOLONG, ENAMETOOLONG);
+ASSYM(MAXCOMLEN, MAXCOMLEN);
ASSYM(MAXPATHLEN, MAXPATHLEN);
ASSYM(PC_SIZEOF, sizeof(struct pcpu));
ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace));
@@ -189,12 +187,24 @@ ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp));
+ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
+ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
+ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
+
+ASSYM(LA_VER, offsetof(struct LAPIC, version));
+ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
+ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
+ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
+ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
+ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
+ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL));
+ASSYM(SEL_RPL_MASK, SEL_RPL_MASK);
ASSYM(MSR_FSBASE, MSR_FSBASE);
ASSYM(MSR_GSBASE, MSR_GSBASE);
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
index ba8e58e..f3d70c2 100644
--- a/sys/amd64/amd64/identcpu.c
+++ b/sys/amd64/amd64/identcpu.c
@@ -55,12 +55,13 @@ __FBSDID("$FreeBSD$");
#include <machine/asmacros.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/md_var.h>
#include <amd64/isa/icu.h>
-#include <amd64/isa/intr_machdep.h>
/* XXX - should be in header file: */
void printcpuinfo(void);
diff --git a/sys/amd64/amd64/io_apic.c b/sys/amd64/amd64/io_apic.c
index 4af70fa..b620440 100644
--- a/sys/amd64/amd64/io_apic.c
+++ b/sys/amd64/amd64/io_apic.c
@@ -30,6 +30,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_atpic.h"
#include "opt_isa.h"
#include "opt_no_mixed_mode.h"
@@ -50,8 +51,8 @@ __FBSDID("$FreeBSD$");
#include <machine/apicvar.h>
#include <machine/segments.h>
-#if defined(DEV_ISA) && !defined(NO_MIXED_MODE)
-#define MIXED_MODE
+#if defined(DEV_ISA) && defined(DEV_ATPIC) && !defined(NO_MIXED_MODE)
+#define MIXED_MODE
#endif
#define IOAPIC_ISA_INTS 16
diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c
index 6f942bf..bdff518 100644
--- a/sys/amd64/amd64/local_apic.c
+++ b/sys/amd64/amd64/local_apic.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/pcpu.h>
+#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -171,8 +172,7 @@ lapic_init(uintptr_t addr)
KASSERT(trunc_page(addr) == addr,
("local APIC not aligned on a page boundary"));
lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t));
- setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
+ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
/* Perform basic initialization of the BSP's local APIC. */
value = lapic->svr;
@@ -242,8 +242,7 @@ lapic_enable_intr(u_int irq)
KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
KASSERT(ioint_handlers[vector / 32] != NULL,
("No ISR handler for IRQ %u", irq));
- setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
+ setidt(vector, ioint_handlers[vector / 32], SDT_SYSIGT, SEL_KPL, 0);
}
void
@@ -478,13 +477,14 @@ lapic_eoi(void)
}
void
-lapic_handle_intr(struct intrframe frame)
+lapic_handle_intr(void *cookie, struct intrframe frame)
{
struct intsrc *isrc;
+ int vec = (uintptr_t)cookie;
- if (frame.if_vec == -1)
+ if (vec == -1)
panic("Couldn't get vector from ISR!");
- isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec));
+ isrc = intr_lookup_source(apic_idt_to_irq(vec));
intr_execute_handlers(isrc, &frame);
}
@@ -589,21 +589,9 @@ static void
apic_setup_local(void *dummy __unused)
{
int retval;
- uint64_t apic_base;
if (best_enum == NULL)
return;
- /*
- * To work around an errata, we disable the local APIC on some
- * CPUs during early startup. We need to turn the local APIC back
- * on on such CPUs now.
- */
- if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 &&
- (cpu_id & 0xff0) == 0x610) {
- apic_base = rdmsr(MSR_APICBASE);
- apic_base |= APICBASE_ENABLED;
- wrmsr(MSR_APICBASE, apic_base);
- }
retval = best_enum->apic_setup_local();
if (retval != 0)
printf("%s: Failed to setup the local APIC: returned %d\n",
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e32d82a..2140d7a 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -42,6 +42,7 @@
__FBSDID("$FreeBSD$");
#include "opt_atalk.h"
+#include "opt_atpic.h"
#include "opt_compat.h"
#include "opt_cpu.h"
#include "opt_ddb.h"
@@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <machine/reg.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
+#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/proc.h>
@@ -108,9 +110,13 @@ __FBSDID("$FreeBSD$");
#include <machine/perfmon.h>
#endif
#include <machine/tss.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
#include <amd64/isa/icu.h>
-#include <amd64/isa/intr_machdep.h>
+
+#include <isa/isareg.h>
#include <isa/rtc.h>
#include <sys/ptrace.h>
#include <machine/sigframe.h>
@@ -146,7 +152,9 @@ vm_paddr_t phys_avail[10];
struct kva_md_info kmi;
static struct trapframe proc0_tf;
-static struct pcpu __pcpu;
+struct region_descriptor r_gdt, r_idt;
+
+struct pcpu __pcpu[MAXCPU];
struct mtx icu_lock;
@@ -196,7 +204,6 @@ cpu_startup(dummy)
bufinit();
vm_pager_bufferinit();
- /* For SMP, we delay the cpu_setregs() until after SMP startup. */
cpu_setregs();
}
@@ -589,13 +596,13 @@ SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
* Initialize segments & interrupt table
*/
-struct user_segment_descriptor gdt[NGDT];/* global descriptor table */
+struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
static char dblfault_stack[PAGE_SIZE] __aligned(16);
-struct amd64tss common_tss;
+struct amd64tss common_tss[MAXCPU];
/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[] = {
@@ -755,6 +762,15 @@ ssdtosyssd(ssd, sd)
sd->sd_gran = ssd->ssd_gran;
}
+#if !defined(DEV_ATPIC) && defined(DEV_ISA)
+#include <isa/isavar.h>
+u_int
+isa_irq_pending(void)
+{
+
+ return (0);
+}
+#endif
#define PHYSMAP_SIZE (2 * 8)
@@ -783,7 +799,6 @@ static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
int i, physmap_idx, pa_indx;
- u_int extmem;
vm_paddr_t pa, physmap[PHYSMAP_SIZE];
pt_entry_t *pte;
char *cp;
@@ -802,12 +817,9 @@ getmemsize(caddr_t kmdp, u_int64_t first)
* ie: an int32_t immediately precedes smap.
*/
smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP);
- if (smapbase == 0)
- smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | 0x0009); /* Old value for MODINFOMD_SMAP */
- if (smapbase == 0) {
+ if (smapbase == NULL)
panic("No BIOS smap info from loader!");
- goto deep_shit;
- }
+
smapsize = *((u_int32_t *)smapbase - 1);
smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
@@ -816,14 +828,11 @@ getmemsize(caddr_t kmdp, u_int64_t first)
printf("SMAP type=%02x base=%016lx len=%016lx\n",
smap->type, smap->base, smap->length);
- if (smap->type != 0x01) {
+ if (smap->type != 0x01)
continue;
- }
- if (smap->length == 0) {
-next_run:
+ if (smap->length == 0)
continue;
- }
for (i = 0; i <= physmap_idx; i += 2) {
if (smap->base < physmap[i + 1]) {
@@ -836,6 +845,7 @@ next_run:
if (smap->base == physmap[physmap_idx + 1]) {
physmap[physmap_idx + 1] += smap->length;
+next_run:
continue;
}
@@ -850,69 +860,23 @@ next_run:
}
/*
- * Perform "base memory" related probes & setup based on SMAP
+ * Find the 'base memory' segment for SMP
*/
-deep_shit:
- if (basemem == 0) {
- for (i = 0; i <= physmap_idx; i += 2) {
- if (physmap[i] == 0x00000000) {
- basemem = physmap[i + 1] / 1024;
- break;
- }
- }
-
- if (basemem == 0) {
- basemem = rtcin(RTC_BASELO) + (rtcin(RTC_BASEHI) << 8);
- }
-
- if (basemem == 0) {
- basemem = 640;
- }
-
- if (basemem > 640) {
- printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
- basemem);
- basemem = 640;
+ basemem = 0;
+ for (i = 0; i <= physmap_idx; i += 2) {
+ if (physmap[i] == 0x00000000) {
+ basemem = physmap[i + 1] / 1024;
+ break;
}
-
-#if 0
- for (pa = trunc_page(basemem * 1024);
- pa < ISA_HOLE_START; pa += PAGE_SIZE)
- pmap_kenter(KERNBASE + pa, pa);
-#endif
}
+ if (basemem == 0)
+ panic("BIOS smap did not include a basemem segment!");
- if (physmap[1] != 0)
- goto physmap_done;
-
- /*
- * Prefer the RTC value for extended memory.
- */
- extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
-
- /*
- * Special hack for chipsets that still remap the 384k hole when
- * there's 16MB of memory - this really confuses people that
- * are trying to use bus mastering ISA controllers with the
- * "16MB limit"; they only have 16MB, but the remapping puts
- * them beyond the limit.
- *
- * If extended memory is between 15-16MB (16-17MB phys address range),
- * chop it to 15MB.
- */
- if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
- extmem = 15 * 1024;
-
- physmap[0] = 0;
- physmap[1] = basemem * 1024;
- physmap_idx = 2;
- physmap[physmap_idx] = 0x100000;
- physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
+#ifdef SMP
+ /* make hole for AP bootstrap code */
+ physmap[1] = mp_bootaddress(physmap[1] / 1024);
+#endif
-physmap_done:
- /*
- * Now, physmap contains a map of physical memory.
- */
/*
* Maxmem isn't the "maximum memory", it's one larger than the
* highest page of the physical address space. It should be
@@ -929,7 +893,8 @@ physmap_done:
* hw.physmem is a size in bytes; we also allow k, m, and g suffixes
* for the appropriate modifiers. This overrides MAXMEM.
*/
- if ((cp = getenv("hw.physmem")) != NULL) {
+ cp = getenv("hw.physmem");
+ if (cp != NULL) {
u_int64_t AllowMem, sanity;
char *ep;
@@ -1106,11 +1071,18 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
{
caddr_t kmdp;
int gsel_tss, off, x;
- struct region_descriptor r_gdt, r_idt;
struct pcpu *pc;
u_int64_t msr;
char *env;
+#ifdef DEV_ISA
+ /* Preemptively mask the atpics and leave them shut down */
+ outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
+ outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
+#else
+#error "have you forgotten the isa device?";
+#endif
+
/* Turn on PTE NX (no execute) bit */
msr = rdmsr(MSR_EFER) | EFER_NXE;
wrmsr(MSR_EFER, msr);
@@ -1146,7 +1118,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/*
* make gdt memory segments
*/
- gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss;
+ gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
for (x = 0; x < NGDT; x++) {
if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
@@ -1157,7 +1129,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
r_gdt.rd_base = (long) gdt;
lgdt(&r_gdt);
- pc = &__pcpu;
+ pc = &__pcpu[0];
wrmsr(MSR_FSBASE, 0); /* User value */
wrmsr(MSR_GSBASE, (u_int64_t)pc);
@@ -1166,6 +1138,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
pcpu_init(pc, 0, sizeof(struct pcpu));
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
+ PCPU_SET(tssp, &common_tss[0]);
/*
* Initialize mutexes.
@@ -1211,8 +1184,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
*/
cninit();
-#ifdef DEV_ISA
- isa_defaultirq();
+#ifdef DEV_ATPIC
+ atpic_startup();
#endif
#ifdef DDB
@@ -1225,12 +1198,14 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
initializecpu(); /* Initialize CPU registers */
/* make an initial tss so cpu can get interrupt stack on syscall! */
- common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
+ common_tss[0].tss_rsp0 = thread0.td_kstack + \
+ KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
/* Ensure the stack is aligned to 16 bytes */
- common_tss.tss_rsp0 &= ~0xF;
+ common_tss[0].tss_rsp0 &= ~0xF;
+ PCPU_SET(rsp0, common_tss[0].tss_rsp0);
/* doublefault stack space, runs on ist1 */
- common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
+ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
ltr(gsel_tss);
diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
index 56d268c..aeb2e35 100644
--- a/sys/amd64/amd64/mem.c
+++ b/sys/amd64/amd64/mem.c
@@ -323,6 +323,15 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg)
return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}
+#ifdef SMP
+void
+mem_range_AP_init(void)
+{
+ if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
+ (mem_range_softc.mr_op->initAP(&mem_range_softc));
+}
+#endif
+
static int
mem_modevent(module_t mod, int type, void *data)
{
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index f58a94f..fcd478b 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 1996, by Steve Passe
+ * Copyright (c) 2003, by Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,30 +27,12 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
-#if !defined(lint)
-#if !defined(SMP)
-#error How did you get here?
-#endif
-
-#if defined(I386_CPU) && !defined(COMPILING_LINT)
-#error SMP not supported with I386_CPU
-#endif
-#ifndef DEV_APIC
-#error The apic device is required for SMP, add "device apic" to your config file.
-#endif
-#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
-#error SMP not supported with CPU_DISABLE_CMPXCHG
-#endif
-#endif /* not lint */
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
-#include <sys/cons.h> /* cngetc() */
#ifdef GPROF
#include <sys/gmon.h>
#endif
@@ -75,9 +58,8 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/smp.h>
-#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */
#include <machine/specialreg.h>
-#include <machine/privatespace.h>
+#include <machine/tss.h>
#define WARMBOOT_TARGET 0
#define WARMBOOT_OFF (KERNBASE + 0x0467)
@@ -88,67 +70,9 @@ __FBSDID("$FreeBSD$");
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
-/*
- * this code MUST be enabled here and in mpboot.s.
- * it follows the very early stages of AP boot by placing values in CMOS ram.
- * it NORMALLY will never be needed and thus the primitive method for enabling.
- *
-#define CHECK_POINTS
- */
-
-#if defined(CHECK_POINTS) && !defined(PC98)
-#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
-#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
-
-#define CHECK_INIT(D); \
- CHECK_WRITE(0x34, (D)); \
- CHECK_WRITE(0x35, (D)); \
- CHECK_WRITE(0x36, (D)); \
- CHECK_WRITE(0x37, (D)); \
- CHECK_WRITE(0x38, (D)); \
- CHECK_WRITE(0x39, (D));
-
-#define CHECK_PRINT(S); \
- printf("%s: %d, %d, %d, %d, %d, %d\n", \
- (S), \
- CHECK_READ(0x34), \
- CHECK_READ(0x35), \
- CHECK_READ(0x36), \
- CHECK_READ(0x37), \
- CHECK_READ(0x38), \
- CHECK_READ(0x39));
-
-#else /* CHECK_POINTS */
-
-#define CHECK_INIT(D)
-#define CHECK_PRINT(S)
-#define CHECK_WRITE(A, D)
-
-#endif /* CHECK_POINTS */
-
-/*
- * Values to send to the POST hardware.
- */
-#define MP_BOOTADDRESS_POST 0x10
-#define MP_PROBE_POST 0x11
-#define MPTABLE_PASS1_POST 0x12
-
-#define MP_START_POST 0x13
-#define MP_ENABLE_POST 0x14
-#define MPTABLE_PASS2_POST 0x15
-
-#define START_ALL_APS_POST 0x16
-#define INSTALL_AP_TRAMP_POST 0x17
-#define START_AP_POST 0x18
-
-#define MP_ANNOUNCE_POST 0x19
-
/* lock region used by kernel profiling */
int mcount_lock;
-/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
-int current_postcode;
-
int mp_naps; /* # of Applications processors */
int boot_cpu_id = -1; /* designated BSP */
extern int nkpt;
@@ -164,6 +88,9 @@ struct cpu_top *smp_topology;
char *bootSTK;
static int bootAP;
+/* Free these after use */
+void *bootstacks[MAXCPU];
+
/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;
@@ -178,6 +105,8 @@ vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
struct mtx smp_tlb_mtx;
+extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
+
/*
* Local data and functions.
*/
@@ -201,17 +130,17 @@ struct cpu_info {
} static cpu_info[MAXCPU];
static int cpu_apic_ids[MAXCPU];
-static u_int boot_address;
+static u_int boot_address;
static void set_logical_apic_ids(void);
static int start_all_aps(void);
-static void install_ap_tramp(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
static int hlt_cpus_mask;
static int hlt_logical_cpus;
static struct sysctl_ctx_list logical_cpu_clist;
+static u_int bootMP_size;
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -219,13 +148,15 @@ static struct sysctl_ctx_list logical_cpu_clist;
u_int
mp_bootaddress(u_int basemem)
{
- POSTCODE(MP_BOOTADDRESS_POST);
- boot_address = trunc_page(basemem); /* round down to 4k boundary */
+ bootMP_size = mptramp_end - mptramp_start;
+ boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
if ((basemem - boot_address) < bootMP_size)
boot_address -= PAGE_SIZE; /* not enough, lower by 4k */
+ /* 3 levels of page table pages */
+ mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
- return boot_address;
+ return mptramp_pagetables;
}
void
@@ -302,43 +233,34 @@ cpu_mp_start(void)
{
int i;
- POSTCODE(MP_START_POST);
-
/* Initialize the logical ID to APIC ID table. */
for (i = 0; i < MAXCPU; i++)
cpu_apic_ids[i] = -1;
/* Install an inter-CPU IPI for TLB invalidation */
- setidt(IPI_INVLTLB, IDTVEC(invltlb),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
- setidt(IPI_INVLPG, IDTVEC(invlpg),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
- setidt(IPI_INVLRNG, IDTVEC(invlrng),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for forwarding hardclock() */
- setidt(IPI_HARDCLOCK, IDTVEC(hardclock),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_HARDCLOCK, IDTVEC(hardclock), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for forwarding statclock() */
- setidt(IPI_STATCLOCK, IDTVEC(statclock),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_STATCLOCK, IDTVEC(statclock), SDT_SYSIGT, SEL_KPL, 0);
+#ifdef LAZY_SWITCH
/* Install an inter-CPU IPI for lazy pmap release */
- setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYSIGT, SEL_KPL, 0);
+#endif
/* Install an inter-CPU IPI for all-CPU rendezvous */
- setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for forcing an additional software trap */
- setidt(IPI_AST, IDTVEC(cpuast),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_AST, IDTVEC(cpuast), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for CPU stop/restart */
- setidt(IPI_STOP, IDTVEC(cpustop),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
@@ -371,8 +293,6 @@ cpu_mp_announce(void)
{
int i, x;
- POSTCODE(MP_ANNOUNCE_POST);
-
/* List CPUs */
printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
for (i = 1, x = 0; x < MAXCPU; x++) {
@@ -390,38 +310,41 @@ cpu_mp_announce(void)
void
init_secondary(void)
{
- int gsel_tss;
- int x, myid;
- u_int cr0;
-
- /* bootAP is set in start_ap() to our ID. */
- myid = bootAP;
- gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
- gdt_segs[GPROC0_SEL].ssd_base =
- (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
- SMP_prvspace[myid].pcpu.pc_prvspace =
- &SMP_prvspace[myid].pcpu;
-
- for (x = 0; x < NGDT; x++) {
- ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
- }
+ struct pcpu *pc;
+ u_int64_t msr, cr0;
+ int cpu, gsel_tss;
+
+ /* Set by the startup code for us to use */
+ cpu = bootAP;
+
+ /* Init tss */
+ common_tss[cpu] = common_tss[0];
+ common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */
+
+ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
+ ssdtosyssd(&gdt_segs[GPROC0_SEL],
+ (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
- r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
- r_gdt.rd_base = (int) &gdt[myid * NGDT];
lgdt(&r_gdt); /* does magic intra-segment return */
- lidt(&r_idt);
+ /* Get per-cpu data */
+ pc = &__pcpu[cpu];
+
+ /* prime data page for it to use */
+ pcpu_init(pc, cpu, sizeof(struct pcpu));
+ pc->pc_apic_id = cpu_apic_ids[cpu];
+ pc->pc_prvspace = pc;
+ pc->pc_curthread = 0;
+ pc->pc_tssp = &common_tss[cpu];
+ pc->pc_rsp0 = 0;
- lldt(_default_ldt);
- PCPU_SET(currentldt, _default_ldt);
+ wrmsr(MSR_FSBASE, 0); /* User value */
+ wrmsr(MSR_GSBASE, (u_int64_t)pc);
+ wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
+
+ lidt(&r_idt);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
- PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
- PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
ltr(gsel_tss);
/*
@@ -432,32 +355,32 @@ init_secondary(void)
cr0 = rcr0();
cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
load_cr0(cr0);
- CHECK_WRITE(0x38, 5);
-
- /* Disable local APIC just to be sure. */
+
+ /* Set up the fast syscall stuff */
+ msr = rdmsr(MSR_EFER) | EFER_SCE;
+ wrmsr(MSR_EFER, msr);
+ wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
+ wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
+ msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
+ ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
+ wrmsr(MSR_STAR, msr);
+ wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
+
+ /* Disable local apic just to be sure. */
lapic_disable();
/* signal our startup to the BSP. */
mp_naps++;
- CHECK_WRITE(0x39, 6);
/* Spin until the BSP releases the AP's. */
while (!aps_ready)
ia32_pause();
- /* BSP may have changed PTD while we were waiting */
- invltlb();
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- lidt(&r_idt);
-#endif
-
/* set up CPU registers and state */
cpu_setregs();
/* set up FPU state on the AP */
- npxinit(__INITIAL_NPXCW__);
+ fpuinit();
/* set up SSE registers */
enable_sse();
@@ -467,7 +390,6 @@ init_secondary(void)
printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
printf("SMP: actual apic_id = %d\n", lapic_id());
printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
panic("cpuid mismatch! boom!!");
}
@@ -559,39 +481,51 @@ set_logical_apic_ids(void)
static int
start_all_aps(void)
{
-#ifndef PC98
u_char mpbiosreason;
-#endif
- u_long mpbioswarmvec;
- struct pcpu *pc;
- char *stack;
- uintptr_t kptbase;
- int i, pg, apic_id, cpu;
-
- POSTCODE(START_ALL_APS_POST);
+ u_int32_t mpbioswarmvec;
+ int apic_id, cpu, i;
+ u_int64_t *pt4, *pt3, *pt2;
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
/* install the AP 1st level boot code */
- install_ap_tramp();
+ pmap_kenter(boot_address + KERNBASE, boot_address);
+ bcopy(mptramp_start, (void *)((uintptr_t)boot_address + KERNBASE), bootMP_size);
+
+ /* Locate the page tables, they'll be below the trampoline */
+ pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
+ pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
+ pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
+
+ /* Create the initial 1GB replicated page tables */
+ for (i = 0; i < 512; i++) {
+ /* Each slot of the level 4 pages points to the same level 3 page */
+ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
+ pt4[i] |= PG_V | PG_RW | PG_U;
+
+ /* Each slot of the level 3 pages points to the same level 2 page */
+ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
+ pt3[i] |= PG_V | PG_RW | PG_U;
+
+ /* The level 2 page slots are mapped with 2MB pages for 1GB. */
+ pt2[i] = i * (2 * 1024 * 1024);
+ pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+ }
/* save the current value of the warm-start vector */
- mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
-#ifndef PC98
+ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
outb(CMOS_REG, BIOS_RESET);
mpbiosreason = inb(CMOS_DATA);
-#endif
- /* set up temporary P==V mapping for AP boot */
- /* XXX this is a hack, we should boot the AP on its own stack/PTD */
- kptbase = (uintptr_t)(void *)KPTphys;
- for (i = 0; i < NKPT; i++)
- PTD[i] = (pd_entry_t)(PG_V | PG_RW |
- ((kptbase + i * PAGE_SIZE) & PG_FRAME));
- invltlb();
+ /* setup a vector to our boot code */
+ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+ *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
+ outb(CMOS_REG, BIOS_RESET);
+ outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
/* start each AP */
- for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
+ cpu = 0;
+ for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
if (!cpu_info[apic_id].cpu_present ||
cpu_info[apic_id].cpu_bsp)
continue;
@@ -600,48 +534,18 @@ start_all_aps(void)
/* save APIC ID for this logical ID */
cpu_apic_ids[cpu] = apic_id;
- /* first page of AP's private space */
- pg = cpu * i386_btop(sizeof(struct privatespace));
-
- /* allocate a new private data page */
- pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
-
- /* wire it into the private page table page */
- SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
-
/* allocate and set up an idle stack data page */
- stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
- for (i = 0; i < KSTACK_PAGES; i++)
- SMPpt[pg + 1 + i] = (pt_entry_t)
- (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
-
- /* prime data page for it to use */
- pcpu_init(pc, cpu, sizeof(struct pcpu));
- pc->pc_apic_id = apic_id;
-
- /* setup a vector to our boot code */
- *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
- *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
-#ifndef PC98
- outb(CMOS_REG, BIOS_RESET);
- outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
-#endif
+ bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
- bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
- PAGE_SIZE];
+ bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
bootAP = cpu;
/* attempt to start the Application Processor */
- CHECK_INIT(99); /* setup checkpoints */
if (!start_ap(apic_id)) {
- printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
- CHECK_PRINT("trace"); /* show checkpoints */
- /* better panic as the AP may be running loose */
- printf("panic y/n? [y] ");
- if (cngetc() != 'n')
- panic("bye-bye");
+ /* restore the warmstart vector */
+ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
+ panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
}
- CHECK_PRINT("trace"); /* show checkpoints */
all_cpus |= (1 << cpu); /* record AP in CPU map */
}
@@ -650,92 +554,15 @@ start_all_aps(void)
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
/* restore the warmstart vector */
- *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
-#ifndef PC98
+ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
+
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
-#endif
-
- /*
- * Set up the idle context for the BSP. Similar to above except
- * that some was done by locore, some by pmap.c and some is implicit
- * because the BSP is cpu#0 and the page is initially zero and also
- * because we can refer to variables by name on the BSP..
- */
-
- /* Allocate and setup BSP idle stack */
- stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
- for (i = 0; i < KSTACK_PAGES; i++)
- SMPpt[1 + i] = (pt_entry_t)
- (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
-
- for (i = 0; i < NKPT; i++)
- PTD[i] = 0;
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
/* number of APs actually started */
return mp_naps;
}
-/*
- * load the 1st level AP boot code into base memory.
- */
-
-/* targets for relocation */
-extern void bigJump(void);
-extern void bootCodeSeg(void);
-extern void bootDataSeg(void);
-extern void MPentry(void);
-extern u_int MP_GDT;
-extern u_int mp_gdtbase;
-
-static void
-install_ap_tramp(void)
-{
- int x;
- int size = *(int *) ((u_long) & bootMP_size);
- u_char *src = (u_char *) ((u_long) bootMP);
- u_char *dst = (u_char *) boot_address + KERNBASE;
- u_int boot_base = (u_int) bootMP;
- u_int8_t *dst8;
- u_int16_t *dst16;
- u_int32_t *dst32;
-
- POSTCODE(INSTALL_AP_TRAMP_POST);
-
- pmap_kenter(boot_address + KERNBASE, boot_address);
- for (x = 0; x < size; ++x)
- *dst++ = *src++;
-
- /*
- * modify addresses in code we just moved to basemem. unfortunately we
- * need fairly detailed info about mpboot.s for this to work. changes
- * to mpboot.s might require changes here.
- */
-
- /* boot code is located in KERNEL space */
- dst = (u_char *) boot_address + KERNBASE;
-
- /* modify the lgdt arg */
- dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
- *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
-
- /* modify the ljmp target for MPentry() */
- dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
- *dst32 = ((u_int) MPentry - KERNBASE);
-
- /* modify the target for boot code segment */
- dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
- dst8 = (u_int8_t *) (dst16 + 1);
- *dst16 = (u_int) boot_address & 0xffff;
- *dst8 = ((u_int) boot_address >> 16) & 0xff;
-
- /* modify the target for boot data segment */
- dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
- dst8 = (u_int8_t *) (dst16 + 1);
- *dst16 = (u_int) boot_address & 0xffff;
- *dst8 = ((u_int) boot_address >> 16) & 0xff;
-}
/*
* This function starts the AP (application processor) identified
@@ -750,8 +577,6 @@ start_ap(int apic_id)
int vector, ms;
int cpus;
- POSTCODE(START_AP_POST);
-
/* calculate the vector */
vector = (boot_address >> 12) & 0xff;
@@ -810,50 +635,14 @@ start_ap(int apic_id)
DELAY(200); /* wait ~200uS */
/* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
+ for (ms = 0; ms < 50; ms++) {
if (mp_naps > cpus)
return 1; /* return SUCCESS */
- DELAY(1000);
+ DELAY(100000);
}
return 0; /* return FAILURE */
}
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
- 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
/*
* Flush the TLB on all other CPU's
*/
@@ -966,69 +755,49 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
void
smp_invltlb(void)
{
- if (smp_started) {
+
+ if (smp_started)
smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
- }
}
void
smp_invlpg(vm_offset_t addr)
{
- if (smp_started) {
+
+ if (smp_started)
smp_tlb_shootdown(IPI_INVLPG, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_page++;
-#endif
- }
}
void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{
- if (smp_started) {
+
+ if (smp_started)
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_range++;
- ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
}
void
smp_masked_invltlb(u_int mask)
{
- if (smp_started) {
+
+ if (smp_started)
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_global++;
-#endif
- }
}
void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{
- if (smp_started) {
+
+ if (smp_started)
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_page++;
-#endif
- }
}
void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{
- if (smp_started) {
+
+ if (smp_started)
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_range++;
- ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
}
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index 8f42f6b..ca53a87 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 1995, Jack F. Vogel
+/*-
+ * Copyright (c) 2003 Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -10,16 +10,11 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Jack F. Vogel
- * 4. The name of the developer may be used to endorse or promote products
- * derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -28,257 +23,214 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * mpboot.s: FreeBSD machine support for the Intel MP Spec
- * multiprocessor systems.
- *
* $FreeBSD$
*/
-#include "opt_pmap.h"
-
#include <machine/asmacros.h> /* miscellaneous asm macros */
-#include <machine/apicreg.h>
#include <machine/specialreg.h>
#include "assym.s"
-#define R(x) ((x)-KERNBASE)
-
-/*
- * this code MUST be enabled here and in mp_machdep.c
- * it follows the very early stages of AP boot by placing values in CMOS ram.
- * it NORMALLY will never be needed and thus the primitive method for enabling.
- *
-#define CHECK_POINTS
- */
-
-#if defined(CHECK_POINTS) && !defined(PC98)
-
-#define CMOS_REG (0x70)
-#define CMOS_DATA (0x71)
-
-#define CHECKPOINT(A,D) \
- movb $(A),%al ; \
- outb %al,$CMOS_REG ; \
- movb $(D),%al ; \
- outb %al,$CMOS_DATA
+ .data /* So we can modify it */
-#else
-
-#define CHECKPOINT(A,D)
-
-#endif /* CHECK_POINTS */
-
-
-/*
- * the APs enter here from their trampoline code (bootMP, below)
- */
- .p2align 4
-
-NON_GPROF_ENTRY(MPentry)
- CHECKPOINT(0x36, 3)
+ .p2align 4,0
+ .globl mptramp_start
+mptramp_start:
+ .code16
/*
- * Enable features on this processor. We don't support SMP on
- * CPUs older than a Pentium, so we know that we can use the cpuid
- * instruction.
+ * The AP enters here in response to the startup IPI.
+ * We are in real mode. %cs is the only segment register set.
*/
- movl $1,%eax
- cpuid /* Retrieve features */
- movl %cr4,%eax
-#ifndef DISABLE_PSE
- testl $CPUID_PSE,%edx
- jz 1f
- orl $CR4_PSE,%eax /* Enable PSE */
-1:
-#endif
-#ifndef DISABLE_PG_G
- testl $CPUID_PGE,%edx
- jz 1f
- orl $CR4_PGE,%eax /* Enable PGE */
-1:
-#endif
- testl $CPUID_VME,%edx
- jz 1f
- orl $CR4_VME,%eax /* Enable VME */
-1:
- movl %eax,%cr4
-
- /* Now enable paging mode */
-#ifdef PAE
- movl R(IdlePDPT), %eax
- movl %eax, %cr3
- movl %cr4, %eax
- orl $CR4_PAE, %eax
- movl %eax, %cr4
-#else
- movl R(IdlePTD), %eax
- movl %eax,%cr3
-#endif
- movl %cr0,%eax
- orl $CR0_PE|CR0_PG,%eax /* enable paging */
- movl %eax,%cr0 /* let the games begin! */
- movl bootSTK,%esp /* boot stack end loc. */
-
- pushl $mp_begin /* jump to high mem */
- ret
+ cli /* make sure no interrupts */
+ mov %cs, %ax /* copy %cs to %ds. Remember these */
+ mov %ax, %ds /* are offsets rather than selectors */
+ mov %ax, %ss
/*
- * Wait for the booting CPU to signal startup
+ * Find relocation base and patch the gdt descript and ljmp targets
*/
-mp_begin: /* now running relocated at KERNBASE */
- CHECKPOINT(0x37, 4)
- call init_secondary /* load i386 tables */
-
-/*
- * This is the embedded trampoline or bootstrap that is
- * copied into 'real-mode' low memory, it is where the
- * secondary processor "wakes up". When it is executed
- * the processor will eventually jump into the routine
- * MPentry, which resides in normal kernel text above
- * 1Meg. -jackv
- */
+ xorl %ebx,%ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
+ orl %ebx, lgdt_desc-mptramp_start+2
+ orl %ebx, jmp_32-mptramp_start+2
+ orl %ebx, jmp_64-mptramp_start+1
- .data
- ALIGN_DATA /* just to be sure */
+ /*
+ * Load the descriptor table pointer. We'll need it when running
+ * in 16 bit protected mode.
+ */
+ lgdt lgdt_desc-mptramp_start
-BOOTMP1:
+ /* Enable protected mode */
+ movl $CR0_PE, %eax
+ mov %eax, %cr0
-NON_GPROF_ENTRY(bootMP)
- .code16
- cli
- CHECKPOINT(0x34, 1)
- /* First guarantee a 'clean slate' */
- xorl %eax, %eax
- movl %eax, %ebx
- movl %eax, %ecx
- movl %eax, %edx
- movl %eax, %esi
- movl %eax, %edi
+ /*
+ * Now execute a far jump to turn on protected mode. This
+ * causes the segment registers to turn into selectors and causes
+ * %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode-gdt, $protmode-mptramp_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and its a little clearer what we are patching.
+ */
+jmp_32:
+ .byte 0x66 /* size override to 32 bits */
+ .byte 0xea /* opcode for far jump */
+ .long protmode-mptramp_start /* offset in segment */
+ .word bootcode-gdt /* index in gdt for 32 bit code */
- /* set up data segments */
- mov %cs, %ax
+ /*
+ * At this point, we are running in 32 bit legacy protected mode.
+ */
+ .code32
+protmode:
+ mov $bootdata-gdt, %eax
mov %ax, %ds
- mov %ax, %es
- mov %ax, %fs
- mov %ax, %gs
- mov %ax, %ss
- mov $(boot_stk-bootMP), %esp
- /* Now load the global descriptor table */
- lgdt MP_GDTptr-bootMP
+ /* Turn on the PAE, PSE and PGE bits for when paging is enabled */
+ mov %cr4, %eax
+ orl $(CR4_PAE | CR4_PSE), %eax
+ mov %eax, %cr4
- /* Enable protected mode */
- movl %cr0, %eax
- orl $CR0_PE, %eax
- movl %eax, %cr0
+ /*
+ * Enable EFER.LME so that we get long mode when all the prereqs are
+ * in place. In this case, it turns on when CR0_PG is finally enabled.
+ * Pick up a few other EFER bits that we'll need while we're here.
+ */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_LME | EFER_SCE | EFER_NXE, %eax
+ wrmsr
/*
- * make intrasegment jump to flush the processor pipeline and
- * reload CS register
+ * Point to the embedded page tables for startup. Note that this
+ * only gets accessed after we're actually in 64 bit mode, however
+ * we can only set the bottom 32 bits of %cr3 in this state. This
+ * means we are required to use a temporary page table that is below
+ * the 4GB limit. %ebx is still our relocation base. We could just
+ * subtract 3 * PAGE_SIZE, but that would be too easy.
*/
- pushl $0x18
- pushl $(protmode-bootMP)
- lretl
+ leal mptramp_pagetables-mptramp_start(%ebx),%eax
+ movl (%eax), %eax
+ mov %eax, %cr3
- .code32
-protmode:
- CHECKPOINT(0x35, 2)
+ /*
+ * Finally, switch to long bit mode by enabling paging. We have
+ * to be very careful here because all the segmentation disappears
+ * out from underneath us. The spec says we can depend on the
+ * subsequent pipelined branch to execute, but *only if* everything
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
+ */
+ mov %cr0, %eax
+ orl $CR0_PG, %eax
+ mov %eax, %cr0
/*
- * we are NOW running for the first time with %eip
- * having the full physical address, BUT we still
- * are using a segment descriptor with the origin
- * not matching the booting kernel.
+ * At this point paging is enabled, and we are in "compatibility" mode.
+ * We do another far jump to reload %cs with the 64 bit selector.
+ * %cr3 points to a 4-level page table page.
+ * We cannot yet jump all the way to the kernel because we can only
+ * specify a 32 bit linear address. So, yet another trampoline.
*
- * SO NOW... for the BIG Jump into kernel's segment
- * and physical text above 1 Meg.
+ * The following instruction is:
+ * ljmp $kernelcode-gdt, $tramp_64-mptramp_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and it's a little clearer what we are patching.
*/
- mov $0x10, %ebx
- movw %bx, %ds
- movw %bx, %es
- movw %bx, %fs
- movw %bx, %gs
- movw %bx, %ss
-
- .globl bigJump
-bigJump:
- /* this will be modified by mpInstallTramp() */
- ljmp $0x08, $0 /* far jmp to MPentry() */
-
-dead: hlt /* We should never get here */
- jmp dead
-
-/*
- * MP boot strap Global Descriptor Table
- */
- .p2align 4
- .globl MP_GDT
- .globl bootCodeSeg
- .globl bootDataSeg
-MP_GDT:
-
-nulldesc: /* offset = 0x0 */
+jmp_64:
+ .byte 0xea /* opcode for far jump */
+ .long tramp_64-mptramp_start /* offset in segment */
+ .word kernelcode-gdt /* index in gdt for 64 bit code */
- .word 0x0
- .word 0x0
- .byte 0x0
- .byte 0x0
- .byte 0x0
- .byte 0x0
-
-kernelcode: /* offset = 0x08 */
-
- .word 0xffff /* segment limit 0..15 */
- .word 0x0000 /* segment base 0..15 */
- .byte 0x0 /* segment base 16..23; set for 0K */
- .byte 0x9f /* flags; Type */
- .byte 0xcf /* flags; Limit */
- .byte 0x0 /* segment base 24..32 */
-
-kerneldata: /* offset = 0x10 */
-
- .word 0xffff /* segment limit 0..15 */
- .word 0x0000 /* segment base 0..15 */
- .byte 0x0 /* segment base 16..23; set for 0k */
- .byte 0x93 /* flags; Type */
- .byte 0xcf /* flags; Limit */
- .byte 0x0 /* segment base 24..32 */
+ /*
+ * Yeehar! We're running in 64 bit mode! We can mostly ignore our
+ * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with
+ * a 1:1 1GB mirrored mapping over entire address space. We had better
+ * switch to a real %cr3 promptly so that we can get to the direct map
+ * space. Remember that jmp is relative and that we've been relocated,
+ * so use an indirect jump.
+ */
+ .code64
+tramp_64:
+ movabsq $entry_64,%rax /* 64 bit immediate load */
+ jmp *%rax
-bootcode: /* offset = 0x18 */
+ .p2align 4,0
+gdt:
+ /*
+ * All segment descriptor tables start with a null descriptor
+ */
+ .long 0x00000000
+ .long 0x00000000
- .word 0xffff /* segment limit 0..15 */
-bootCodeSeg: /* this will be modified by mpInstallTramp() */
- .word 0x0000 /* segment base 0..15 */
- .byte 0x00 /* segment base 16...23; set for 0x000xx000 */
- .byte 0x9e /* flags; Type */
- .byte 0xcf /* flags; Limit */
- .byte 0x0 /*segment base 24..32 */
+ /*
+ * This is the 64 bit long mode code descriptor. There is no
+ * 64 bit data descriptor.
+ */
+kernelcode:
+ .long 0x00000000
+ .long 0x00209800
-bootdata: /* offset = 0x20 */
+ /*
+ * This is the descriptor for the 32 bit boot code.
+ * %cs: +A, +R, -C, DPL=0, +P, +D, +G
+ * Accessed, Readable, Present, 32 bit, 4G granularity
+ */
+bootcode:
+ .long 0x0000ffff
+ .long 0x00cf9b00
- .word 0xffff
-bootDataSeg: /* this will be modified by mpInstallTramp() */
- .word 0x0000 /* segment base 0..15 */
- .byte 0x00 /* segment base 16...23; set for 0x000xx000 */
- .byte 0x92
- .byte 0xcf
- .byte 0x0
+ /*
+ * This is the descriptor for the 32 bit boot data.
+ * We load it into %ds and %ss. The bits for each selector
+ * are interpreted slightly differently.
+ * %ds: +A, +W, -E, DPL=0, +P, +D, +G
+ * %ss: +A, +W, -E, DPL=0, +P, +B, +G
+ * Accessed, Writeable, Expand up, Present, 32 bit, 4GB
+ * For %ds, +D means 'default operand size is 32 bit'.
+ * For %ss, +B means the stack register is %esp rather than %sp.
+ */
+bootdata:
+ .long 0x0000ffff
+ .long 0x00cf9300
-/*
- * GDT pointer for the lgdt call
- */
- .globl mp_gdtbase
+gdtend:
-MP_GDTptr:
-mp_gdtlimit:
- .word 0x0028
-mp_gdtbase: /* this will be modified by mpInstallTramp() */
+ /*
+ * The address of our page table pages that the boot code
+ * uses to trampoline up to kernel address space.
+ */
+ .globl mptramp_pagetables
+mptramp_pagetables:
.long 0
- .space 0x100 /* space for boot_stk - 1st temporary stack */
-boot_stk:
+ /*
+ * The pseudo descriptor for lgdt to use.
+ */
+lgdt_desc:
+ .word gdtend-gdt /* Length */
+ .long gdt-mptramp_start /* Offset plus %ds << 4 */
+
+ .globl mptramp_end
+mptramp_end:
-BOOTMP2:
- .globl bootMP_size
-bootMP_size:
- .long BOOTMP2 - BOOTMP1
+ /*
+ * From here on down is executed in the kernel .text section.
+ *
+ * Load a real %cr3 that has all the direct map stuff and switches
+ * off the 1GB replicated mirror. Load a stack pointer and jump
+ * into AP startup code in C.
+ */
+ .text
+ .code64
+ .p2align 4,0
+entry_64:
+ movq KPML4phys, %rax
+ movq %rax, %cr3
+ movq bootSTK, %rsp
+ jmp init_secondary
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 908e65a..f0a9883 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -27,7 +27,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_mptable_force_htt.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
@@ -57,13 +56,8 @@ __FBSDID("$FreeBSD$");
#define NAPICID 32 /* Max number of I/O APIC's */
-#ifdef PC98
-#define BIOS_BASE (0xe8000)
-#define BIOS_SIZE (0x18000)
-#else
#define BIOS_BASE (0xf0000)
#define BIOS_SIZE (0x10000)
-#endif
#define BIOS_COUNT (BIOS_SIZE/4)
typedef void mptable_entry_handler(u_char *entry, void *arg);
@@ -226,11 +220,12 @@ static int
mptable_probe(void)
{
int x;
- u_long segment;
+ u_int32_t segment;
u_int32_t target;
/* see if EBDA exists */
- if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
+ segment = (u_int32_t) *(u_short *)(KERNBASE + 0x40e);
+ if (segment != 0) {
/* search first 1K of EBDA */
target = (u_int32_t) (segment << 4);
if ((x = search_for_sig(target, 1024 / 4)) >= 0)
diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c
index 952ceaf..1fab16b 100644
--- a/sys/amd64/amd64/nexus.c
+++ b/sys/amd64/amd64/nexus.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/module.h>
#include <machine/bus.h>
+#include <machine/intr_machdep.h>
#include <sys/rman.h>
#include <sys/interrupt.h>
@@ -64,8 +65,6 @@ __FBSDID("$FreeBSD$");
#include <isa/isavar.h>
#include <amd64/isa/isa.h>
#endif
-#include <amd64/isa/icu.h>
-#include <amd64/isa/intr_machdep.h>
#include <sys/rtprio.h>
static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device");
@@ -156,14 +155,11 @@ nexus_probe(device_t dev)
* multi-ISA-bus systems. PCI interrupts are routed to the ISA
* component, so in a way, PCI can be a partial child of an ISA bus(!).
* APIC interrupts are global though.
- *
- * XXX We depend on the AT PIC driver correctly claiming IRQ 2
- * to prevent its reuse elsewhere.
*/
irq_rman.rm_start = 0;
irq_rman.rm_type = RMAN_ARRAY;
irq_rman.rm_descr = "Interrupt request lines";
- irq_rman.rm_end = 15;
+ irq_rman.rm_end = NUM_IO_INTS - 1;
if (rman_init(&irq_rman)
|| rman_manage_region(&irq_rman,
irq_rman.rm_start, irq_rman.rm_end))
@@ -428,7 +424,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
if (error)
return (error);
- error = inthand_add(device_get_nameunit(child), irq->r_start,
+ error = intr_add_handler(device_get_nameunit(child), irq->r_start,
ihand, arg, flags, cookiep);
return (error);
@@ -437,7 +433,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
static int
nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih)
{
- return (inthand_remove(ih));
+ return (intr_remove_handler(ih));
}
static int
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 4e4c124..b2f0c18 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -118,6 +118,9 @@ __FBSDID("$FreeBSD$");
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -134,6 +137,9 @@ __FBSDID("$FreeBSD$");
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
@@ -163,6 +169,11 @@ struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;
+#ifdef LAZY_SWITCH
+#ifdef SMP
+static struct mtx lazypmap_lock;
+#endif
+#endif
vm_paddr_t avail_start; /* PA of first available physical page */
vm_paddr_t avail_end; /* PA of last available physical page */
@@ -477,6 +488,11 @@ pmap_bootstrap(firstaddr)
kernel_pmap->pm_active = -1; /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvlist);
LIST_INIT(&allpmaps);
+#ifdef LAZY_SWITCH
+#ifdef SMP
+ mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN);
+#endif
+#endif
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
@@ -630,8 +646,121 @@ pmap_track_modified(vm_offset_t va)
return 0;
}
+#ifdef SMP
/*
- * Normal invalidation functions.
+ * For SMP, these functions have to use the IPI mechanism for coherence.
+ */
+void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+ u_int cpumask;
+ u_int other_cpus;
+
+ if (smp_started) {
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_tlb_mtx);
+ } else
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ * XXX critical sections disable interrupts again
+ */
+ if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ invlpg(va);
+ smp_invlpg(va);
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ invlpg(va);
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ }
+ if (smp_started)
+ mtx_unlock_spin(&smp_tlb_mtx);
+ else
+ critical_exit();
+}
+
+void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ u_int cpumask;
+ u_int other_cpus;
+ vm_offset_t addr;
+
+ if (smp_started) {
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_tlb_mtx);
+ } else
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ * XXX critical sections disable interrupts again
+ */
+ if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ smp_invlpg_range(sva, eva);
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invlpg_range(pmap->pm_active & other_cpus,
+ sva, eva);
+ }
+ if (smp_started)
+ mtx_unlock_spin(&smp_tlb_mtx);
+ else
+ critical_exit();
+}
+
+void
+pmap_invalidate_all(pmap_t pmap)
+{
+ u_int cpumask;
+ u_int other_cpus;
+
+ if (smp_started) {
+ if (!(read_rflags() & PSL_I))
+ panic("%s: interrupts disabled", __func__);
+ mtx_lock_spin(&smp_tlb_mtx);
+ } else
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ * XXX critical sections disable interrupts again
+ */
+ if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ invltlb();
+ smp_invltlb();
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ invltlb();
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invltlb(pmap->pm_active & other_cpus);
+ }
+ if (smp_started)
+ mtx_unlock_spin(&smp_tlb_mtx);
+ else
+ critical_exit();
+}
+#else /* !SMP */
+/*
+ * Normal, non-SMP, invalidation functions.
* We inline these within pmap.c for speed.
*/
PMAP_INLINE void
@@ -659,6 +788,7 @@ pmap_invalidate_all(pmap_t pmap)
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
+#endif /* !SMP */
/*
* Are we current address space or kernel?
@@ -1208,6 +1338,93 @@ retry:
* Pmap allocation/deallocation routines.
***************************************************/
+#ifdef LAZY_SWITCH
+#ifdef SMP
+/*
+ * Deal with a SMP shootdown of other users of the pmap that we are
+ * trying to dispose of. This can be a bit hairy.
+ */
+static u_int *lazymask;
+static register_t lazyptd;
+static volatile u_int lazywait;
+
+void pmap_lazyfix_action(void);
+
+void
+pmap_lazyfix_action(void)
+{
+ u_int mymask = PCPU_GET(cpumask);
+
+ if (rcr3() == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ atomic_clear_int(lazymask, mymask);
+ atomic_store_rel_int(&lazywait, 1);
+}
+
+static void
+pmap_lazyfix_self(u_int mymask)
+{
+
+ if (rcr3() == lazyptd)
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ atomic_clear_int(lazymask, mymask);
+}
+
+
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ u_int mymask = PCPU_GET(cpumask);
+ u_int mask;
+ register u_int spins;
+
+ while ((mask = pmap->pm_active) != 0) {
+ spins = 50000000;
+ mask = mask & -mask; /* Find least significant set bit */
+ mtx_lock_spin(&lazypmap_lock);
+ lazyptd = vtophys(pmap->pm_pml4);
+ if (mask == mymask) {
+ lazymask = &pmap->pm_active;
+ pmap_lazyfix_self(mymask);
+ } else {
+ atomic_store_rel_long((u_long *)&lazymask,
+ (u_long)&pmap->pm_active);
+ atomic_store_rel_int(&lazywait, 0);
+ ipi_selected(mask, IPI_LAZYPMAP);
+ while (lazywait == 0) {
+ ia32_pause();
+ if (--spins == 0)
+ break;
+ }
+ }
+ mtx_unlock_spin(&lazypmap_lock);
+ if (spins == 0)
+ printf("pmap_lazyfix: spun for 50000000\n");
+ }
+}
+
+#else /* SMP */
+
+/*
+ * Cleaning up on uniprocessor is easy. For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ u_long cr3;
+
+ cr3 = vtophys(pmap->pm_pml4);
+ if (cr3 == rcr3()) {
+ load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+ pmap->pm_active &= ~(PCPU_GET(cpumask));
+ }
+}
+#endif /* SMP */
+#endif
+
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
@@ -1222,6 +1439,9 @@ pmap_release(pmap_t pmap)
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
+#ifdef LAZY_SWITCH
+ pmap_lazyfix(pmap);
+#endif
mtx_lock_spin(&allpmaps_lock);
LIST_REMOVE(pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
@@ -2777,12 +2997,21 @@ void
pmap_activate(struct thread *td)
{
struct proc *p = td->td_proc;
- pmap_t pmap;
+ pmap_t pmap, oldpmap;
u_int64_t cr3;
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
+ oldpmap = PCPU_GET(curpmap);
+#ifdef SMP
+if (oldpmap) /* XXX FIXME */
+ atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
+ atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+#else
+if (oldpmap) /* XXX FIXME */
+ oldpmap->pm_active &= ~PCPU_GET(cpumask);
pmap->pm_active |= PCPU_GET(cpumask);
+#endif
cr3 = vtophys(pmap->pm_pml4);
/* XXXKSE this is wrong.
* pmap_activate is for the current thread on the current cpu
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 94d7bba..2c0ddf1 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -37,10 +37,21 @@
#include "opt_ddb.h"
#include <machine/asmacros.h>
+#include <machine/intr_machdep.h>
#include <machine/pmap.h>
#include "assym.s"
+ ALIGN_DATA
+ .globl intrcnt, eintrcnt
+intrcnt:
+ .space INTRCNT_COUNT * 4
+eintrcnt:
+
+ .globl intrnames, eintrnames
+intrnames:
+ .space INTRCNT_COUNT * (MAXCOMLEN + 1)
+eintrnames:
.text
@@ -302,6 +313,9 @@ ENTRY(casuptr)
ja fusufault
movq %rsi, %rax /* old */
+#ifdef SMP
+ lock
+#endif
cmpxchgq %rdx, (%rdi) /* new = %rdx */
/*
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 65ae8f9..0e6b95b 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -80,13 +80,14 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <machine/cpu.h>
+#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
#include <machine/tss.h>
-#include <amd64/isa/icu.h>
-#include <amd64/isa/intr_machdep.h>
-
#include <ddb/ddb.h>
extern void trap(struct trapframe frame);
@@ -564,6 +565,11 @@ trap_fatal(frame, eva)
printf("\n\nFatal trap %d: %s while in %s mode\n",
type, trap_msg[type],
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
+#ifdef SMP
+ /* two separate prints in case of a trap on an unmapped page */
+ printf("cpuid = %d; ", PCPU_GET(cpuid));
+ printf("apic id = %02x\n", PCPU_GET(apic_id));
+#endif
if (type == T_PAGEFLT) {
printf("fault virtual address = 0x%lx\n", eva);
printf("fault code = %s %s, %s\n",
@@ -631,6 +637,11 @@ void
dblfault_handler()
{
printf("\nFatal double fault\n");
+#ifdef SMP
+ /* two separate prints in case of a trap on an unmapped page */
+ printf("cpuid = %d; ", PCPU_GET(cpuid));
+ printf("apic id = %02x\n", PCPU_GET(apic_id));
+#endif
panic("double fault");
}
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index 5485511..6a5b17c 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -77,14 +77,26 @@ init_TSC(void)
tsc_freq = tscval[1] - tscval[0];
if (bootverbose)
printf("TSC clock: %lu Hz\n", tsc_freq);
-
- return;
}
+
void
init_TSC_tc(void)
{
+#ifdef SMP
+ /*
+ * We can not use the TSC in SMP mode unless the TSCs on all CPUs
+ * are somehow synchronized. Some hardware configurations do
+ * this, but we have no way of determining whether this is the
+ * case, so we do not use the TSC in multi-processor systems
+ * unless the user indicated (by setting kern.timecounter.smp_tsc
+ * to 1) that he believes that his TSCs are synchronized.
+ */
+ if (mp_ncpus > 1 && !smp_tsc)
+ tsc_timecounter.tc_quality = -100;
+#endif
+
if (tsc_freq != 0 && !tsc_is_broken) {
tsc_timecounter.tc_frequency = tsc_freq;
tc_init(&tsc_timecounter);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 9b6bc1f..c4f583f 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
@@ -80,6 +81,11 @@ __FBSDID("$FreeBSD$");
#include <amd64/isa/isa.h>
static void cpu_reset_real(void);
+#ifdef SMP
+static void cpu_reset_proxy(void);
+static u_int cpu_reset_proxyid;
+static volatile u_int cpu_reset_proxy_active;
+#endif
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
@@ -336,10 +342,69 @@ cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku)
* Force reset the processor by invalidating the entire address space!
*/
+#ifdef SMP
+static void
+cpu_reset_proxy()
+{
+
+ cpu_reset_proxy_active = 1;
+ while (cpu_reset_proxy_active == 1)
+ ; /* Wait for other cpu to see that we've started */
+ stop_cpus((1<<cpu_reset_proxyid));
+ printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
+ DELAY(1000000);
+ cpu_reset_real();
+}
+#endif
+
void
cpu_reset()
{
+#ifdef SMP
+ if (smp_active == 0) {
+ cpu_reset_real();
+ /* NOTREACHED */
+ } else {
+
+ u_int map;
+ int cnt;
+ printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid));
+
+ map = PCPU_GET(other_cpus) & ~ stopped_cpus;
+
+ if (map != 0) {
+ printf("cpu_reset: Stopping other CPUs\n");
+ stop_cpus(map); /* Stop all other CPUs */
+ }
+
+ if (PCPU_GET(cpuid) == 0) {
+ DELAY(1000000);
+ cpu_reset_real();
+ /* NOTREACHED */
+ } else {
+ /* We are not BSP (CPU #0) */
+
+ cpu_reset_proxyid = PCPU_GET(cpuid);
+ cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
+ printf("cpu_reset: Restarting BSP\n");
+ started_cpus = (1<<0); /* Restart CPU #0 */
+
+ cnt = 0;
+ while (cpu_reset_proxy_active == 0 && cnt < 10000000)
+ cnt++; /* Wait for BSP to announce restart */
+ if (cpu_reset_proxy_active == 0)
+ printf("cpu_reset: Failed to restart BSP\n");
+ enable_intr();
+ cpu_reset_proxy_active = 2;
+
+ while (1);
+ /* NOTREACHED */
+ }
+ }
+#else
cpu_reset_real();
+#endif
}
static void
OpenPOWER on IntegriCloud