diff options
author | peter <peter@FreeBSD.org> | 2003-11-17 08:58:16 +0000 |
---|---|---|
committer | peter <peter@FreeBSD.org> | 2003-11-17 08:58:16 +0000 |
commit | 9dedda25aa3f6dd18f44e322691c1351377f1927 (patch) | |
tree | aabbe8bf492a150ee82a721cb9396dd2a5b08c50 /sys | |
parent | 71a2f6d14698b35350148cb96e1da0939b08ccb2 (diff) | |
download | FreeBSD-src-9dedda25aa3f6dd18f44e322691c1351377f1927.zip FreeBSD-src-9dedda25aa3f6dd18f44e322691c1351377f1927.tar.gz |
Initial landing of SMP support for FreeBSD/amd64.
- This is heavily derived from John Baldwin's apic/pci cleanup on i386.
- I have completely rewritten or drastically cleaned up some other parts.
(in particular, bootstrap)
- This is still a WIP. It seems that there are some highly bogus BIOSes
on nVidia nForce3-150 boards. I can't stress how broken these boards
are. I have a workaround in mind, but right now the Asus SK8N is broken.
The Gigabyte K8NPro (nVidia based) is also mind-numbingly hosed.
- Most of my testing has been with SCHED_ULE. SCHED_4BSD works.
- the apic and acpi components are 'standard'.
- If you have an nVidia nForce3-150 board, you are stuck with 'device
atpic' in addition, because they somehow managed to forget to connect the
8254 timer to the apic, even though it's in the same silicon! ARGH!
This directly violates the ACPI spec.
Diffstat (limited to 'sys')
50 files changed, 1113 insertions, 2298 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index ecc4c67..2160dc4 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -42,7 +42,6 @@ #include <machine/asmacros.h> #include <machine/apicreg.h> -#include <machine/smptests.h> #include "assym.s" @@ -50,19 +49,48 @@ * Macros to create and destroy a trap frame. */ #define PUSH_FRAME \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; /* 8 ints */ \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ + jz 1f ; /* Yes, dont swapgs again */ \ + swapgs ; \ +1: movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) #define POP_FRAME \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp + movq TF_RDI(%rsp),%rdi ; \ + movq TF_RSI(%rsp),%rsi ; \ + movq TF_RDX(%rsp),%rdx ; \ + movq TF_RCX(%rsp),%rcx ; \ + movq TF_R8(%rsp),%r8 ; \ + movq TF_R9(%rsp),%r9 ; \ + movq TF_RAX(%rsp),%rax ; \ + movq TF_RBX(%rsp),%rbx ; \ + movq TF_RBP(%rsp),%rbp ; \ + movq TF_R10(%rsp),%r10 ; \ + movq TF_R11(%rsp),%r11 ; \ + movq TF_R12(%rsp),%r12 ; \ + movq TF_R13(%rsp),%r13 ; \ + movq TF_R14(%rsp),%r14 ; \ + movq TF_R15(%rsp),%r15 ; \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ + jz 1f ; /* keep kernel GS.base */ \ + cli ; \ + swapgs ; \ +1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ + /* * I/O Interrupt Entry Point. 
Rather than having one entry point for @@ -76,21 +104,15 @@ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - mov %ax, %ds ; \ - mov %ax, %es ; \ - movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ - mov %ax, %fs ; \ - movl lapic, %edx ; /* pointer to local APIC */ \ - movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ + movq lapic, %rdx ; /* pointer to local APIC */ \ + movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highset set bit in ISR */ \ jz 2f ; \ addl $(32 * index),%eax ; \ 1: ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid double count */ \ - pushl %eax ; /* pass the IRQ */ \ + movq %rax, %rdi ; /* pass the IRQ */ \ call lapic_handle_intr ; \ - addl $4, %esp ; /* discard parameter */ \ MEXITCOUNT ; \ jmp doreti ; \ 2: movl $-1, %eax ; /* send a vector of -1 */ \ @@ -109,7 +131,7 @@ IDTVEC(spuriousint) /* No EOI cycle used here */ - iret + iretq MCOUNT_LABEL(bintr2) ISR_VEC(1, apic_isr1) @@ -128,32 +150,19 @@ MCOUNT_LABEL(eintr2) .text SUPERALIGN_TEXT IDTVEC(invltlb) - pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_gbl(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ + pushq %rax - movl %cr3, %eax /* invalidate the TLB */ - movl %eax, %cr3 + movq %cr3, %rax /* invalidate the TLB */ + movq %rax, %cr3 - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait - popl %ds - popl %eax - iret + popq %rax + iretq /* * Single page TLB shootdown @@ -161,32 +170,19 @@ IDTVEC(invltlb) .text SUPERALIGN_TEXT IDTVEC(invlpg) - pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - 
movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_pg(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ + pushq %rax - movl smp_tlb_addr1, %eax - invlpg (%eax) /* invalidate single page */ + movq smp_tlb_addr1, %rax + invlpg (%rax) /* invalidate single page */ - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait - popl %ds - popl %eax - iret + popq %rax + iretq /* * Page range TLB shootdown. @@ -194,38 +190,25 @@ IDTVEC(invlpg) .text SUPERALIGN_TEXT IDTVEC(invlrng) - pushl %eax - pushl %edx - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_rng(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ - - movl smp_tlb_addr1, %edx - movl smp_tlb_addr2, %eax -1: invlpg (%edx) /* invalidate single page */ - addl $PAGE_SIZE, %edx - cmpl %eax, %edx + pushq %rax + pushq %rdx + + movq smp_tlb_addr1, %rdx + movq smp_tlb_addr2, %rax +1: invlpg (%rdx) /* invalidate single page */ + addq $PAGE_SIZE, %rdx + cmpq %rax, %rdx jb 1b - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait - popl %ds - popl %edx - popl %eax - iret + popq %rdx + popq %rax + iretq /* * Forward hardclock to another CPU. 
Pushes a clockframe and calls @@ -235,18 +218,11 @@ IDTVEC(invlrng) SUPERALIGN_TEXT IDTVEC(hardclock) PUSH_FRAME - movl $KDSEL, %eax /* reload with kernel's data segment */ - mov %ax, %ds - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + movq lapic, %rdx + movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ - pushl $0 /* XXX convert trapframe to clockframe */ call forwarded_hardclock - addl $4, %esp /* XXX convert clockframe to trapframe */ MEXITCOUNT jmp doreti @@ -258,20 +234,13 @@ IDTVEC(hardclock) SUPERALIGN_TEXT IDTVEC(statclock) PUSH_FRAME - movl $KDSEL, %eax /* reload with kernel's data segment */ - mov %ax, %ds - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + movq lapic, %rdx + movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) - pushl $0 /* XXX convert trapframe to clockframe */ call forwarded_statclock - addl $4, %esp /* XXX convert clockframe to trapframe */ MEXITCOUNT jmp doreti @@ -287,14 +256,9 @@ IDTVEC(statclock) SUPERALIGN_TEXT IDTVEC(cpuast) PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + movq lapic, %rdx + movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) @@ -311,63 +275,41 @@ IDTVEC(cpuast) .text SUPERALIGN_TEXT IDTVEC(cpustop) - pushl %ebp - movl %esp, %ebp - pushl %eax - pushl %ecx - pushl %edx - pushl %ds /* save current data segment */ - pushl %es - pushl %fs - - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + PUSH_FRAME + + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ movl PCPU(CPUID), %eax imull $PCB_SIZE, %eax - leal 
CNAME(stoppcbs)(%eax), %eax - pushl %eax - call CNAME(savectx) /* Save process context */ - addl $4, %esp + leaq stoppcbs(%rax), %rdi + call savectx /* Save process context */ movl PCPU(CPUID), %eax lock - btsl %eax, CNAME(stopped_cpus) /* stopped_cpus |= (1<<id) */ + btsl %eax, stopped_cpus /* stopped_cpus |= (1<<id) */ 1: - btl %eax, CNAME(started_cpus) /* while (!(started_cpus & (1<<id))) */ + btl %eax, started_cpus /* while (!(started_cpus & (1<<id))) */ jnc 1b lock - btrl %eax, CNAME(started_cpus) /* started_cpus &= ~(1<<id) */ + btrl %eax, started_cpus /* started_cpus &= ~(1<<id) */ lock - btrl %eax, CNAME(stopped_cpus) /* stopped_cpus &= ~(1<<id) */ + btrl %eax, stopped_cpus /* stopped_cpus &= ~(1<<id) */ test %eax, %eax jnz 2f - movl CNAME(cpustop_restartfunc), %eax - test %eax, %eax + movq cpustop_restartfunc, %rax + testq %rax, %rax jz 2f - movl $0, CNAME(cpustop_restartfunc) /* One-shot */ + movq $0, cpustop_restartfunc /* One-shot */ - call *%eax + call *%rax 2: - popl %fs - popl %es - popl %ds /* restore previous data segment */ - popl %edx - popl %ecx - popl %eax - movl %ebp, %esp - popl %ebp - iret + POP_FRAME + iretq /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. @@ -378,19 +320,13 @@ IDTVEC(cpustop) SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - call smp_rendezvous_action - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - POP_FRAME - iret + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + POP_FRAME /* Why not doreti? */ + iretq +#ifdef LAZY_SWITCH /* * Clean up when we lose out on the lazy context switch optimization. * ie: when we are about to release a PTD but a cpu is still borrowing it. 
@@ -398,16 +334,10 @@ IDTVEC(rendezvous) SUPERALIGN_TEXT IDTVEC(lazypmap) PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - call pmap_lazyfix_action - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - POP_FRAME - iret + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + POP_FRAME /* Why not doreti? */ + iretq +#endif #endif /* SMP */ diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c index d6ce6b6..adec2e0 100644 --- a/sys/amd64/amd64/autoconf.c +++ b/sys/amd64/amd64/autoconf.c @@ -76,7 +76,6 @@ __FBSDID("$FreeBSD$"); #include <nfsclient/nfsdiskless.h> #include <machine/md_var.h> -#include <amd64/isa/icu.h> #ifdef DEV_ISA #include <isa/isavar.h> @@ -109,23 +108,11 @@ configure(dummy) { /* - * Activate the ICU's. Note that we are explicitly at splhigh() - * at present as we have no way to disable stray PCI level triggered - * interrupts until the devices have had a driver attached. This - * is particularly a problem when the interrupts are shared. For - * example, if IRQ 10 is shared between a disk and network device - * and the disk device generates an interrupt, if we "activate" - * IRQ 10 when the network driver is set up, then we will get - * recursive interrupt 10's as nothing will know how to turn off - * the disk device's interrupt. - * - * Having the ICU's active means we can probe interrupt routing to - * see if a device causes the corresponding pending bit to be set. - * - * This is all rather inconvenient. + * Enable interrupts on the processor. The interrupts are still + * disabled in the interrupt controllers until interrupt handlers + * are registered. 
*/ enable_intr(); - INTREN(IRQ_SLAVE); /* nexus0 is the top of the i386 device tree */ device_add_child(root_bus, "nexus", 0); @@ -141,12 +128,6 @@ configure(dummy) if (isa_bus_device) isa_probe_children(isa_bus_device); #endif - - /* - * Now we're ready to handle (pending) interrupts. - * XXX this is slightly misplaced. - */ - spl0(); } static void diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 56f0c84..3bfcfc8 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -59,14 +59,16 @@ * %rsi = newtd */ ENTRY(cpu_throw) - xorq %rax, %rax movl PCPU(CPUID), %eax testq %rdi,%rdi /* no thread? */ jz 1f /* release bit from old pm_active */ movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ - btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ +#ifdef SMP + lock +#endif + btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ 1: movq TD_PCB(%rsi),%rdx /* newtd->td_proc */ movq PCB_CR3(%rdx),%rdx @@ -74,7 +76,10 @@ ENTRY(cpu_throw) /* set bit in new pm_active */ movq TD_PROC(%rsi),%rdx movq P_VMSPACE(%rdx), %rdx - btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ +#ifdef SMP + lock +#endif + btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ jmp sw1 /* @@ -143,7 +148,6 @@ ENTRY(cpu_switch) jz badsw3 /* no, panic */ #endif movq TD_PCB(%rsi),%r8 - xorq %rax, %rax movl PCPU(CPUID), %eax /* switch address space */ @@ -153,12 +157,18 @@ ENTRY(cpu_switch) /* Release bit from old pmap->pm_active */ movq TD_PROC(%rdi), %rdx /* oldproc */ movq P_VMSPACE(%rdx), %rdx - btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ +#ifdef SMP + lock +#endif + btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ /* Set bit in new pmap->pm_active */ movq TD_PROC(%rsi),%rdx /* newproc */ movq P_VMSPACE(%rdx), %rdx - btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ +#ifdef SMP + lock +#endif + btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ sw1: /* @@ -191,8 +201,11 @@ sw1: wrmsr /* Update the TSS_RSP0 pointer 
for the next interrupt */ + movq PCPU(TSSP), %rax + addq $COMMON_TSS_RSP0, %rax leaq -16(%r8), %rbx - movq %rbx, common_tss + COMMON_TSS_RSP0 + movq %rbx, (%rax) + movq %rbx, PCPU(RSP0) /* Restore context. */ movq PCB_RBX(%r8),%rbx diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c index 3dd6a8a..077c914 100644 --- a/sys/amd64/amd64/db_interface.c +++ b/sys/amd64/amd64/db_interface.c @@ -98,6 +98,22 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) ef = read_rflags(); disable_intr(); +#ifdef SMP + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf("\nCPU%d stopping CPUs: 0x%08x...", PCPU_GET(cpuid), + PCPU_GET(other_cpus)); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + + /* We stop all CPUs except ourselves (obviously) */ + stop_cpus(PCPU_GET(other_cpus)); + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf(" stopped.\n"); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + +#endif /* SMP */ + switch (type) { case T_BPTFLT: /* breakpoint */ case T_TRCTRAP: /* debug exception */ @@ -192,6 +208,27 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) regs->tf_ds = ddb_regs.tf_ds & 0xffff; #endif +#ifdef SMP + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf("\nCPU%d restarting CPUs: 0x%08x...", PCPU_GET(cpuid), + stopped_cpus); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + + /* Restart all the CPUs we previously stopped */ + if (stopped_cpus != PCPU_GET(other_cpus) && smp_started != 0) { + db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", + PCPU_GET(other_cpus), stopped_cpus); + panic("stop_cpus() failed"); + } + restart_cpus(stopped_cpus); + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf(" restarted.\n"); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + +#endif /* SMP */ + write_rflags(ef); return (1); diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c index a05348a..7dba9bb 100644 --- a/sys/amd64/amd64/db_trace.c +++ b/sys/amd64/amd64/db_trace.c @@ -245,8 +245,9 @@ db_nextframe(fp, 
ip, p) if (strcmp(name, "calltrap") == 0 || strcmp(name, "fork_trampoline") == 0) frame_type = TRAP; - else if (strncmp(name, "Xintr", 5) == 0 || - strncmp(name, "Xfastintr", 9) == 0) + else if (strncmp(name, "Xatpic_intr", 11) == 0 || + strncmp(name, "Xatpic_fastintr", 15) == 0 || + strncmp(name, "Xapic_isr", 9) == 0) frame_type = INTERRUPT; else if (strcmp(name, "Xfast_syscall") == 0) frame_type = SYSCALL; diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 3d2eaa6..972f19c 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -35,14 +35,11 @@ */ #include <machine/asmacros.h> -#include <sys/mutex.h> #include <machine/psl.h> #include <machine/trap.h> #include "assym.s" -#define SEL_RPL_MASK 0x0003 - .text /*****************************************************************************/ @@ -72,8 +69,6 @@ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we * must load them with appropriate values for supervisor mode operation. */ -#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ - .type __CONCAT(X,name),@function; __CONCAT(X,name): MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) @@ -223,7 +218,7 @@ IDTVEC(page) IDTVEC(fast_syscall) swapgs movq %rsp,PCPU(SCRATCH_RSP) - movq common_tss+COMMON_TSS_RSP0,%rsp + movq PCPU(RSP0),%rsp /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */ subq $TF_SIZE,%rsp /* defer TF_RSP till we have a spare register */ @@ -297,14 +292,6 @@ ENTRY(fork_trampoline) call fork_exit jmp doreti /* Handle any ASTs */ - -/* - * Include what was once config+isa-dependent code. - * XXX it should be in a stand-alone file. It's still icu-dependent and - * belongs in i386/isa. 
- */ -#include "amd64/isa/vector.S" - .data ALIGN_DATA @@ -406,5 +393,3 @@ doreti_iret_fault: movq $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ERR(%rsp) /* XXX should be the error code */ jmp alltraps_with_regs_pushed - -#include "amd64/isa/icu_ipl.S" diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 1e4890c..1acb931 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cputypes.h> #include <machine/frame.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/pcb.h> #include <machine/psl.h> @@ -63,8 +64,6 @@ __FBSDID("$FreeBSD$"); #include <machine/segments.h> #include <machine/ucontext.h> -#include <amd64/isa/intr_machdep.h> - /* * Floating point support. */ diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 6a017e5..27a1a12 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -69,10 +69,12 @@ __FBSDID("$FreeBSD$"); #include <nfs/rpcv2.h> #include <nfsclient/nfs.h> #include <nfsclient/nfsdiskless.h> +#include <machine/apicreg.h> #include <machine/cpu.h> #include <machine/sigframe.h> #include <machine/proc.h> #include <machine/specialreg.h> +#include <machine/segments.h> ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -83,11 +85,6 @@ ASSYM(P_UAREA, offsetof(struct proc, p_uarea)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); -ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level)); -ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest)); -ASSYM(TD_MD, offsetof(struct thread, td_md)); - -ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); @@ -180,6 +177,7 @@ ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_rflags)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); 
ASSYM(ENAMETOOLONG, ENAMETOOLONG); +ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(MAXPATHLEN, MAXPATHLEN); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); @@ -189,12 +187,24 @@ ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp)); +ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); +ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp)); +ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0)); + +ASSYM(LA_VER, offsetof(struct LAPIC, version)); +ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); +ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); +ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); +ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); +ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); +ASSYM(LA_ISR, offsetof(struct LAPIC, isr0)); ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL)); +ASSYM(SEL_RPL_MASK, SEL_RPL_MASK); ASSYM(MSR_FSBASE, MSR_FSBASE); ASSYM(MSR_GSBASE, MSR_GSBASE); diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index ba8e58e..f3d70c2 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -55,12 +55,13 @@ __FBSDID("$FreeBSD$"); #include <machine/asmacros.h> #include <machine/clock.h> #include <machine/cputypes.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> #include <machine/segments.h> #include <machine/specialreg.h> #include <machine/md_var.h> #include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> /* XXX - should be in header file: */ void printcpuinfo(void); diff --git a/sys/amd64/amd64/io_apic.c b/sys/amd64/amd64/io_apic.c index 4af70fa..b620440 100644 --- a/sys/amd64/amd64/io_apic.c +++ b/sys/amd64/amd64/io_apic.c @@ -30,6 +30,7 @@ #include 
<sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_atpic.h" #include "opt_isa.h" #include "opt_no_mixed_mode.h" @@ -50,8 +51,8 @@ __FBSDID("$FreeBSD$"); #include <machine/apicvar.h> #include <machine/segments.h> -#if defined(DEV_ISA) && !defined(NO_MIXED_MODE) -#define MIXED_MODE +#if defined(DEV_ISA) && defined(DEV_ATPIC) && !defined(NO_MIXED_MODE) +#define MIXED_MODE #endif #define IOAPIC_ISA_INTS 16 diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c index 6f942bf..bdff518 100644 --- a/sys/amd64/amd64/local_apic.c +++ b/sys/amd64/amd64/local_apic.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/kernel.h> #include <sys/pcpu.h> +#include <sys/proc.h> #include <vm/vm.h> #include <vm/pmap.h> @@ -171,8 +172,7 @@ lapic_init(uintptr_t addr) KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t)); - setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); /* Perform basic initialization of the BSP's local APIC. 
*/ value = lapic->svr; @@ -242,8 +242,7 @@ lapic_enable_intr(u_int irq) KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for IRQ %u", irq)); - setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(vector, ioint_handlers[vector / 32], SDT_SYSIGT, SEL_KPL, 0); } void @@ -478,13 +477,14 @@ lapic_eoi(void) } void -lapic_handle_intr(struct intrframe frame) +lapic_handle_intr(void *cookie, struct intrframe frame) { struct intsrc *isrc; + int vec = (uintptr_t)cookie; - if (frame.if_vec == -1) + if (vec == -1) panic("Couldn't get vector from ISR!"); - isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec)); + isrc = intr_lookup_source(apic_idt_to_irq(vec)); intr_execute_handlers(isrc, &frame); } @@ -589,21 +589,9 @@ static void apic_setup_local(void *dummy __unused) { int retval; - uint64_t apic_base; if (best_enum == NULL) return; - /* - * To work around an errata, we disable the local APIC on some - * CPUs during early startup. We need to turn the local APIC back - * on on such CPUs now. 
- */ - if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && - (cpu_id & 0xff0) == 0x610) { - apic_base = rdmsr(MSR_APICBASE); - apic_base |= APICBASE_ENABLED; - wrmsr(MSR_APICBASE, apic_base); - } retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e32d82a..2140d7a 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include "opt_atalk.h" +#include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" @@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$"); #include <machine/reg.h> #include <machine/clock.h> #include <machine/specialreg.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/metadata.h> #include <machine/proc.h> @@ -108,9 +110,13 @@ __FBSDID("$FreeBSD$"); #include <machine/perfmon.h> #endif #include <machine/tss.h> +#ifdef SMP +#include <machine/smp.h> +#endif #include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> + +#include <isa/isareg.h> #include <isa/rtc.h> #include <sys/ptrace.h> #include <machine/sigframe.h> @@ -146,7 +152,9 @@ vm_paddr_t phys_avail[10]; struct kva_md_info kmi; static struct trapframe proc0_tf; -static struct pcpu __pcpu; +struct region_descriptor r_gdt, r_idt; + +struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; @@ -196,7 +204,6 @@ cpu_startup(dummy) bufinit(); vm_pager_bufferinit(); - /* For SMP, we delay the cpu_setregs() until after SMP startup. 
*/ cpu_setregs(); } @@ -589,13 +596,13 @@ SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, * Initialize segments & interrupt table */ -struct user_segment_descriptor gdt[NGDT];/* global descriptor table */ +struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); -struct amd64tss common_tss; +struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { @@ -755,6 +762,15 @@ ssdtosyssd(ssd, sd) sd->sd_gran = ssd->ssd_gran; } +#if !defined(DEV_ATPIC) && defined(DEV_ISA) +#include <isa/isavar.h> +u_int +isa_irq_pending(void) +{ + + return (0); +} +#endif #define PHYSMAP_SIZE (2 * 8) @@ -783,7 +799,6 @@ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx; - u_int extmem; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; @@ -802,12 +817,9 @@ getmemsize(caddr_t kmdp, u_int64_t first) * ie: an int32_t immediately precedes smap. 
*/ smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); - if (smapbase == 0) - smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | 0x0009); /* Old value for MODINFOMD_SMAP */ - if (smapbase == 0) { + if (smapbase == NULL) panic("No BIOS smap info from loader!"); - goto deep_shit; - } + smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); @@ -816,14 +828,11 @@ getmemsize(caddr_t kmdp, u_int64_t first) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); - if (smap->type != 0x01) { + if (smap->type != 0x01) continue; - } - if (smap->length == 0) { -next_run: + if (smap->length == 0) continue; - } for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { @@ -836,6 +845,7 @@ next_run: if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; +next_run: continue; } @@ -850,69 +860,23 @@ next_run: } /* - * Perform "base memory" related probes & setup based on SMAP + * Find the 'base memory' segment for SMP */ -deep_shit: - if (basemem == 0) { - for (i = 0; i <= physmap_idx; i += 2) { - if (physmap[i] == 0x00000000) { - basemem = physmap[i + 1] / 1024; - break; - } - } - - if (basemem == 0) { - basemem = rtcin(RTC_BASELO) + (rtcin(RTC_BASEHI) << 8); - } - - if (basemem == 0) { - basemem = 640; - } - - if (basemem > 640) { - printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", - basemem); - basemem = 640; + basemem = 0; + for (i = 0; i <= physmap_idx; i += 2) { + if (physmap[i] == 0x00000000) { + basemem = physmap[i + 1] / 1024; + break; } - -#if 0 - for (pa = trunc_page(basemem * 1024); - pa < ISA_HOLE_START; pa += PAGE_SIZE) - pmap_kenter(KERNBASE + pa, pa); -#endif } + if (basemem == 0) + panic("BIOS smap did not include a basemem segment!"); - if (physmap[1] != 0) - goto physmap_done; - - /* - * Prefer the RTC value for extended memory. 
- */ - extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); - - /* - * Special hack for chipsets that still remap the 384k hole when - * there's 16MB of memory - this really confuses people that - * are trying to use bus mastering ISA controllers with the - * "16MB limit"; they only have 16MB, but the remapping puts - * them beyond the limit. - * - * If extended memory is between 15-16MB (16-17MB phys address range), - * chop it to 15MB. - */ - if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) - extmem = 15 * 1024; - - physmap[0] = 0; - physmap[1] = basemem * 1024; - physmap_idx = 2; - physmap[physmap_idx] = 0x100000; - physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; +#ifdef SMP + /* make hole for AP bootstrap code */ + physmap[1] = mp_bootaddress(physmap[1] / 1024); +#endif -physmap_done: - /* - * Now, physmap contains a map of physical memory. - */ /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be @@ -929,7 +893,8 @@ physmap_done: * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. 
*/ - if ((cp = getenv("hw.physmem")) != NULL) { + cp = getenv("hw.physmem"); + if (cp != NULL) { u_int64_t AllowMem, sanity; char *ep; @@ -1106,11 +1071,18 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, off, x; - struct region_descriptor r_gdt, r_idt; struct pcpu *pc; u_int64_t msr; char *env; +#ifdef DEV_ISA + /* Preemptively mask the atpics and leave them shut down */ + outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); + outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); +#else +#error "have you forgotten the isa device?"; +#endif + /* Turn on PTE NX (no execute) bit */ msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); @@ -1146,7 +1118,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) /* * make gdt memory segments */ - gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss; + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) @@ -1157,7 +1129,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); - pc = &__pcpu; + pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); @@ -1166,6 +1138,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); + PCPU_SET(tssp, &common_tss[0]); /* * Initialize mutexes. @@ -1211,8 +1184,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) */ cninit(); -#ifdef DEV_ISA - isa_defaultirq(); +#ifdef DEV_ATPIC + atpic_startup(); #endif #ifdef DDB @@ -1225,12 +1198,14 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ - common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); + common_tss[0].tss_rsp0 = thread0.td_kstack + \ + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); /* Ensure the stack is aligned to 16 bytes */ - common_tss.tss_rsp0 &= ~0xF; + common_tss[0].tss_rsp0 &= ~0xF; + PCPU_SET(rsp0, common_tss[0].tss_rsp0); /* doublefault stack space, runs on ist1 */ - common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index 56d268c..aeb2e35 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -323,6 +323,15 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg) return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg)); } +#ifdef SMP +void +mem_range_AP_init(void) +{ + if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) + (mem_range_softc.mr_op->initAP(&mem_range_softc)); +} +#endif + static int mem_modevent(module_t mod, int type, void *data) { diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index f58a94f..fcd478b 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 1996, by Steve Passe + * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,30 +27,12 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" -#if !defined(lint) -#if !defined(SMP) -#error How did you get here? -#endif - -#if defined(I386_CPU) && !defined(COMPILING_LINT) -#error SMP not supported with I386_CPU -#endif -#ifndef DEV_APIC -#error The apic device is required for SMP, add "device apic" to your config file. 
-#endif -#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) -#error SMP not supported with CPU_DISABLE_CMPXCHG -#endif -#endif /* not lint */ - #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> -#include <sys/cons.h> /* cngetc() */ #ifdef GPROF #include <sys/gmon.h> #endif @@ -75,9 +58,8 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/pcb.h> #include <machine/smp.h> -#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */ #include <machine/specialreg.h> -#include <machine/privatespace.h> +#include <machine/tss.h> #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) @@ -88,67 +70,9 @@ __FBSDID("$FreeBSD$"); #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) -/* - * this code MUST be enabled here and in mpboot.s. - * it follows the very early stages of AP boot by placing values in CMOS ram. - * it NORMALLY will never be needed and thus the primitive method for enabling. - * -#define CHECK_POINTS - */ - -#if defined(CHECK_POINTS) && !defined(PC98) -#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) -#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) - -#define CHECK_INIT(D); \ - CHECK_WRITE(0x34, (D)); \ - CHECK_WRITE(0x35, (D)); \ - CHECK_WRITE(0x36, (D)); \ - CHECK_WRITE(0x37, (D)); \ - CHECK_WRITE(0x38, (D)); \ - CHECK_WRITE(0x39, (D)); - -#define CHECK_PRINT(S); \ - printf("%s: %d, %d, %d, %d, %d, %d\n", \ - (S), \ - CHECK_READ(0x34), \ - CHECK_READ(0x35), \ - CHECK_READ(0x36), \ - CHECK_READ(0x37), \ - CHECK_READ(0x38), \ - CHECK_READ(0x39)); - -#else /* CHECK_POINTS */ - -#define CHECK_INIT(D) -#define CHECK_PRINT(S) -#define CHECK_WRITE(A, D) - -#endif /* CHECK_POINTS */ - -/* - * Values to send to the POST hardware. 
- */ -#define MP_BOOTADDRESS_POST 0x10 -#define MP_PROBE_POST 0x11 -#define MPTABLE_PASS1_POST 0x12 - -#define MP_START_POST 0x13 -#define MP_ENABLE_POST 0x14 -#define MPTABLE_PASS2_POST 0x15 - -#define START_ALL_APS_POST 0x16 -#define INSTALL_AP_TRAMP_POST 0x17 -#define START_AP_POST 0x18 - -#define MP_ANNOUNCE_POST 0x19 - /* lock region used by kernel profiling */ int mcount_lock; -/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ -int current_postcode; - int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern int nkpt; @@ -164,6 +88,9 @@ struct cpu_top *smp_topology; char *bootSTK; static int bootAP; +/* Free these after use */ +void *bootstacks[MAXCPU]; + /* Hotwire a 0->4MB V==P mapping */ extern pt_entry_t *KPTphys; @@ -178,6 +105,8 @@ vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; struct mtx smp_tlb_mtx; +extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); + /* * Local data and functions. */ @@ -201,17 +130,17 @@ struct cpu_info { } static cpu_info[MAXCPU]; static int cpu_apic_ids[MAXCPU]; -static u_int boot_address; +static u_int boot_address; static void set_logical_apic_ids(void); static int start_all_aps(void); -static void install_ap_tramp(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static int hlt_cpus_mask; static int hlt_logical_cpus; static struct sysctl_ctx_list logical_cpu_clist; +static u_int bootMP_size; /* * Calculate usable address in base memory for AP trampoline code. 
@@ -219,13 +148,15 @@ static struct sysctl_ctx_list logical_cpu_clist; u_int mp_bootaddress(u_int basemem) { - POSTCODE(MP_BOOTADDRESS_POST); - boot_address = trunc_page(basemem); /* round down to 4k boundary */ + bootMP_size = mptramp_end - mptramp_start; + boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ if ((basemem - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ + /* 3 levels of page table pages */ + mptramp_pagetables = boot_address - (PAGE_SIZE * 3); - return boot_address; + return mptramp_pagetables; } void @@ -302,43 +233,34 @@ cpu_mp_start(void) { int i; - POSTCODE(MP_START_POST); - /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) cpu_apic_ids[i] = -1; /* Install an inter-CPU IPI for TLB invalidation */ - setidt(IPI_INVLTLB, IDTVEC(invltlb), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IPI_INVLPG, IDTVEC(invlpg), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IPI_INVLRNG, IDTVEC(invlrng), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for forwarding hardclock() */ - setidt(IPI_HARDCLOCK, IDTVEC(hardclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_HARDCLOCK, IDTVEC(hardclock), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for forwarding statclock() */ - setidt(IPI_STATCLOCK, IDTVEC(statclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_STATCLOCK, IDTVEC(statclock), SDT_SYSIGT, SEL_KPL, 0); +#ifdef LAZY_SWITCH /* Install an inter-CPU IPI for lazy pmap release */ - setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYSIGT, SEL_KPL, 0); +#endif /* Install an inter-CPU IPI for 
all-CPU rendezvous */ - setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for forcing an additional software trap */ - setidt(IPI_AST, IDTVEC(cpuast), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_AST, IDTVEC(cpuast), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ - setidt(IPI_STOP, IDTVEC(cpustop), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN); @@ -371,8 +293,6 @@ cpu_mp_announce(void) { int i, x; - POSTCODE(MP_ANNOUNCE_POST); - /* List CPUs */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1, x = 0; x < MAXCPU; x++) { @@ -390,38 +310,41 @@ cpu_mp_announce(void) void init_secondary(void) { - int gsel_tss; - int x, myid; - u_int cr0; - - /* bootAP is set in start_ap() to our ID. 
*/ - myid = bootAP; - gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid]; - gdt_segs[GPROC0_SEL].ssd_base = - (int) &SMP_prvspace[myid].pcpu.pc_common_tss; - SMP_prvspace[myid].pcpu.pc_prvspace = - &SMP_prvspace[myid].pcpu; - - for (x = 0; x < NGDT; x++) { - ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); - } + struct pcpu *pc; + u_int64_t msr, cr0; + int cpu, gsel_tss; + + /* Set by the startup code for us to use */ + cpu = bootAP; + + /* Init tss */ + common_tss[cpu] = common_tss[0]; + common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ + + gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; + ssdtosyssd(&gdt_segs[GPROC0_SEL], + (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); - r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; - r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ - lidt(&r_idt); + /* Get per-cpu data */ + pc = &__pcpu[cpu]; + + /* prime data page for it to use */ + pcpu_init(pc, cpu, sizeof(struct pcpu)); + pc->pc_apic_id = cpu_apic_ids[cpu]; + pc->pc_prvspace = pc; + pc->pc_curthread = 0; + pc->pc_tssp = &common_tss[cpu]; + pc->pc_rsp0 = 0; - lldt(_default_ldt); - PCPU_SET(currentldt, _default_ldt); + wrmsr(MSR_FSBASE, 0); /* User value */ + wrmsr(MSR_GSBASE, (u_int64_t)pc); + wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ + + lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); - gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; - PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); - PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); - PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); - PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); /* @@ -432,32 +355,32 @@ init_secondary(void) cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); - CHECK_WRITE(0x38, 5); - - /* Disable local APIC just to be sure. 
*/ + + /* Set up the fast syscall stuff */ + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, msr); + wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + + /* Disable local apic just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; - CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); - /* BSP may have changed PTD while we were waiting */ - invltlb(); - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - lidt(&r_idt); -#endif - /* set up CPU registers and state */ cpu_setregs(); /* set up FPU state on the AP */ - npxinit(__INITIAL_NPXCW__); + fpuinit(); /* set up SSE registers */ enable_sse(); @@ -467,7 +390,6 @@ init_secondary(void) printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); - printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]); panic("cpuid mismatch! 
boom!!"); } @@ -559,39 +481,51 @@ set_logical_apic_ids(void) static int start_all_aps(void) { -#ifndef PC98 u_char mpbiosreason; -#endif - u_long mpbioswarmvec; - struct pcpu *pc; - char *stack; - uintptr_t kptbase; - int i, pg, apic_id, cpu; - - POSTCODE(START_ALL_APS_POST); + u_int32_t mpbioswarmvec; + int apic_id, cpu, i; + u_int64_t *pt4, *pt3, *pt2; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ - install_ap_tramp(); + pmap_kenter(boot_address + KERNBASE, boot_address); + bcopy(mptramp_start, (void *)((uintptr_t)boot_address + KERNBASE), bootMP_size); + + /* Locate the page tables, they'll be below the trampoline */ + pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); + pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); + pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); + + /* Create the initial 1GB replicated page tables */ + for (i = 0; i < 512; i++) { + /* Each slot of the level 4 pages points to the same level 3 page */ + pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); + pt4[i] |= PG_V | PG_RW | PG_U; + + /* Each slot of the level 3 pages points to the same level 2 page */ + pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); + pt3[i] |= PG_V | PG_RW | PG_U; + + /* The level 2 page slots are mapped with 2MB pages for 1GB. 
*/ + pt2[i] = i * (2 * 1024 * 1024); + pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; + } /* save the current value of the warm-start vector */ - mpbioswarmvec = *((u_long *) WARMBOOT_OFF); -#ifndef PC98 + mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); -#endif - /* set up temporary P==V mapping for AP boot */ - /* XXX this is a hack, we should boot the AP on its own stack/PTD */ - kptbase = (uintptr_t)(void *)KPTphys; - for (i = 0; i < NKPT; i++) - PTD[i] = (pd_entry_t)(PG_V | PG_RW | - ((kptbase + i * PAGE_SIZE) & PG_FRAME)); - invltlb(); + /* setup a vector to our boot code */ + *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* start each AP */ - for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { + cpu = 0; + for (apic_id = 0; apic_id < MAXCPU; apic_id++) { if (!cpu_info[apic_id].cpu_present || cpu_info[apic_id].cpu_bsp) continue; @@ -600,48 +534,18 @@ start_all_aps(void) /* save APIC ID for this logical ID */ cpu_apic_ids[cpu] = apic_id; - /* first page of AP's private space */ - pg = cpu * i386_btop(sizeof(struct privatespace)); - - /* allocate a new private data page */ - pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE); - - /* wire it into the private page table page */ - SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc)); - /* allocate and set up an idle stack data page */ - stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */ - for (i = 0; i < KSTACK_PAGES; i++) - SMPpt[pg + 1 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - - /* prime data page for it to use */ - pcpu_init(pc, cpu, sizeof(struct pcpu)); - pc->pc_apic_id = apic_id; - - /* setup a vector to our boot code */ - *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; - *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 
-#ifndef PC98 - outb(CMOS_REG, BIOS_RESET); - outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ -#endif + bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES * - PAGE_SIZE]; + bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ - CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { - printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); - CHECK_PRINT("trace"); /* show checkpoints */ - /* better panic as the AP may be running loose */ - printf("panic y/n? [y] "); - if (cngetc() != 'n') - panic("bye-bye"); + /* restore the warmstart vector */ + *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; + panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } - CHECK_PRINT("trace"); /* show checkpoints */ all_cpus |= (1 << cpu); /* record AP in CPU map */ } @@ -650,92 +554,15 @@ start_all_aps(void) PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); /* restore the warmstart vector */ - *(u_long *) WARMBOOT_OFF = mpbioswarmvec; -#ifndef PC98 + *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; + outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); -#endif - - /* - * Set up the idle context for the BSP. Similar to above except - * that some was done by locore, some by pmap.c and some is implicit - * because the BSP is cpu#0 and the page is initially zero and also - * because we can refer to variables by name on the BSP.. - */ - - /* Allocate and setup BSP idle stack */ - stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - for (i = 0; i < KSTACK_PAGES; i++) - SMPpt[1 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - - for (i = 0; i < NKPT; i++) - PTD[i] = 0; - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } -/* - * load the 1st level AP boot code into base memory. 
- */ - -/* targets for relocation */ -extern void bigJump(void); -extern void bootCodeSeg(void); -extern void bootDataSeg(void); -extern void MPentry(void); -extern u_int MP_GDT; -extern u_int mp_gdtbase; - -static void -install_ap_tramp(void) -{ - int x; - int size = *(int *) ((u_long) & bootMP_size); - u_char *src = (u_char *) ((u_long) bootMP); - u_char *dst = (u_char *) boot_address + KERNBASE; - u_int boot_base = (u_int) bootMP; - u_int8_t *dst8; - u_int16_t *dst16; - u_int32_t *dst32; - - POSTCODE(INSTALL_AP_TRAMP_POST); - - pmap_kenter(boot_address + KERNBASE, boot_address); - for (x = 0; x < size; ++x) - *dst++ = *src++; - - /* - * modify addresses in code we just moved to basemem. unfortunately we - * need fairly detailed info about mpboot.s for this to work. changes - * to mpboot.s might require changes here. - */ - - /* boot code is located in KERNEL space */ - dst = (u_char *) boot_address + KERNBASE; - - /* modify the lgdt arg */ - dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); - *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); - - /* modify the ljmp target for MPentry() */ - dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); - *dst32 = ((u_int) MPentry - KERNBASE); - - /* modify the target for boot code segment */ - dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); - dst8 = (u_int8_t *) (dst16 + 1); - *dst16 = (u_int) boot_address & 0xffff; - *dst8 = ((u_int) boot_address >> 16) & 0xff; - - /* modify the target for boot data segment */ - dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); - dst8 = (u_int8_t *) (dst16 + 1); - *dst16 = (u_int) boot_address & 0xffff; - *dst8 = ((u_int) boot_address >> 16) & 0xff; -} /* * This function starts the AP (application processor) identified @@ -750,8 +577,6 @@ start_ap(int apic_id) int vector, ms; int cpus; - POSTCODE(START_AP_POST); - /* calculate the vector */ vector = (boot_address >> 12) & 0xff; @@ -810,50 +635,14 @@ start_ap(int apic_id) 
DELAY(200); /* wait ~200uS */ /* Wait up to 5 seconds for it to start. */ - for (ms = 0; ms < 5000; ms++) { + for (ms = 0; ms < 50; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ - DELAY(1000); + DELAY(100000); } return 0; /* return FAILURE */ } -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif /* COUNT_XINVLTLB_HITS */ - /* * Flush the TLB on all other CPU's */ @@ -966,69 +755,49 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse void smp_invltlb(void) { - if (smp_started) { + + if (smp_started) smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } } void smp_invlpg(vm_offset_t 
addr) { - if (smp_started) { + + if (smp_started) smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { - if (smp_started) { + + if (smp_started) smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } } void smp_masked_invltlb(u_int mask) { - if (smp_started) { + + if (smp_started) smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } } void smp_masked_invlpg(u_int mask, vm_offset_t addr) { - if (smp_started) { + + if (smp_started) smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } } void smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) { - if (smp_started) { + + if (smp_started) smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } } diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S index 8f42f6b..ca53a87 100644 --- a/sys/amd64/amd64/mpboot.S +++ b/sys/amd64/amd64/mpboot.S @@ -1,5 +1,5 @@ -/* - * Copyright (c) 1995, Jack F. Vogel +/*- + * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -10,16 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Jack F. Vogel - * 4. 
The name of the developer may be used to endorse or promote products - * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -28,257 +23,214 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * mpboot.s: FreeBSD machine support for the Intel MP Spec - * multiprocessor systems. - * * $FreeBSD$ */ -#include "opt_pmap.h" - #include <machine/asmacros.h> /* miscellaneous asm macros */ -#include <machine/apicreg.h> #include <machine/specialreg.h> #include "assym.s" -#define R(x) ((x)-KERNBASE) - -/* - * this code MUST be enabled here and in mp_machdep.c - * it follows the very early stages of AP boot by placing values in CMOS ram. - * it NORMALLY will never be needed and thus the primitive method for enabling. - * -#define CHECK_POINTS - */ - -#if defined(CHECK_POINTS) && !defined(PC98) - -#define CMOS_REG (0x70) -#define CMOS_DATA (0x71) - -#define CHECKPOINT(A,D) \ - movb $(A),%al ; \ - outb %al,$CMOS_REG ; \ - movb $(D),%al ; \ - outb %al,$CMOS_DATA + .data /* So we can modify it */ -#else - -#define CHECKPOINT(A,D) - -#endif /* CHECK_POINTS */ - - -/* - * the APs enter here from their trampoline code (bootMP, below) - */ - .p2align 4 - -NON_GPROF_ENTRY(MPentry) - CHECKPOINT(0x36, 3) + .p2align 4,0 + .globl mptramp_start +mptramp_start: + .code16 /* - * Enable features on this processor. 
We don't support SMP on - * CPUs older than a Pentium, so we know that we can use the cpuid - * instruction. + * The AP enters here in response to the startup IPI. + * We are in real mode. %cs is the only segment register set. */ - movl $1,%eax - cpuid /* Retrieve features */ - movl %cr4,%eax -#ifndef DISABLE_PSE - testl $CPUID_PSE,%edx - jz 1f - orl $CR4_PSE,%eax /* Enable PSE */ -1: -#endif -#ifndef DISABLE_PG_G - testl $CPUID_PGE,%edx - jz 1f - orl $CR4_PGE,%eax /* Enable PGE */ -1: -#endif - testl $CPUID_VME,%edx - jz 1f - orl $CR4_VME,%eax /* Enable VME */ -1: - movl %eax,%cr4 - - /* Now enable paging mode */ -#ifdef PAE - movl R(IdlePDPT), %eax - movl %eax, %cr3 - movl %cr4, %eax - orl $CR4_PAE, %eax - movl %eax, %cr4 -#else - movl R(IdlePTD), %eax - movl %eax,%cr3 -#endif - movl %cr0,%eax - orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl %eax,%cr0 /* let the games begin! */ - movl bootSTK,%esp /* boot stack end loc. */ - - pushl $mp_begin /* jump to high mem */ - ret + cli /* make sure no interrupts */ + mov %cs, %ax /* copy %cs to %ds. Remember these */ + mov %ax, %ds /* are offsets rather than selectors */ + mov %ax, %ss /* - * Wait for the booting CPU to signal startup + * Find relocation base and patch the gdt descript and ljmp targets */ -mp_begin: /* now running relocated at KERNBASE */ - CHECKPOINT(0x37, 4) - call init_secondary /* load i386 tables */ - -/* - * This is the embedded trampoline or bootstrap that is - * copied into 'real-mode' low memory, it is where the - * secondary processor "wakes up". When it is executed - * the processor will eventually jump into the routine - * MPentry, which resides in normal kernel text above - * 1Meg. -jackv - */ + xorl %ebx,%ebx + mov %cs, %bx + sall $4, %ebx /* %ebx is now our relocation base */ + orl %ebx, lgdt_desc-mptramp_start+2 + orl %ebx, jmp_32-mptramp_start+2 + orl %ebx, jmp_64-mptramp_start+1 - .data - ALIGN_DATA /* just to be sure */ + /* + * Load the descriptor table pointer. 
We'll need it when running + * in 16 bit protected mode. + */ + lgdt lgdt_desc-mptramp_start -BOOTMP1: + /* Enable protected mode */ + movl $CR0_PE, %eax + mov %eax, %cr0 -NON_GPROF_ENTRY(bootMP) - .code16 - cli - CHECKPOINT(0x34, 1) - /* First guarantee a 'clean slate' */ - xorl %eax, %eax - movl %eax, %ebx - movl %eax, %ecx - movl %eax, %edx - movl %eax, %esi - movl %eax, %edi + /* + * Now execute a far jump to turn on protected mode. This + * causes the segment registers to turn into selectors and causes + * %cs to be loaded from the gdt. + * + * The following instruction is: + * ljmpl $bootcode-gdt, $protmode-mptramp_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and its a little clearer what we are patching. + */ +jmp_32: + .byte 0x66 /* size override to 32 bits */ + .byte 0xea /* opcode for far jump */ + .long protmode-mptramp_start /* offset in segment */ + .word bootcode-gdt /* index in gdt for 32 bit code */ - /* set up data segments */ - mov %cs, %ax + /* + * At this point, we are running in 32 bit legacy protected mode. + */ + .code32 +protmode: + mov $bootdata-gdt, %eax mov %ax, %ds - mov %ax, %es - mov %ax, %fs - mov %ax, %gs - mov %ax, %ss - mov $(boot_stk-bootMP), %esp - /* Now load the global descriptor table */ - lgdt MP_GDTptr-bootMP + /* Turn on the PAE, PSE and PGE bits for when paging is enabled */ + mov %cr4, %eax + orl $(CR4_PAE | CR4_PSE), %eax + mov %eax, %cr4 - /* Enable protected mode */ - movl %cr0, %eax - orl $CR0_PE, %eax - movl %eax, %cr0 + /* + * Enable EFER.LME so that we get long mode when all the prereqs are + * in place. In this case, it turns on when CR0_PG is finally enabled. + * Pick up a few other EFER bits that we'll use need we're here. + */ + movl $MSR_EFER, %ecx + rdmsr + orl $EFER_LME | EFER_SCE | EFER_NXE, %eax + wrmsr /* - * make intrasegment jump to flush the processor pipeline and - * reload CS register + * Point to the embedded page tables for startup. 
Note that this + * only gets accessed after we're actually in 64 bit mode, however + * we can only set the bottom 32 bits of %cr3 in this state. This + * means we are required to use a temporary page table that is below + * the 4GB limit. %ebx is still our relocation base. We could just + * subtract 3 * PAGE_SIZE, but that would be too easy. */ - pushl $0x18 - pushl $(protmode-bootMP) - lretl + leal mptramp_pagetables-mptramp_start(%ebx),%eax + movl (%eax), %eax + mov %eax, %cr3 - .code32 -protmode: - CHECKPOINT(0x35, 2) + /* + * Finally, switch to long bit mode by enabling paging. We have + * to be very careful here because all the segmentation disappears + * out from underneath us. The spec says we can depend on the + * subsequent pipelined branch to execute, but *only if* everthing + * is still identity mapped. If any mappings change, the pipeline + * will flush. + */ + mov %cr0, %eax + orl $CR0_PG, %eax + mov %eax, %cr0 /* - * we are NOW running for the first time with %eip - * having the full physical address, BUT we still - * are using a segment descriptor with the origin - * not matching the booting kernel. + * At this point paging is enabled, and we are in "compatability" mode. + * We do another far jump to reload %cs with the 64 bit selector. + * %cr3 points to a 4-level page table page. + * We cannot yet jump all the way to the kernel because we can only + * specify a 32 bit linear address. So, yet another trampoline. * - * SO NOW... for the BIG Jump into kernel's segment - * and physical text above 1 Meg. + * The following instruction is: + * ljmp $kernelcode-gdt, $tramp_64-mptramp_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and its a little clearer what we are patching. 
*/ - mov $0x10, %ebx - movw %bx, %ds - movw %bx, %es - movw %bx, %fs - movw %bx, %gs - movw %bx, %ss - - .globl bigJump -bigJump: - /* this will be modified by mpInstallTramp() */ - ljmp $0x08, $0 /* far jmp to MPentry() */ - -dead: hlt /* We should never get here */ - jmp dead - -/* - * MP boot strap Global Descriptor Table - */ - .p2align 4 - .globl MP_GDT - .globl bootCodeSeg - .globl bootDataSeg -MP_GDT: - -nulldesc: /* offset = 0x0 */ +jmp_64: + .byte 0xea /* opcode for far jump */ + .long tramp_64-mptramp_start /* offset in segment */ + .word kernelcode-gdt /* index in gdt for 64 bit code */ - .word 0x0 - .word 0x0 - .byte 0x0 - .byte 0x0 - .byte 0x0 - .byte 0x0 - -kernelcode: /* offset = 0x08 */ - - .word 0xffff /* segment limit 0..15 */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x0 /* segment base 16..23; set for 0K */ - .byte 0x9f /* flags; Type */ - .byte 0xcf /* flags; Limit */ - .byte 0x0 /* segment base 24..32 */ - -kerneldata: /* offset = 0x10 */ - - .word 0xffff /* segment limit 0..15 */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x0 /* segment base 16..23; set for 0k */ - .byte 0x93 /* flags; Type */ - .byte 0xcf /* flags; Limit */ - .byte 0x0 /* segment base 24..32 */ + /* + * Yeehar! We're running in 64 bit mode! We can mostly ignore our + * segment registers, and get on with it. + * Note that we are running at the correct virtual address, but with + * a 1:1 1GB mirrored mapping over entire address space. We had better + * switch to a real %cr3 promptly so that we can get to the direct map + * space. Remember that jmp is relative and that we've been relocated, + * so use an indirect jump. 
+ */ + .code64 +tramp_64: + movabsq $entry_64,%rax /* 64 bit immediate load */ + jmp *%rax -bootcode: /* offset = 0x18 */ + .p2align 4,0 +gdt: + /* + * All segment descriptor tables start with a null descriptor + */ + .long 0x00000000 + .long 0x00000000 - .word 0xffff /* segment limit 0..15 */ -bootCodeSeg: /* this will be modified by mpInstallTramp() */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ - .byte 0x9e /* flags; Type */ - .byte 0xcf /* flags; Limit */ - .byte 0x0 /*segment base 24..32 */ + /* + * This is the 64 bit long mode code descriptor. There is no + * 64 bit data descriptor. + */ +kernelcode: + .long 0x00000000 + .long 0x00209800 -bootdata: /* offset = 0x20 */ + /* + * This is the descriptor for the 32 bit boot code. + * %cs: +A, +R, -C, DPL=0, +P, +D, +G + * Accessed, Readable, Present, 32 bit, 4G granularity + */ +bootcode: + .long 0x0000ffff + .long 0x00cf9b00 - .word 0xffff -bootDataSeg: /* this will be modified by mpInstallTramp() */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ - .byte 0x92 - .byte 0xcf - .byte 0x0 + /* + * This is the descriptor for the 32 bit boot data. + * We load it into %ds and %ss. The bits for each selector + * are interpreted slightly differently. + * %ds: +A, +W, -E, DPL=0, +P, +D, +G + * %ss: +A, +W, -E, DPL=0, +P, +B, +G + * Accessed, Writeable, Expand up, Present, 32 bit, 4GB + * For %ds, +D means 'default operand size is 32 bit'. + * For %ss, +B means the stack register is %esp rather than %sp. + */ +bootdata: + .long 0x0000ffff + .long 0x00cf9300 -/* - * GDT pointer for the lgdt call - */ - .globl mp_gdtbase +gdtend: -MP_GDTptr: -mp_gdtlimit: - .word 0x0028 -mp_gdtbase: /* this will be modified by mpInstallTramp() */ + /* + * The address of our page table pages that the boot code + * uses to trampoline up to kernel address space. 
+ */ + .globl mptramp_pagetables +mptramp_pagetables: .long 0 - .space 0x100 /* space for boot_stk - 1st temporary stack */ -boot_stk: + /* + * The pseudo descriptor for lgdt to use. + */ +lgdt_desc: + .word gdtend-gdt /* Length */ + .long gdt-mptramp_start /* Offset plus %ds << 4 */ + + .globl mptramp_end +mptramp_end: -BOOTMP2: - .globl bootMP_size -bootMP_size: - .long BOOTMP2 - BOOTMP1 + /* + * From here on down is executed in the kernel .text section. + * + * Load a real %cr3 that has all the direct map stuff and switches + * off the 1GB replicated mirror. Load a stack pointer and jump + * into AP startup code in C. + */ + .text + .code64 + .p2align 4,0 +entry_64: + movq KPML4phys, %rax + movq %rax, %cr3 + movq bootSTK, %rsp + jmp init_secondary diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 908e65a..f0a9883 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -27,7 +27,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_mptable_force_htt.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> @@ -57,13 +56,8 @@ __FBSDID("$FreeBSD$"); #define NAPICID 32 /* Max number of I/O APIC's */ -#ifdef PC98 -#define BIOS_BASE (0xe8000) -#define BIOS_SIZE (0x18000) -#else #define BIOS_BASE (0xf0000) #define BIOS_SIZE (0x10000) -#endif #define BIOS_COUNT (BIOS_SIZE/4) typedef void mptable_entry_handler(u_char *entry, void *arg); @@ -226,11 +220,12 @@ static int mptable_probe(void) { int x; - u_long segment; + u_int32_t segment; u_int32_t target; /* see if EBDA exists */ - if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { + segment = (u_int32_t) *(u_short *)(KERNBASE + 0x40e); + if (segment != 0) { /* search first 1K of EBDA */ target = (u_int32_t) (segment << 4); if ((x = search_for_sig(target, 1024 / 4)) >= 0) diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c index 952ceaf..1fab16b 100644 --- a/sys/amd64/amd64/nexus.c +++ b/sys/amd64/amd64/nexus.c @@ -50,6 +50,7 @@ 
__FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/module.h> #include <machine/bus.h> +#include <machine/intr_machdep.h> #include <sys/rman.h> #include <sys/interrupt.h> @@ -64,8 +65,6 @@ __FBSDID("$FreeBSD$"); #include <isa/isavar.h> #include <amd64/isa/isa.h> #endif -#include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> #include <sys/rtprio.h> static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); @@ -156,14 +155,11 @@ nexus_probe(device_t dev) * multi-ISA-bus systems. PCI interrupts are routed to the ISA * component, so in a way, PCI can be a partial child of an ISA bus(!). * APIC interrupts are global though. - * - * XXX We depend on the AT PIC driver correctly claiming IRQ 2 - * to prevent its reuse elsewhere. */ irq_rman.rm_start = 0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupt request lines"; - irq_rman.rm_end = 15; + irq_rman.rm_end = NUM_IO_INTS - 1; if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, irq_rman.rm_start, irq_rman.rm_end)) @@ -428,7 +424,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, if (error) return (error); - error = inthand_add(device_get_nameunit(child), irq->r_start, + error = intr_add_handler(device_get_nameunit(child), irq->r_start, ihand, arg, flags, cookiep); return (error); @@ -437,7 +433,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { - return (inthand_remove(ih)); + return (intr_remove_handler(ih)); } static int diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 4e4c124..b2f0c18 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -118,6 +118,9 @@ __FBSDID("$FreeBSD$"); #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> +#ifdef SMP +#include <sys/smp.h> +#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -134,6 +137,9 @@ __FBSDID("$FreeBSD$"); #include <machine/cputypes.h> #include 
<machine/md_var.h> #include <machine/specialreg.h> +#ifdef SMP +#include <machine/smp.h> +#endif #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC @@ -163,6 +169,11 @@ struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; +#ifdef LAZY_SWITCH +#ifdef SMP +static struct mtx lazypmap_lock; +#endif +#endif vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ @@ -477,6 +488,11 @@ pmap_bootstrap(firstaddr) kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); +#ifdef LAZY_SWITCH +#ifdef SMP + mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN); +#endif +#endif mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); @@ -630,8 +646,121 @@ pmap_track_modified(vm_offset_t va) return 0; } +#ifdef SMP /* - * Normal invalidation functions. + * For SMP, these functions have to use the IPI mechanism for coherence. + */ +void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + u_int cpumask; + u_int other_cpus; + + if (smp_started) { + if (!(read_rflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_tlb_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invlpg(va); + smp_invlpg(va); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invlpg(va); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg(pmap->pm_active & other_cpus, va); + } + if (smp_started) + mtx_unlock_spin(&smp_tlb_mtx); + else + critical_exit(); +} + +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + u_int cpumask; + u_int other_cpus; + vm_offset_t addr; + + if (smp_started) { + if (!(read_rflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_tlb_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. + * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + smp_invlpg_range(sva, eva); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg_range(pmap->pm_active & other_cpus, + sva, eva); + } + if (smp_started) + mtx_unlock_spin(&smp_tlb_mtx); + else + critical_exit(); +} + +void +pmap_invalidate_all(pmap_t pmap) +{ + u_int cpumask; + u_int other_cpus; + + if (smp_started) { + if (!(read_rflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_tlb_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invltlb(); + smp_invltlb(); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invltlb(); + if (pmap->pm_active & other_cpus) + smp_masked_invltlb(pmap->pm_active & other_cpus); + } + if (smp_started) + mtx_unlock_spin(&smp_tlb_mtx); + else + critical_exit(); +} +#else /* !SMP */ +/* + * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void @@ -659,6 +788,7 @@ pmap_invalidate_all(pmap_t pmap) if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } +#endif /* !SMP */ /* * Are we current address space or kernel? @@ -1208,6 +1338,93 @@ retry: * Pmap allocation/deallocation routines. ***************************************************/ +#ifdef LAZY_SWITCH +#ifdef SMP +/* + * Deal with a SMP shootdown of other users of the pmap that we are + * trying to dispose of. This can be a bit hairy. 
+ */ +static u_int *lazymask; +static register_t lazyptd; +static volatile u_int lazywait; + +void pmap_lazyfix_action(void); + +void +pmap_lazyfix_action(void) +{ + u_int mymask = PCPU_GET(cpumask); + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); + atomic_store_rel_int(&lazywait, 1); +} + +static void +pmap_lazyfix_self(u_int mymask) +{ + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); +} + + +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int mymask = PCPU_GET(cpumask); + u_int mask; + register u_int spins; + + while ((mask = pmap->pm_active) != 0) { + spins = 50000000; + mask = mask & -mask; /* Find least significant set bit */ + mtx_lock_spin(&lazypmap_lock); + lazyptd = vtophys(pmap->pm_pml4); + if (mask == mymask) { + lazymask = &pmap->pm_active; + pmap_lazyfix_self(mymask); + } else { + atomic_store_rel_long((u_long *)&lazymask, + (u_long)&pmap->pm_active); + atomic_store_rel_int(&lazywait, 0); + ipi_selected(mask, IPI_LAZYPMAP); + while (lazywait == 0) { + ia32_pause(); + if (--spins == 0) + break; + } + } + mtx_unlock_spin(&lazypmap_lock); + if (spins == 0) + printf("pmap_lazyfix: spun for 50000000\n"); + } +} + +#else /* SMP */ + +/* + * Cleaning up on uniprocessor is easy. For various reasons, we're + * unlikely to have to even execute this code, including the fact + * that the cleanup is deferred until the parent does a wait(2), which + * means that another userland process has run. + */ +static void +pmap_lazyfix(pmap_t pmap) +{ + u_long cr3; + + cr3 = vtophys(pmap->pm_pml4); + if (cr3 == rcr3()) { + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + pmap->pm_active &= ~(PCPU_GET(cpumask)); + } +} +#endif /* SMP */ +#endif + /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. 
@@ -1222,6 +1439,9 @@ pmap_release(pmap_t pmap) ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); +#ifdef LAZY_SWITCH + pmap_lazyfix(pmap); +#endif mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); @@ -2777,12 +2997,21 @@ void pmap_activate(struct thread *td) { struct proc *p = td->td_proc; - pmap_t pmap; + pmap_t pmap, oldpmap; u_int64_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); + oldpmap = PCPU_GET(curpmap); +#ifdef SMP +if (oldpmap) /* XXX FIXME */ + atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); + atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); +#else +if (oldpmap) /* XXX FIXME */ + oldpmap->pm_active &= ~PCPU_GET(cpumask); pmap->pm_active |= PCPU_GET(cpumask); +#endif cr3 = vtophys(pmap->pm_pml4); /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 94d7bba..2c0ddf1 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -37,10 +37,21 @@ #include "opt_ddb.h" #include <machine/asmacros.h> +#include <machine/intr_machdep.h> #include <machine/pmap.h> #include "assym.s" + ALIGN_DATA + .globl intrcnt, eintrcnt +intrcnt: + .space INTRCNT_COUNT * 4 +eintrcnt: + + .globl intrnames, eintrnames +intrnames: + .space INTRCNT_COUNT * (MAXCOMLEN + 1) +eintrnames: .text @@ -302,6 +313,9 @@ ENTRY(casuptr) ja fusufault movq %rsi, %rax /* old */ +#ifdef SMP + lock +#endif cmpxchgq %rdx, (%rdi) /* new = %rdx */ /* diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 65ae8f9..0e6b95b 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -80,13 +80,14 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <machine/cpu.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/pcb.h> +#ifdef SMP +#include <machine/smp.h> +#endif #include <machine/tss.h> -#include <amd64/isa/icu.h> -#include 
<amd64/isa/intr_machdep.h> - #include <ddb/ddb.h> extern void trap(struct trapframe frame); @@ -564,6 +565,11 @@ trap_fatal(frame, eva) printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + printf("cpuid = %d; ", PCPU_GET(cpuid)); + printf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s, %s\n", @@ -631,6 +637,11 @@ void dblfault_handler() { printf("\nFatal double fault\n"); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + printf("cpuid = %d; ", PCPU_GET(cpuid)); + printf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif panic("double fault"); } diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 5485511..6a5b17c 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -77,14 +77,26 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) printf("TSC clock: %lu Hz\n", tsc_freq); - - return; } + void init_TSC_tc(void) { +#ifdef SMP + /* + * We can not use the TSC in SMP mode unless the TSCs on all CPUs + * are somehow synchronized. Some hardware configurations do + * this, but we have no way of determining whether this is the + * case, so we do not use the TSC in multi-processor systems + * unless the user indicated (by setting kern.timecounter.smp_tsc + * to 1) that he believes that his TSCs are synchronized. 
+ */ + if (mp_ncpus > 1 && !smp_tsc) + tsc_timecounter.tc_quality = -100; +#endif + if (tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 9b6bc1f..c4f583f 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/mutex.h> #include <sys/sf_buf.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/unistd.h> @@ -80,6 +81,11 @@ __FBSDID("$FreeBSD$"); #include <amd64/isa/isa.h> static void cpu_reset_real(void); +#ifdef SMP +static void cpu_reset_proxy(void); +static u_int cpu_reset_proxyid; +static volatile u_int cpu_reset_proxy_active; +#endif static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) @@ -336,10 +342,69 @@ cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) * Force reset the processor by invalidating the entire address space! 
*/ +#ifdef SMP +static void +cpu_reset_proxy() +{ + + cpu_reset_proxy_active = 1; + while (cpu_reset_proxy_active == 1) + ; /* Wait for other cpu to see that we've started */ + stop_cpus((1<<cpu_reset_proxyid)); + printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); + DELAY(1000000); + cpu_reset_real(); +} +#endif + void cpu_reset() { +#ifdef SMP + if (smp_active == 0) { + cpu_reset_real(); + /* NOTREACHED */ + } else { + + u_int map; + int cnt; + printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid)); + + map = PCPU_GET(other_cpus) & ~ stopped_cpus; + + if (map != 0) { + printf("cpu_reset: Stopping other CPUs\n"); + stop_cpus(map); /* Stop all other CPUs */ + } + + if (PCPU_GET(cpuid) == 0) { + DELAY(1000000); + cpu_reset_real(); + /* NOTREACHED */ + } else { + /* We are not BSP (CPU #0) */ + + cpu_reset_proxyid = PCPU_GET(cpuid); + cpustop_restartfunc = cpu_reset_proxy; + cpu_reset_proxy_active = 0; + printf("cpu_reset: Restarting BSP\n"); + started_cpus = (1<<0); /* Restart CPU #0 */ + + cnt = 0; + while (cpu_reset_proxy_active == 0 && cnt < 10000000) + cnt++; /* Wait for BSP to announce restart */ + if (cpu_reset_proxy_active == 0) + printf("cpu_reset: Failed to restart BSP\n"); + enable_intr(); + cpu_reset_proxy_active = 2; + + while (1); + /* NOTREACHED */ + } + } +#else cpu_reset_real(); +#endif } static void diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c index 991c85b..e32cfcd 100644 --- a/sys/amd64/ia32/ia32_syscall.c +++ b/sys/amd64/ia32/ia32_syscall.c @@ -76,11 +76,9 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <machine/cpu.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> -#include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> - #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(rsvd); diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index 9d5cdff..4dea040 100644 --- a/sys/amd64/include/apicvar.h +++ 
b/sys/amd64/include/apicvar.h @@ -32,6 +32,8 @@ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ +#include <machine/segments.h> + /* * Local && I/O APIC variable definitions. */ @@ -156,7 +158,7 @@ int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); -void lapic_handle_intr(struct intrframe frame); +void lapic_handle_intr(void *cookie, struct intrframe frame); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h index 9893e5f..404287e 100644 --- a/sys/amd64/include/asmacros.h +++ b/sys/amd64/include/asmacros.h @@ -138,4 +138,13 @@ #define MEXITCOUNT #endif /* GPROF */ +#ifdef LOCORE +/* + * Convenience macros for declaring interrupt entry points. + */ +#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ + .type __CONCAT(X,name),@function; __CONCAT(X,name): + +#endif /* LOCORE */ + #endif /* !_MACHINE_ASMACROS_H_ */ diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index 0c4d9b0..bc10631 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -82,7 +82,7 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) * For userland, assume the SMP case and use lock prefixes so that * the binaries will run on both types of systems. */ -#if !defined(_KERNEL) +#if defined(SMP) || !defined(_KERNEL) #define MPLOCKED lock ; #else #define MPLOCKED diff --git a/sys/amd64/include/clock.h b/sys/amd64/include/clock.h index 359e173..9e3e4e5 100644 --- a/sys/amd64/include/clock.h +++ b/sys/amd64/include/clock.h @@ -15,6 +15,7 @@ * XXX large parts of the driver and its interface are misplaced. 
*/ extern int adjkerntz; +extern int clkintr_pending; extern int disable_rtc_set; extern int pscnt; extern int psdiv; diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 38b07eb..2f32cfd 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -38,6 +38,7 @@ extern long Maxmem; extern u_long atdevbase; /* offset in virtual memory of ISA io mem */ +extern u_int basemem; extern int busdma_swi_pending; extern u_int cpu_exthigh; extern u_int cpu_feature; @@ -50,7 +51,8 @@ extern uint16_t *elan_mmcr; extern char kstack[]; extern char sigcode[]; extern int szsigcode; -extern u_int basemem; + +extern struct pcpu __pcpu[]; typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); struct thread; @@ -66,6 +68,7 @@ void enable_sse(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); void pagezero(void *addr); int is_physical_memory(vm_offset_t addr); +int isa_nmi(int cd); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); void fpstate_drop(struct thread *td); diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 293f909..7c3a831 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -83,7 +83,11 @@ #define MACHINE_ARCH "amd64" #endif +#ifdef SMP +#define MAXCPU 8 +#else #define MAXCPU 1 +#endif #define ALIGNBYTES _ALIGNBYTES #define ALIGN(p) _ALIGN(p) diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 7ebfbba..9e0383a 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -40,7 +40,11 @@ */ #define PCPU_MD_FIELDS \ struct pcpu *pc_prvspace; /* Self-reference */ \ + struct pmap *pc_curpmap; \ + struct amd64tss *pc_tssp; \ + register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ + u_int pc_apic_id; \ u_int pc_acpi_id /* ACPI CPU id */ #if defined(lint) diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 77a25a4..d6ba652 100644 --- a/sys/amd64/include/pmap.h +++ 
b/sys/amd64/include/pmap.h @@ -221,7 +221,8 @@ struct md_page { struct pmap { pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - u_long pm_active; /* active on cpus */ + u_int pm_active; /* active on cpus */ + /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ }; diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h index d924eb3..806e9d5 100644 --- a/sys/amd64/include/segments.h +++ b/sys/amd64/include/segments.h @@ -49,6 +49,7 @@ * Selectors */ +#define SEL_RPL_MASK 3 /* requester priv level */ #define ISPL(s) ((s)&3) /* what is the priority level of a selector */ #define SEL_KPL 0 /* kernel priority level */ #define SEL_UPL 3 /* user priority level */ @@ -191,6 +192,7 @@ struct region_descriptor { #define IDT_AC 17 /* #AC: Alignment Check */ #define IDT_MC 18 /* #MC: Machine Check */ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ +#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ /* @@ -210,6 +212,7 @@ struct region_descriptor { extern struct user_segment_descriptor gdt[]; extern struct soft_segment_descriptor gdt_segs[]; extern struct gate_descriptor *idt; +extern struct region_descriptor r_gdt, r_idt; void lgdt(struct region_descriptor *rdp); void sdtossd(struct user_segment_descriptor *sdp, diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 3d8d117..c6e7fcf 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -13,4 +13,63 @@ #ifndef _MACHINE_SMP_H_ #define _MACHINE_SMP_H_ +#ifdef _KERNEL + +#ifdef SMP + +#ifndef LOCORE + +#include <sys/bus.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> + +/* global symbols in mpboot.S */ +extern char mptramp_start[]; +extern char mptramp_end[]; +extern u_int32_t mptramp_pagetables; + +/* global data in mp_machdep.c */ +extern int mp_naps; +extern int boot_cpu_id; +extern struct pcb stoppcbs[]; +extern struct mtx smp_tlb_mtx; + +/* IPI handlers */ +inthand_t + IDTVEC(invltlb), /* TLB shootdowns - global */ + IDTVEC(invlpg), /* TLB shootdowns - 1 page */ + IDTVEC(invlrng), /* TLB shootdowns - page range */ + IDTVEC(hardclock), /* Forward hardclock() */ + IDTVEC(statclock), /* Forward statclock() */ + IDTVEC(cpuast), /* Additional software trap on other cpu */ + IDTVEC(cpustop), /* CPU stops & waits to be restarted */ + IDTVEC(rendezvous), /* handle CPU rendezvous */ + IDTVEC(lazypmap); /* handle lazy pmap release */ + +/* functions in mp_machdep.c */ +void cpu_add(u_int apic_id, char boot_cpu); +void init_secondary(void); +void ipi_selected(u_int cpus, u_int ipi); +void ipi_all(u_int ipi); +void ipi_all_but_self(u_int ipi); +void ipi_self(u_int ipi); +void forward_statclock(void); +void forwarded_statclock(struct clockframe frame); +void forward_hardclock(void); +void forwarded_hardclock(struct clockframe frame); +u_int mp_bootaddress(u_int); +int 
mp_grab_cpu_hlt(void); +void smp_invlpg(vm_offset_t addr); +void smp_masked_invlpg(u_int mask, vm_offset_t addr); +void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); +void smp_masked_invlpg_range(u_int mask, vm_offset_t startva, + vm_offset_t endva); +void smp_invltlb(void); +void smp_masked_invltlb(u_int mask); + +#endif /* !LOCORE */ +#endif /* SMP */ + +#endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ diff --git a/sys/amd64/include/tss.h b/sys/amd64/include/tss.h index aa60ba0..24d31b3 100644 --- a/sys/amd64/include/tss.h +++ b/sys/amd64/include/tss.h @@ -69,7 +69,7 @@ struct amd64tss { }; #ifdef _KERNEL -extern struct amd64tss common_tss; +extern struct amd64tss common_tss[]; #endif #endif /* _MACHINE_TSS_H_ */ diff --git a/sys/amd64/isa/atpic.c b/sys/amd64/isa/atpic.c index ba44094..64fa45e3 100644 --- a/sys/amd64/isa/atpic.c +++ b/sys/amd64/isa/atpic.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/proc.h> #include <machine/cpufunc.h> #include <machine/frame.h> @@ -52,26 +53,14 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> #include <machine/segments.h> -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> + #include <isa/isavar.h> #define MASTER 0 #define SLAVE 1 -/* XXX: Magic numbers */ -#ifdef PC98 -#ifdef AUTO_EOI_1 -#define MASTER_MODE 0x1f /* Master auto EOI, 8086 mode */ -#else -#define MASTER_MODE 0x1d /* Master 8086 mode */ -#endif -#define SLAVE_MODE 9 /* 8086 mode */ -#else /* IBM-PC */ #ifdef AUTO_EOI_1 #define MASTER_MODE (ICW4_8086 | ICW4_AEOI) #else @@ -82,7 +71,6 @@ __FBSDID("$FreeBSD$"); #else #define SLAVE_MODE ICW4_8086 #endif -#endif /* PC98 */ static void atpic_init(void *dummy); @@ -252,13 +240,7 @@ i8259_init(struct atpic *pic, int slave) /* Reset the PIC and program with next four bytes. 
*/ mtx_lock_spin(&icu_lock); -#ifdef DEV_MCA - /* MCA uses level triggered interrupts. */ - if (MCA_system) - outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4 | ICW1_LTIM); - else -#endif - outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); + outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); imr_addr = pic->at_ioaddr + ICU_IMR_OFFSET; /* Start vector. */ @@ -286,11 +268,9 @@ i8259_init(struct atpic *pic, int slave) /* Reset is finished, default to IRR on read. */ outb(pic->at_ioaddr, OCW3_SEL | OCW3_RR); -#ifndef PC98 /* OCW2_L1 sets priority order to 3-7, 0-2 (com2 first). */ if (!slave) outb(pic->at_ioaddr, OCW2_R | OCW2_SL | OCW2_L1); -#endif mtx_unlock_spin(&icu_lock); } @@ -317,21 +297,36 @@ atpic_init(void *dummy __unused) continue; ai = &atintrs[i]; setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase + - ai->at_irq, ai->at_intr, SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + ai->at_irq, ai->at_intr, SDT_SYSIGT, SEL_KPL, 0); intr_register_source(&ai->at_intsrc); } } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_SECOND + 1, atpic_init, NULL) void -atpic_handle_intr(struct intrframe iframe) +atpic_handle_intr(void *cookie, struct intrframe iframe) { struct intsrc *isrc; - - KASSERT((uint)iframe.if_vec < ICU_LEN, - ("unknown int %d\n", iframe.if_vec)); - isrc = &atintrs[iframe.if_vec].at_intsrc; + int vec = (uintptr_t)cookie; + + KASSERT(vec < ICU_LEN, ("unknown int %d\n", vec)); + isrc = &atintrs[vec].at_intsrc; + if (vec == 7 || vec == 15) { + int port, isr; + + /* + * Read the ISR register to see if IRQ 7/15 is really + * pending. Reset read register back to IRR when done. 
+ */ + port = ((struct atpic *)isrc->is_pic)->at_ioaddr; + mtx_lock_spin(&icu_lock); + outb(port, OCW3_SEL | OCW3_RR | OCW3_RIS); + isr = inb(port); + outb(port, OCW3_SEL | OCW3_RR); + mtx_unlock_spin(&icu_lock); + if ((isr & IRQ7) == 0) + return; + } intr_execute_handlers(isrc, &iframe); } @@ -398,9 +393,7 @@ static driver_t atpic_driver = { static devclass_t atpic_devclass; DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); -#ifndef PC98 DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); -#endif /* * Return a bitmap of the current interrupt requests. This is 8259-specific diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S index e3dc38f..e41071df 100644 --- a/sys/amd64/isa/atpic_vector.S +++ b/sys/amd64/isa/atpic_vector.S @@ -41,12 +41,8 @@ */ #include <machine/asmacros.h> -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> #include "assym.s" @@ -57,23 +53,28 @@ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; /* 8 ints */ \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ -; \ + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ + jz 1f ; /* Yes, dont swapgs again */ \ + swapgs ; \ +1: movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ + movq $irq_num, %rdi; /* pass the IRQ */ \ call atpic_handle_intr ; \ - addl $4, %esp ; /* discard the parameter */ \ -; \ MEXITCOUNT ; \ jmp doreti diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c index a11afb4..a2e751e 100644 --- a/sys/amd64/isa/clock.c +++ b/sys/amd64/isa/clock.c @@ -69,8 +69,12 @@ __FBSDID("$FreeBSD$"); #include <machine/clock.h> #include <machine/frame.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/psl.h> +#ifdef SMP +#include <machine/smp.h> +#endif #include <machine/specialreg.h> #include <amd64/isa/icu.h> @@ -81,8 +85,6 @@ __FBSDID("$FreeBSD$"); #endif #include <amd64/isa/timerreg.h> -#include <amd64/isa/intr_machdep.h> - /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. 
@@ -112,6 +114,7 @@ static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; +static struct intsrc *i8254_intsrc; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; @@ -122,7 +125,6 @@ static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; #define ACQUIRE_PENDING 3 static u_char timer2_state; -static void (*timer_func)(struct clockframe *frame) = hardclock; static unsigned i8254_get_timecount(struct timecounter *tc); static void set_timer_freq(u_int freq, int intr_freq); @@ -137,7 +139,7 @@ static struct timecounter i8254_timecounter = { }; static void -clkintr(struct clockframe frame) +clkintr(struct clockframe *frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { @@ -151,7 +153,10 @@ clkintr(struct clockframe frame) clkintr_pending = 0; mtx_unlock_spin(&clock_lock); } - timer_func(&frame); + hardclock(frame); +#ifdef SMP + forward_hardclock(); +#endif } int @@ -207,16 +212,19 @@ release_timer2() * in the statistics, but the stat clock will no longer stop. */ static void -rtcintr(struct clockframe frame) +rtcintr(struct clockframe *frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) { if (profprocs != 0) { if (--pscnt == 0) pscnt = psdiv; - profclock(&frame); + profclock(frame); } if (pscnt == psdiv) - statclock(&frame); + statclock(frame); +#ifdef SMP + forward_statclock(); +#endif } } @@ -719,7 +727,6 @@ void cpu_initclocks() { int diag; - register_t crit; if (statclock_disable) { /* @@ -735,19 +742,9 @@ cpu_initclocks() profhz = RTC_PROFRATE; } - /* Finish initializing 8253 timer 0. */ - /* - * XXX Check the priority of this interrupt handler. I - * couldn't find anything suitable in the BSD/OS code (grog, - * 19 July 2000). - */ - inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, + /* Finish initializing 8254 timer 0. 
*/ + intr_add_handler("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(IRQ0); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); @@ -760,14 +757,9 @@ cpu_initclocks() if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); - inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, + intr_add_handler("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(IRQ8); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); + i8254_intsrc = intr_lookup_source(8); writertc(RTC_STATUSB, rtc_statusb); @@ -833,8 +825,8 @@ i8254_get_timecount(struct timecounter *tc) if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(rflags & PSL_I) && count < timer0_max_count / 2u)) && - (inb(IO_ICU1) & 1))) - )) { + i8254_intsrc != NULL && + i8254_intsrc->is_pic->pic_source_pending(i8254_intsrc))))) { i8254_ticked = 1; i8254_offset += timer0_max_count; } diff --git a/sys/amd64/isa/icu.h b/sys/amd64/isa/icu.h deleted file mode 100644 index 06b8955..0000000 --- a/sys/amd64/isa/icu.h +++ /dev/null @@ -1,152 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)icu.h 5.6 (Berkeley) 5/9/91 - * $FreeBSD$ - */ - -/* - * AT/386 Interrupt Control constants - * W. Jolitz 8/89 - */ - -#ifndef _I386_ISA_ICU_H_ -#define _I386_ISA_ICU_H_ - -#ifndef LOCORE - -/* - * Note: - * Most of the SMP equivilants of the icu macros are coded - * elsewhere in an MP-safe fashion. - * In particular note that the 'imen' variable is opaque. - * DO NOT access imen directly, use INTREN()/INTRDIS(). 
- */ - -void INTREN(u_int); -void INTRDIS(u_int); - -extern unsigned imen; /* interrupt mask enable */ - -#endif /* LOCORE */ - - -/* - * Interrupt enable bits - in normal order of priority (which we change) - */ -#define IRQ0 0x0001 /* highest priority - timer */ -#define IRQ1 0x0002 -#define IRQ_SLAVE 0x0004 -#define IRQ8 0x0100 -#define IRQ9 0x0200 -#define IRQ2 IRQ9 -#define IRQ10 0x0400 -#define IRQ11 0x0800 -#define IRQ12 0x1000 -#define IRQ13 0x2000 -#define IRQ14 0x4000 -#define IRQ15 0x8000 -#define IRQ3 0x0008 /* this is highest after rotation */ -#define IRQ4 0x0010 -#define IRQ5 0x0020 -#define IRQ6 0x0040 -#define IRQ7 0x0080 /* lowest - parallel printer */ - -/* Initialization control word 1. Written to even address. */ -#define ICW1_IC4 0x01 /* ICW4 present */ -#define ICW1_SNGL 0x02 /* 1 = single, 0 = cascaded */ -#define ICW1_ADI 0x04 /* 1 = 4, 0 = 8 byte vectors */ -#define ICW1_LTIM 0x08 /* 1 = level trigger, 0 = edge */ -#define ICW1_RESET 0x10 /* must be 1 */ -/* 0x20 - 0x80 - in 8080/8085 mode only */ - -/* Initialization control word 2. Written to the odd address. */ -/* No definitions, it is the base vector of the IDT for 8086 mode */ - -/* Initialization control word 3. Written to the odd address. */ -/* For a master PIC, bitfield indicating a slave 8259 on given input */ -/* For slave, lower 3 bits are the slave's ID binary id on master */ - -/* Initialization control word 4. Written to the odd address. */ -#define ICW4_8086 0x01 /* 1 = 8086, 0 = 8080 */ -#define ICW4_AEOI 0x02 /* 1 = Auto EOI */ -#define ICW4_MS 0x04 /* 1 = buffered master, 0 = slave */ -#define ICW4_BUF 0x08 /* 1 = enable buffer mode */ -#define ICW4_SFNM 0x10 /* 1 = special fully nested mode */ - -/* Operation control words. Written after initialization. */ - -/* Operation control word type 1 */ -/* - * No definitions. Written to the odd address. Bitmask for interrupts. - * 1 = disabled. - */ - -/* Operation control word type 2. Bit 3 (0x08) must be zero. Even address. 
*/ -#define OCW2_L0 0x01 /* Level */ -#define OCW2_L1 0x02 -#define OCW2_L2 0x04 -/* 0x08 must be 0 to select OCW2 vs OCW3 */ -/* 0x10 must be 0 to select OCW2 vs ICW1 */ -#define OCW2_EOI 0x20 /* 1 = EOI */ -#define OCW2_SL 0x40 /* EOI mode */ -#define OCW2_R 0x80 /* EOI mode */ - -/* Operation control word type 3. Bit 3 (0x08) must be set. Even address. */ -#define OCW3_RIS 0x01 -#define OCW3_RR 0x02 -#define OCW3_P 0x04 -/* 0x08 must be 1 to select OCW3 vs OCW2 */ -#define OCW3_SEL 0x08 /* must be 1 */ -/* 0x10 must be 0 to select OCW3 vs ICW1 */ -#define OCW3_SMM 0x20 /* special mode mask */ -#define OCW3_ESMM 0x40 /* enable SMM */ - -/* - * Interrupt Control offset into Interrupt descriptor table (IDT) - */ -#define ICU_OFFSET 32 /* 0-31 are processor exceptions */ -#define ICU_LEN 16 /* 32-47 are ISA interrupts */ -#define HWI_MASK 0xffff /* bits for h/w interrupts */ -#define NHWI 16 - -#define ICU_IMR_OFFSET 1 -#define ICU_SLAVEID 2 -#define ICU_EOI (OCW2_EOI) /* non-specific EOI */ -#define ICU_SETPRI (OCW2_R | OCW2_SL) /* set rotation priority */ - -#define INTRCNT_COUNT (1 + ICU_LEN + 2 * ICU_LEN) - -#endif /* !_I386_ISA_ICU_H_ */ diff --git a/sys/amd64/isa/icu_ipl.S b/sys/amd64/isa/icu_ipl.S deleted file mode 100644 index ad883fe..0000000 --- a/sys/amd64/isa/icu_ipl.S +++ /dev/null @@ -1,81 +0,0 @@ -/*- - * Copyright (c) 1989, 1990 William F. Jolitz. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - - .data - ALIGN_DATA - -/* interrupt mask enable (all h/w off) */ - .globl imen -imen: .long HWI_MASK - - .text - SUPERALIGN_TEXT - -ENTRY(INTREN) - movq %rdi, %rax - movl %eax, %ecx - notl %eax - andl %eax, imen - movl imen, %eax - testb %cl, %cl - je 1f - outb %al, $(IO_ICU1 + ICU_IMR_OFFSET) -1: - testb %ch, %ch - je 2f - shrl $8, %eax - outb %al, $(IO_ICU2 + ICU_IMR_OFFSET) -2: - ret - -ENTRY(INTRDIS) - movq %rdi, %rax - movl %eax, %ecx - orl %eax, imen - movl imen, %eax - testb %cl, %cl - je 1f - outb %al, $(IO_ICU1 + ICU_IMR_OFFSET) -1: - testb %ch, %ch - je 2f - shrl $8, %eax - outb %al, $(IO_ICU2 + ICU_IMR_OFFSET) -2: - ret diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S deleted file mode 100644 index 123a731..0000000 --- a/sys/amd64/isa/icu_vector.S +++ /dev/null @@ -1,144 +0,0 @@ -/* - * from: vector.s, 386BSD 0.1 unknown origin - * $FreeBSD$ - */ - -#define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) -#define IRQ_BYTE(irq_num) ((irq_num) >> 3) - -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ - outb %al,$IO_ICU1 /* ... to clear in service bit */ - -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ - outb %al,$IO_ICU1 /* ... then first icu */ - - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ - testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ - jz 1f ; /* Yes, dont swapgs again */ \ - swapgs ; \ -1: movq %rdi,TF_RDI(%rsp) ; \ - movq %rsi,TF_RSI(%rsp) ; \ - movq %rdx,TF_RDX(%rsp) ; \ - movq %rcx,TF_RCX(%rsp) ; \ - movq %r8,TF_R8(%rsp) ; \ - movq %r9,TF_R9(%rsp) ; \ - movq %rax,TF_RAX(%rsp) ; \ - movq %rbx,TF_RBX(%rsp) ; \ - movq %rbp,TF_RBP(%rsp) ; \ - movq %r10,TF_R10(%rsp) ; \ - movq %r11,TF_R11(%rsp) ; \ - movq %r12,TF_R12(%rsp) ; \ - movq %r13,TF_R13(%rsp) ; \ - movq %r14,TF_R14(%rsp) ; \ - movq %r15,TF_R15(%rsp) ; \ - call critical_enter ; \ - movq PCPU(CURTHREAD),%rbx ; \ - incl TD_INTR_NESTING_LEVEL(%rbx) ; \ - movq intr_unit + (irq_num) * 8, %rdi ; \ - call *intr_handler + (irq_num) * 8 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movq intr_countp + (irq_num) * 8,%rax ; \ - incq (%rax) ; \ - decl TD_INTR_NESTING_LEVEL(%rbx) ; \ - call critical_exit ; \ - jmp doreti - -/* - * Slow, threaded interrupts. - * - * XXX Most of the parameters here are obsolete. Fix this when we're - * done. - * XXX we really shouldn't return via doreti if we just schedule the - * interrupt handler and don't run anything. We could just do an - * iret. FIXME. - */ -#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ - testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ - jz 1f ; /* Yes, dont swapgs again */ \ - swapgs ; \ -1: movq %rdi,TF_RDI(%rsp) ; \ - movq %rsi,TF_RSI(%rsp) ; \ - movq %rdx,TF_RDX(%rsp) ; \ - movq %rcx,TF_RCX(%rsp) ; \ - movq %r8,TF_R8(%rsp) ; \ - movq %r9,TF_R9(%rsp) ; \ - movq %rax,TF_RAX(%rsp) ; \ - movq %rbx,TF_RBX(%rsp) ; \ - movq %rbp,TF_RBP(%rsp) ; \ - movq %r10,TF_R10(%rsp) ; \ - movq %r11,TF_R11(%rsp) ; \ - movq %r12,TF_R12(%rsp) ; \ - movq %r13,TF_R13(%rsp) ; \ - movq %r14,TF_R14(%rsp) ; \ - movq %r15,TF_R15(%rsp) ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movq PCPU(CURTHREAD),%rbx ; \ - incl TD_INTR_NESTING_LEVEL(%rbx) ; \ - movq $irq_num, %rdi; /* pass the IRQ */ \ - call sched_ithd ; \ - decl TD_INTR_NESTING_LEVEL(%rbx) ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti - -MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) - -#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) -/* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) - 
INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) - -MCOUNT_LABEL(eintr) - diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c deleted file mode 100644 index 30c12e7..0000000 --- a/sys/amd64/isa/intr_machdep.c +++ /dev/null @@ -1,525 +0,0 @@ -/*- - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)isa.c 7.2 (Berkeley) 5/13/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "opt_isa.h" - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/errno.h> -#include <sys/interrupt.h> -#include <sys/kernel.h> -#include <sys/kthread.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/syslog.h> -#include <sys/systm.h> -#include <sys/unistd.h> - -#include <machine/md_var.h> -#include <machine/segments.h> - -#include <amd64/isa/isa.h> -#include <amd64/isa/icu.h> - -#ifdef DEV_ISA -#include <isa/isavar.h> -#endif -#include <amd64/isa/intr_machdep.h> -#include <sys/interrupt.h> - -/* - * Per-interrupt data. 
- */ -u_long *intr_countp[ICU_LEN]; /* pointers to interrupt counters */ -driver_intr_t *intr_handler[ICU_LEN]; /* first level interrupt handler */ -struct ithd *ithds[ICU_LEN]; /* real interrupt handler */ -void *intr_unit[ICU_LEN]; - -static struct mtx ithds_table_lock; /* protect the ithds table */ - -static inthand_t *fastintr[ICU_LEN] = { - IDTVEC(fastintr0), IDTVEC(fastintr1), - IDTVEC(fastintr2), IDTVEC(fastintr3), - IDTVEC(fastintr4), IDTVEC(fastintr5), - IDTVEC(fastintr6), IDTVEC(fastintr7), - IDTVEC(fastintr8), IDTVEC(fastintr9), - IDTVEC(fastintr10), IDTVEC(fastintr11), - IDTVEC(fastintr12), IDTVEC(fastintr13), - IDTVEC(fastintr14), IDTVEC(fastintr15), -}; - -static inthand_t *slowintr[ICU_LEN] = { - IDTVEC(intr0), IDTVEC(intr1), IDTVEC(intr2), IDTVEC(intr3), - IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), - IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), - IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15), -}; - -static driver_intr_t isa_strayintr; - -static void ithds_init(void *dummy); -static void ithread_enable(uintptr_t vector); -static void ithread_disable(uintptr_t vector); -static void init_i8259(void); - -#define NMI_PARITY (1 << 7) -#define NMI_IOCHAN (1 << 6) -#define ENMI_WATCHDOG (1 << 7) -#define ENMI_BUSTIMER (1 << 6) -#define ENMI_IOSTATUS (1 << 5) - -#ifdef DEV_ISA -/* - * Bus attachment for the ISA PIC. - */ -static struct isa_pnp_id atpic_ids[] = { - { 0x0000d041 /* PNP0000 */, "AT interrupt controller" }, - { 0 } -}; - -static int -atpic_probe(device_t dev) -{ - int result; - - if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, atpic_ids)) <= 0) - device_quiet(dev); - return(result); -} - -/* - * The generic ISA attachment code will handle allocating any other resources - * that we don't explicitly claim here. 
- */ -static int -atpic_attach(device_t dev) -{ - return(0); -} - -static device_method_t atpic_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, atpic_probe), - DEVMETHOD(device_attach, atpic_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - { 0, 0 } -}; - -static driver_t atpic_driver = { - "atpic", - atpic_methods, - 1, /* no softc */ -}; - -static devclass_t atpic_devclass; - -DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); -DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); -#endif /* DEV_ISA */ - -/* - * Handle a NMI, possibly a machine check. - * return true to panic system, false to ignore. - */ -int -isa_nmi(cd) - int cd; -{ - int retval = 0; - int isa_port = inb(0x61); - - log(LOG_CRIT, "NMI ISA STATUS 0x%02x", isa_port); - - if (isa_port & NMI_PARITY) { - log(LOG_CRIT, "RAM parity error, likely hardware failure."); - retval = 1; - } - - if (isa_port & NMI_IOCHAN) { - log(LOG_CRIT, "I/O channel check, likely hardware failure."); - retval = 1; - } - - return (retval); -} - -/* - * ICU reinitialize when ICU configuration has lost. - */ -void icu_reinit() -{ - int i; - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - init_i8259(); - for(i=0;i<ICU_LEN;i++) - if(intr_handler[i] != isa_strayintr) - INTREN(1<<i); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - -/* - * Create a default interrupt table to avoid problems caused by - * spurious interrupts during configuration of kernel, then setup - * interrupt control unit. 
- */ -void -isa_defaultirq() -{ - int i; - register_t crit; - - /* icu vectors */ - for (i = 0; i < ICU_LEN; i++) - icu_unset(i, (driver_intr_t *)NULL); - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - init_i8259(); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - - -/* - *initialize 8259's - */ -static void init_i8259() -{ - - outb(IO_ICU1, ICW1_RESET | ICW1_IC4); /* reset; program device, four bytes */ - - outb(IO_ICU1+ICU_IMR_OFFSET, NRSVIDT); /* starting at this vector index */ - outb(IO_ICU1+ICU_IMR_OFFSET, IRQ_SLAVE);/* slave on line 2 */ - outb(IO_ICU1+ICU_IMR_OFFSET, ICW4_8086);/* 8086 mode */ - outb(IO_ICU1+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ - outb(IO_ICU1, OCW3_SEL | OCW3_RR); /* default to IRR on read */ - outb(IO_ICU1, ICU_SETPRI | 0x2);/* pri order 3-7, 0-2 (com2 first) */ - - outb(IO_ICU2, ICW1_RESET | ICW1_IC4); /* reset; program device, four bytes */ - - outb(IO_ICU2+ICU_IMR_OFFSET, NRSVIDT+8); /* staring at this vector index */ - outb(IO_ICU2+ICU_IMR_OFFSET, ICU_SLAVEID); /* my slave id is 2 */ - outb(IO_ICU2+ICU_IMR_OFFSET, ICW4_8086); /* 8086 mode */ - outb(IO_ICU2+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ - outb(IO_ICU2, OCW3_SEL | OCW3_RR); /* default to IRR on read */ -} - -/* - * Caught a stray interrupt, notify - */ -static int isaglitch7; -static int isaglitch15; - -static void -isa_strayintr(vcookiep) - void *vcookiep; -{ - int intr = (void **)vcookiep - &intr_unit[0]; - int isr; - - /* Determine if it is a stray interrupt or simply a glitch */ - if (intr == 7) { - outb(IO_ICU1, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU1); - outb(IO_ICU1, OCW3_SEL | OCW3_RR | OCW3_RIS); /* reselect IIR */ - if ((isr & 0x80) == 0) { - isaglitch7++; - return; - } - } - if (intr == 15) { - outb(IO_ICU2, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU2); - outb(IO_ICU2, OCW3_SEL | OCW3_RR); /* reselect IIR */ - if ((isr & 0x80) == 0) { - isaglitch15++; - return; - } - } - if (intrcnt[1 + intr] <= 
5) - log(LOG_ERR, "stray irq %d\n", intr); - if (intrcnt[1 + intr] == 5) - log(LOG_CRIT, - "too many stray irq %d's; not logging any more\n", intr); -} - -#ifdef DEV_ISA -/* - * Return a bitmap of the current interrupt requests. This is 8259-specific - * and is only suitable for use at probe time. - */ -intrmask_t -isa_irq_pending() -{ - u_char irr1; - u_char irr2; - - irr1 = inb(IO_ICU1); - irr2 = inb(IO_ICU2); - return ((irr2 << 8) | irr1); -} -#endif - -/* - * Update intrnames array with the specified name. This is used by - * vmstat(8) and the like. - */ -static void -update_intrname(int intr, const char *name) -{ - char buf[32]; - char *cp; - int name_index, off, strayintr; - - /* - * Initialise strings for bitbucket and stray interrupt counters. - * These have statically allocated indices 0 and 1 through ICU_LEN. - */ - if (intrnames[0] == '\0') { - off = sprintf(intrnames, "???") + 1; - for (strayintr = 0; strayintr < ICU_LEN; strayintr++) - off += sprintf(intrnames + off, "stray irq%d", - strayintr) + 1; - } - - if (name == NULL) - name = "???"; - if (snprintf(buf, sizeof(buf), "%s irq%d", name, intr) >= sizeof(buf)) - goto use_bitbucket; - - /* - * Search for `buf' in `intrnames'. In the usual case when it is - * not found, append it to the end if there is enough space (the \0 - * terminator for the previous string, if any, becomes a separator). 
- */ - for (cp = intrnames, name_index = 0; - cp != eintrnames && name_index < NR_INTRNAMES; - cp += strlen(cp) + 1, name_index++) { - if (*cp == '\0') { - if (strlen(buf) >= eintrnames - cp) - break; - strcpy(cp, buf); - goto found; - } - if (strcmp(cp, buf) == 0) - goto found; - } - -use_bitbucket: - printf("update_intrname: counting %s irq%d as %s\n", name, intr, - intrnames); - name_index = 0; -found: - intr_countp[intr] = &intrcnt[name_index]; -} - -int -icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) -{ - register_t crit; - - if ((u_int)intr >= ICU_LEN || intr == ICU_SLAVEID) - return (EINVAL); -#if 0 - if (intr_handler[intr] != isa_strayintr) - return (EBUSY); -#endif - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - intr_handler[intr] = handler; - intr_unit[intr] = arg; - setidt(ICU_OFFSET + intr, - flags & INTR_FAST ? fastintr[intr] : slowintr[intr], - SDT_SYSIGT, SEL_KPL, 0); - INTREN(1 << intr); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - return (0); -} - -/* - * Dissociate an interrupt handler from an IRQ and set the handler to - * the stray interrupt handler. The 'handler' parameter is used only - * for consistency checking. 
- */ -int -icu_unset(intr, handler) - int intr; - driver_intr_t *handler; -{ - register_t crit; - - if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) - return (EINVAL); - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << intr); - intr_countp[intr] = &intrcnt[1 + intr]; - intr_handler[intr] = isa_strayintr; - intr_unit[intr] = &intr_unit[intr]; - setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYSIGT, SEL_KPL, 0); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - return (0); -} - -static void -ithds_init(void *dummy) -{ - - mtx_init(&ithds_table_lock, "ithread table lock", NULL, MTX_SPIN); -} -SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); - -static void -ithread_enable(uintptr_t vector) -{ - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(1 << vector); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - -static void -ithread_disable(uintptr_t vector) -{ - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << vector); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - -int -inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, - enum intr_type flags, void **cookiep) -{ - struct ithd *ithd; /* descriptor for the IRQ */ - int errcode = 0; - int created_ithd = 0; - - /* - * Work around a race where more than one CPU may be registering - * handlers on the same IRQ at the same time. 
- */ - mtx_lock_spin(&ithds_table_lock); - ithd = ithds[irq]; - mtx_unlock_spin(&ithds_table_lock); - if (ithd == NULL) { - errcode = ithread_create(&ithd, irq, 0, ithread_disable, - ithread_enable, "irq%d:", irq); - if (errcode) - return (errcode); - mtx_lock_spin(&ithds_table_lock); - if (ithds[irq] == NULL) { - ithds[irq] = ithd; - created_ithd++; - mtx_unlock_spin(&ithds_table_lock); - } else { - struct ithd *orphan; - - orphan = ithd; - ithd = ithds[irq]; - mtx_unlock_spin(&ithds_table_lock); - ithread_destroy(orphan); - } - } - - errcode = ithread_add_handler(ithd, name, handler, arg, - ithread_priority(flags), flags, cookiep); - - if ((flags & INTR_FAST) == 0 || errcode) - /* - * The interrupt process must be in place, but - * not necessarily schedulable, before we - * initialize the ICU, since it may cause an - * immediate interrupt. - */ - if (icu_setup(irq, sched_ithd, arg, flags) != 0) - panic("inthand_add: Can't initialize ICU"); - - if (errcode) - return (errcode); - - if (flags & INTR_FAST) { - errcode = icu_setup(irq, handler, arg, flags); - if (errcode && bootverbose) - printf("\tinthand_add(irq%d) failed, result=%d\n", - irq, errcode); - if (errcode) - return (errcode); - } - - update_intrname(irq, name); - return (0); -} - -/* - * Deactivate and remove linked list the interrupt handler descriptor - * data connected created by an earlier call of inthand_add(), then - * adjust the interrupt masks if necessary. - * - * Return the memory held by the interrupt handler descriptor data - * structure to the system. First ensure the handler is not actively - * in use. - */ -int -inthand_remove(void *cookie) -{ - - return (ithread_remove_handler(cookie)); -} diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h deleted file mode 100644 index 26f500e..0000000 --- a/sys/amd64/isa/intr_machdep.h +++ /dev/null @@ -1,118 +0,0 @@ -/*- - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _I386_ISA_INTR_MACHDEP_H_ -#define _I386_ISA_INTR_MACHDEP_H_ - -/* - * Low level interrupt code. 
- */ - -#ifdef _KERNEL - -#ifdef LOCORE - -/* - * Protects the IO APIC, 8259 PIC, imen, and apic_imen - */ -#define ICU_LOCK MTX_LOCK_SPIN(icu_lock, 0) -#define ICU_UNLOCK MTX_UNLOCK_SPIN(icu_lock) - -#else /* LOCORE */ - -/* - * Type of the first (asm) part of an interrupt handler. - */ -typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); -typedef void unpendhand_t(void); - -#define IDTVEC(name) __CONCAT(X,name) - -extern u_long *intr_countp[]; /* pointers into intrcnt[] */ -extern driver_intr_t *intr_handler[]; /* C entry points of intr handlers */ -extern struct ithd *ithds[]; -extern void *intr_unit[]; /* cookies to pass to intr handlers */ -extern struct mtx icu_lock; - -inthand_t - IDTVEC(fastintr0), IDTVEC(fastintr1), - IDTVEC(fastintr2), IDTVEC(fastintr3), - IDTVEC(fastintr4), IDTVEC(fastintr5), - IDTVEC(fastintr6), IDTVEC(fastintr7), - IDTVEC(fastintr8), IDTVEC(fastintr9), - IDTVEC(fastintr10), IDTVEC(fastintr11), - IDTVEC(fastintr12), IDTVEC(fastintr13), - IDTVEC(fastintr14), IDTVEC(fastintr15); -inthand_t - IDTVEC(intr0), IDTVEC(intr1), IDTVEC(intr2), IDTVEC(intr3), - IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), - IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), - IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15); -unpendhand_t - IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2), - IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5), - IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8), - IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11), - IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14), - IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17), - IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20), - IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23), - IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26), - IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), - 
IDTVEC(fastunpend30), IDTVEC(fastunpend31); - -#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) - -void isa_defaultirq(void); -int isa_nmi(int cd); -int icu_setup(int intr, driver_intr_t *func, void *arg, int flags); -int icu_unset(int intr, driver_intr_t *handler); -void icu_reinit(void); - -/* - * WARNING: These are internal functions and not to be used by device drivers! - * They are subject to change without notice. - */ -int inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, - enum intr_type flags, void **cookiep); -int inthand_remove(void *cookie); -void sched_ithd(void *dummy); -void call_fast_unpend(int irq); - -#endif /* LOCORE */ - -#endif /* _KERNEL */ - -#endif /* !_I386_ISA_INTR_MACHDEP_H_ */ diff --git a/sys/amd64/isa/ithread.c b/sys/amd64/isa/ithread.c deleted file mode 100644 index e36e866..0000000 --- a/sys/amd64/isa/ithread.c +++ /dev/null @@ -1,115 +0,0 @@ -/*- - * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Berkeley Software Design Inc's name may not be used to endorse or - * promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * BSDI Id: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* Interrupt thread code. */ - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/interrupt.h> -#include <sys/systm.h> -#include <sys/vmmeter.h> - -#include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> -#include <amd64/isa/isa.h> - -struct int_entropy { - struct proc *p; - int irq; -}; - -static u_int straycount[ICU_LEN]; -static u_int glitchcount7; -static u_int glitchcount15; - -#define MAX_STRAY_LOG 5 - -/* - * Schedule a heavyweight interrupt process. This function is called - * from the interrupt handlers Xintr<num>. - */ -void -sched_ithd(void *cookie) -{ - int irq = (uintptr_t) cookie; /* IRQ we're handling */ - struct ithd *ithd = ithds[irq]; /* and the process that does it */ - int error, isr; - - /* This used to be in icu_vector.s */ - /* - * We count software interrupts when we process them. The - * code here follows previous practice, but there's an - * argument for counting hardware interrupts when they're - * processed too. - */ - atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */ - atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */ - - /* - * Schedule the interrupt thread to run if needed and switch to it - * if we schedule it if !cold. - */ - error = ithread_schedule(ithd, !cold); - - /* - * Log stray interrupts. 
- */ - if (error == EINVAL) { - /* Determine if it is a stray interrupt or simply a glitch */ - if (irq == 7) { - outb(IO_ICU1, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU1); - outb(IO_ICU1, OCW3_SEL | OCW3_RIS); /* reselect IIR */ - if ((isr & 0x80) == 0) { - glitchcount7++; - return; - } - } - if (irq == 15) { - outb(IO_ICU2, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU2); - outb(IO_ICU2, OCW3_SEL | OCW3_RIS); /* reselect IIR */ - if ((isr & 0x80) == 0) { - glitchcount15++; - return; - } - } - if (straycount[irq] < MAX_STRAY_LOG) { - printf("stray irq %d\n", irq); - if (++straycount[irq] == MAX_STRAY_LOG) - printf( - "got %d stray irq %d's: not logging anymore\n", - MAX_STRAY_LOG, irq); - } - } -} diff --git a/sys/amd64/isa/vector.S b/sys/amd64/isa/vector.S deleted file mode 100644 index 2d7be50..0000000 --- a/sys/amd64/isa/vector.S +++ /dev/null @@ -1,76 +0,0 @@ -/* - * from: vector.s, 386BSD 0.1 unknown origin - * $FreeBSD$ - */ - -#include <amd64/isa/icu.h> -#include <amd64/isa/isa.h> -#include <amd64/isa/intr_machdep.h> - - .data - ALIGN_DATA - -/* - * Interrupt counters and names for export to vmstat(8) and friends. - * - * XXX this doesn't really belong here; everything except the labels - * for the endpointers is almost machine-independent. - */ - - .globl intrcnt, eintrcnt -intrcnt: - .space INTRCNT_COUNT * 8 -eintrcnt: - - .globl intrnames, eintrnames -intrnames: - .space INTRCNT_COUNT * 32 -eintrnames: - .text - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - * - * XXX - the interrupt frame is set up to look like a trap frame. This is - * usually a waste of time. The only interrupt handlers that want a frame - * are the clock handler (it wants a clock frame), the fpu handler (it's - * easier to do right all in assembler). The interrupt return routine - * needs a trap frame for rare AST's (it could easily convert the frame). 
- * The direct costs of setting up a trap frame are two pushl's (error - * code and trap number), an addl to get rid of these, and pushing and - * popping the call-saved regs %esi, %edi and %ebp twice, The indirect - * costs are making the driver interface nonuniform so unpending of - * interrupts is more complicated and slower (call_driver(unit) would - * be easier than ensuring an interrupt frame for all handlers. Finally, - * there are some struct copies in the fpu handler and maybe in the clock - * handler that could be avoided by working more with pointers to frames - * instead of frames. - * - * XXX - should we do a cld on every system entry to avoid the requirement - * for scattered cld's? - * - * Coding notes for *.s: - * - * If possible, avoid operations that involve an operand size override. - * Word-sized operations might be smaller, but the operand size override - * makes them slower on on 486's and no faster on 386's unless perhaps - * the instruction pipeline is depleted. E.g., - * - * Use movl to seg regs instead of the equivalent but more descriptive - * movw - gas generates an irelevant (slower) operand size override. - * - * Use movl to ordinary regs in preference to movw and especially - * in preference to movz[bw]l. Use unsigned (long) variables with the - * top bits clear instead of unsigned short variables to provide more - * opportunities for movl. - * - * If possible, use byte-sized operations. They are smaller and no slower. - * - * Use (%reg) instead of 0(%reg) - gas generates larger code for the latter. - * - * If the interrupt frame is made more flexible, INTR can push %eax first - * and decide the ipending case with less overhead, e.g., by avoiding - * loading segregs. 
- */ - -#include "amd64/isa/icu_vector.S" diff --git a/sys/amd64/pci/pci_bus.c b/sys/amd64/pci/pci_bus.c index 3f0b9dc..6be70b7 100644 --- a/sys/amd64/pci/pci_bus.c +++ b/sys/amd64/pci/pci_bus.c @@ -426,7 +426,7 @@ legacy_pcib_probe(device_t dev) if (pci_cfgregopen() == 0) return ENXIO; - return 0; + return -100; } int diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 0af5b67..82ace4c 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -8,48 +8,26 @@ # dependency lines other than the first are silently ignored. # -ia32_genassym.o optional ia32 \ +ia32_genassym.o standard \ dependency "$S/compat/ia32/ia32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "ia32_genassym.o" # -ia32_assym.h optional ia32 \ +ia32_assym.h standard \ dependency "$S/kern/genassym.sh ia32_genassym.o" \ compile-with "env NM=${NM} sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "ia32_assym.h" # - -dev/kbd/atkbd.c optional atkbd -dev/kbd/atkbdc.c optional atkbdc -dev/kbd/kbd.c optional atkbd -dev/kbd/kbd.c optional kbd -dev/kbd/kbd.c optional sc -dev/kbd/kbd.c optional ukbd -dev/kbd/kbd.c optional vt -dev/sio/sio.c optional sio -dev/sio/sio_isa.c optional sio isa -dev/syscons/apm/apm_saver.c optional apm_saver apm -dev/syscons/schistory.c optional sc -dev/syscons/scmouse.c optional sc -dev/syscons/scterm.c optional sc -dev/syscons/scterm-dumb.c optional sc -dev/syscons/scterm-sc.c optional sc -dev/syscons/scvesactl.c optional sc vga vesa -dev/syscons/scvgarndr.c optional sc vga -dev/syscons/scvidctl.c optional sc -dev/syscons/scvtb.c optional sc -dev/syscons/syscons.c optional sc -dev/syscons/sysmouse.c optional sc -dev/uart/uart_cpu_amd64.c optional uart -isa/atkbd_isa.c optional atkbd - amd64/acpica/OsdEnvironment.c optional acpi amd64/acpica/acpi_machdep.c optional acpi amd64/acpica/acpi_wakeup.c optional acpi +amd64/acpica/madt.c optional acpi 
amd64/amd64/amd64-gdbstub.c optional ddb amd64/amd64/amd64_mem.c standard +amd64/amd64/apic_vector.S standard +amd64/amd64/atomic.c standard amd64/amd64/autoconf.c standard amd64/amd64/busdma_machdep.c standard amd64/amd64/cpu_switch.S standard @@ -60,12 +38,21 @@ amd64/amd64/db_trace.c optional ddb amd64/amd64/dump_machdep.c standard amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard +amd64/amd64/fpu.c standard amd64/amd64/identcpu.c standard +amd64/amd64/in_cksum.c optional inet amd64/amd64/initcpu.c standard +amd64/amd64/intr_machdep.c standard +amd64/amd64/io_apic.c standard amd64/amd64/legacy.c standard +amd64/amd64/local_apic.c standard amd64/amd64/locore.S standard no-obj amd64/amd64/machdep.c standard amd64/amd64/mem.c standard +amd64/amd64/mp_machdep.c optional smp +amd64/amd64/mpboot.S optional smp +amd64/amd64/mptable.c optional mptable +amd64/amd64/mptable_pci.c optional mptable pci amd64/amd64/nexus.c standard amd64/amd64/pmap.c standard amd64/amd64/sigtramp.S standard @@ -75,72 +62,64 @@ amd64/amd64/trap.c standard amd64/amd64/tsc.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard -amd64/amd64/in_cksum.c optional inet -amd64/amd64/fpu.c standard - +amd64/isa/atpic.c optional atpic isa +amd64/isa/atpic_vector.S optional atpic isa amd64/isa/clock.c standard -amd64/isa/intr_machdep.c standard amd64/isa/isa.c standard amd64/isa/isa_dma.c standard -amd64/isa/ithread.c standard - -amd64/pci/pci_cfgreg.c optional pci +amd64/isa/nmi.c standard amd64/pci/pci_bus.c optional pci - -compat/freebsd32/freebsd32_misc.c optional ia32 -compat/freebsd32/freebsd32_syscalls.c optional ia32 -compat/freebsd32/freebsd32_sysent.c optional ia32 -compat/ia32/ia32_sigtramp.S optional ia32 -compat/ia32/ia32_sysvec.c optional ia32 -amd64/ia32/ia32_signal.c optional ia32 -amd64/ia32/ia32_exception.S optional ia32 -amd64/ia32/ia32_syscall.c optional ia32 -kern/imgact_elf32.c optional ia32 - -# This file tells config what files go into 
building a kernel, -# files marked standard are always included. -# +amd64/pci/pci_cfgreg.c optional pci dev/fb/fb.c optional fb dev/fb/fb.c optional vga dev/fb/splash.c optional splash dev/fb/vga.c optional vga - dev/kbd/atkbd.c optional atkbd dev/kbd/atkbdc.c optional atkbdc dev/kbd/kbd.c optional atkbd dev/kbd/kbd.c optional kbd dev/kbd/kbd.c optional sc dev/kbd/kbd.c optional ukbd -dev/kbd/kbd.c optional vt - dev/ppc/ppc.c optional ppc - +dev/sio/sio.c optional sio +dev/sio/sio_isa.c optional sio isa +dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/schistory.c optional sc dev/syscons/scmouse.c optional sc -dev/syscons/scterm.c optional sc dev/syscons/scterm-dumb.c optional sc dev/syscons/scterm-sc.c optional sc +dev/syscons/scterm.c optional sc dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvidctl.c optional sc dev/syscons/scvtb.c optional sc dev/syscons/syscons.c optional sc dev/syscons/sysmouse.c optional sc +dev/uart/uart_cpu_amd64.c optional uart geom/geom_bsd.c standard geom/geom_bsd_enc.c standard geom/geom_mbr.c standard geom/geom_mbr_enc.c standard - -#i386/isa/pmtimer.c optional pmtimer -# isa/atkbd_isa.c optional atkbd isa/atkbdc_isa.c optional atkbdc isa/fd.c optional fdc isa/psm.c optional psm isa/syscons_isa.c optional sc isa/vga_isa.c optional vga -pci/agp_intel.c optional agp -pci/agp_via.c optional agp -pci/agp_sis.c optional agp pci/agp_ali.c optional agp pci/agp_amd.c optional agp pci/agp_i810.c optional agp +pci/agp_intel.c optional agp +pci/agp_sis.c optional agp +pci/agp_via.c optional agp +# +# IA32 binary support +# +amd64/ia32/ia32_exception.S optional ia32 +amd64/ia32/ia32_signal.c optional ia32 +amd64/ia32/ia32_syscall.c optional ia32 +compat/freebsd32/freebsd32_misc.c optional ia32 +compat/freebsd32/freebsd32_syscalls.c optional ia32 +compat/freebsd32/freebsd32_sysent.c optional ia32 +compat/ia32/ia32_sigtramp.S optional ia32 +compat/ia32/ia32_sysvec.c optional ia32 +kern/imgact_elf32.c optional ia32 diff --git 
a/sys/conf/options.amd64 b/sys/conf/options.amd64 index af1adb7..ccbe0f6 100644 --- a/sys/conf/options.amd64 +++ b/sys/conf/options.amd64 @@ -1,6 +1,8 @@ # $FreeBSD$ # Options specific to AMD64 platform kernels +AUTO_EOI_1 opt_auto_eoi.h +AUTO_EOI_2 opt_auto_eoi.h MAXMEM PERFMON opt_perfmon.h @@ -29,3 +31,5 @@ PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h IA32 +NO_MIXED_MODE +DEV_ATPIC opt_atpic.h diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 0190b2b..28c3224 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -816,7 +816,7 @@ pci_add_resources(device_t pcib, device_t bus, device_t dev) } if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) { -#if defined(__ia64__) || defined(__i386__) +#if defined(__ia64__) || defined(__i386__) || defined(__amd64__) /* * Try to re-route interrupts. Sometimes the BIOS or * firmware may leave bogus values in these registers. diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 3a71f6e..2dee8e3 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -97,7 +97,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/queue.h> #include <sys/sched.h> -#if defined(SMP) && defined(__i386__) +#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) #include <sys/smp.h> #endif #include <machine/critical.h> @@ -124,7 +124,7 @@ choosethread(void) struct thread *td; struct ksegrp *kg; -#if defined(SMP) && defined(__i386__) +#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) if (smp_active == 0 && PCPU_GET(cpuid) != 0) { /* Shutting down, run idlethread on AP's */ td = PCPU_GET(idlethread); diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 6cb5dba..28d5605 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -296,7 +296,7 @@ static struct witness_order_list_entry order_lists[] = { { "icu", &lock_class_mtx_spin }, #ifdef SMP { "smp rendezvous", &lock_class_mtx_spin }, -#ifdef __i386__ +#if defined(__i386__) || 
defined(__amd64__) { "tlb", &lock_class_mtx_spin }, { "lazypmap", &lock_class_mtx_spin }, #endif |