diff options
50 files changed, 1113 insertions, 2298 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index ecc4c67..2160dc4 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -42,7 +42,6 @@ #include <machine/asmacros.h> #include <machine/apicreg.h> -#include <machine/smptests.h> #include "assym.s" @@ -50,19 +49,48 @@ * Macros to create and destroy a trap frame. */ #define PUSH_FRAME \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; /* 8 ints */ \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ + jz 1f ; /* Yes, dont swapgs again */ \ + swapgs ; \ +1: movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) #define POP_FRAME \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp + movq TF_RDI(%rsp),%rdi ; \ + movq TF_RSI(%rsp),%rsi ; \ + movq TF_RDX(%rsp),%rdx ; \ + movq TF_RCX(%rsp),%rcx ; \ + movq TF_R8(%rsp),%r8 ; \ + movq TF_R9(%rsp),%r9 ; \ + movq TF_RAX(%rsp),%rax ; \ + movq TF_RBX(%rsp),%rbx ; \ + movq TF_RBP(%rsp),%rbp ; \ + movq TF_R10(%rsp),%r10 ; \ + movq TF_R11(%rsp),%r11 ; \ + movq TF_R12(%rsp),%r12 ; \ + movq TF_R13(%rsp),%r13 ; \ + movq TF_R14(%rsp),%r14 ; \ + movq TF_R15(%rsp),%r15 ; \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ + jz 1f ; /* keep kernel GS.base */ \ + cli ; \ + swapgs ; \ +1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ + /* * I/O Interrupt Entry Point. 
Rather than having one entry point for @@ -76,21 +104,15 @@ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - mov %ax, %ds ; \ - mov %ax, %es ; \ - movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ - mov %ax, %fs ; \ - movl lapic, %edx ; /* pointer to local APIC */ \ - movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ + movq lapic, %rdx ; /* pointer to local APIC */ \ + movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highset set bit in ISR */ \ jz 2f ; \ addl $(32 * index),%eax ; \ 1: ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid double count */ \ - pushl %eax ; /* pass the IRQ */ \ + movq %rax, %rdi ; /* pass the IRQ */ \ call lapic_handle_intr ; \ - addl $4, %esp ; /* discard parameter */ \ MEXITCOUNT ; \ jmp doreti ; \ 2: movl $-1, %eax ; /* send a vector of -1 */ \ @@ -109,7 +131,7 @@ IDTVEC(spuriousint) /* No EOI cycle used here */ - iret + iretq MCOUNT_LABEL(bintr2) ISR_VEC(1, apic_isr1) @@ -128,32 +150,19 @@ MCOUNT_LABEL(eintr2) .text SUPERALIGN_TEXT IDTVEC(invltlb) - pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_gbl(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ + pushq %rax - movl %cr3, %eax /* invalidate the TLB */ - movl %eax, %cr3 + movq %cr3, %rax /* invalidate the TLB */ + movq %rax, %cr3 - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait - popl %ds - popl %eax - iret + popq %rax + iretq /* * Single page TLB shootdown @@ -161,32 +170,19 @@ IDTVEC(invltlb) .text SUPERALIGN_TEXT IDTVEC(invlpg) - pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - 
movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_pg(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ + pushq %rax - movl smp_tlb_addr1, %eax - invlpg (%eax) /* invalidate single page */ + movq smp_tlb_addr1, %rax + invlpg (%rax) /* invalidate single page */ - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait - popl %ds - popl %eax - iret + popq %rax + iretq /* * Page range TLB shootdown. @@ -194,38 +190,25 @@ IDTVEC(invlpg) .text SUPERALIGN_TEXT IDTVEC(invlrng) - pushl %eax - pushl %edx - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_rng(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ - - movl smp_tlb_addr1, %edx - movl smp_tlb_addr2, %eax -1: invlpg (%edx) /* invalidate single page */ - addl $PAGE_SIZE, %edx - cmpl %eax, %edx + pushq %rax + pushq %rdx + + movq smp_tlb_addr1, %rdx + movq smp_tlb_addr2, %rax +1: invlpg (%rdx) /* invalidate single page */ + addq $PAGE_SIZE, %rdx + cmpq %rax, %rdx jb 1b - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait - popl %ds - popl %edx - popl %eax - iret + popq %rdx + popq %rax + iretq /* * Forward hardclock to another CPU. 
Pushes a clockframe and calls @@ -235,18 +218,11 @@ IDTVEC(invlrng) SUPERALIGN_TEXT IDTVEC(hardclock) PUSH_FRAME - movl $KDSEL, %eax /* reload with kernel's data segment */ - mov %ax, %ds - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + movq lapic, %rdx + movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ - pushl $0 /* XXX convert trapframe to clockframe */ call forwarded_hardclock - addl $4, %esp /* XXX convert clockframe to trapframe */ MEXITCOUNT jmp doreti @@ -258,20 +234,13 @@ IDTVEC(hardclock) SUPERALIGN_TEXT IDTVEC(statclock) PUSH_FRAME - movl $KDSEL, %eax /* reload with kernel's data segment */ - mov %ax, %ds - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + movq lapic, %rdx + movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) - pushl $0 /* XXX convert trapframe to clockframe */ call forwarded_statclock - addl $4, %esp /* XXX convert clockframe to trapframe */ MEXITCOUNT jmp doreti @@ -287,14 +256,9 @@ IDTVEC(statclock) SUPERALIGN_TEXT IDTVEC(cpuast) PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + movq lapic, %rdx + movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) @@ -311,63 +275,41 @@ IDTVEC(cpuast) .text SUPERALIGN_TEXT IDTVEC(cpustop) - pushl %ebp - movl %esp, %ebp - pushl %eax - pushl %ecx - pushl %edx - pushl %ds /* save current data segment */ - pushl %es - pushl %fs - - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + PUSH_FRAME + + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ movl PCPU(CPUID), %eax imull $PCB_SIZE, %eax - leal 
CNAME(stoppcbs)(%eax), %eax - pushl %eax - call CNAME(savectx) /* Save process context */ - addl $4, %esp + leaq stoppcbs(%rax), %rdi + call savectx /* Save process context */ movl PCPU(CPUID), %eax lock - btsl %eax, CNAME(stopped_cpus) /* stopped_cpus |= (1<<id) */ + btsl %eax, stopped_cpus /* stopped_cpus |= (1<<id) */ 1: - btl %eax, CNAME(started_cpus) /* while (!(started_cpus & (1<<id))) */ + btl %eax, started_cpus /* while (!(started_cpus & (1<<id))) */ jnc 1b lock - btrl %eax, CNAME(started_cpus) /* started_cpus &= ~(1<<id) */ + btrl %eax, started_cpus /* started_cpus &= ~(1<<id) */ lock - btrl %eax, CNAME(stopped_cpus) /* stopped_cpus &= ~(1<<id) */ + btrl %eax, stopped_cpus /* stopped_cpus &= ~(1<<id) */ test %eax, %eax jnz 2f - movl CNAME(cpustop_restartfunc), %eax - test %eax, %eax + movq cpustop_restartfunc, %rax + testq %rax, %rax jz 2f - movl $0, CNAME(cpustop_restartfunc) /* One-shot */ + movq $0, cpustop_restartfunc /* One-shot */ - call *%eax + call *%rax 2: - popl %fs - popl %es - popl %ds /* restore previous data segment */ - popl %edx - popl %ecx - popl %eax - movl %ebp, %esp - popl %ebp - iret + POP_FRAME + iretq /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. @@ -378,19 +320,13 @@ IDTVEC(cpustop) SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - call smp_rendezvous_action - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - POP_FRAME - iret + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + POP_FRAME /* Why not doreti? */ + iretq +#ifdef LAZY_SWITCH /* * Clean up when we lose out on the lazy context switch optimization. * ie: when we are about to release a PTD but a cpu is still borrowing it. 
@@ -398,16 +334,10 @@ IDTVEC(rendezvous) SUPERALIGN_TEXT IDTVEC(lazypmap) PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs - call pmap_lazyfix_action - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - POP_FRAME - iret + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + POP_FRAME /* Why not doreti? */ + iretq +#endif #endif /* SMP */ diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c index d6ce6b6..adec2e0 100644 --- a/sys/amd64/amd64/autoconf.c +++ b/sys/amd64/amd64/autoconf.c @@ -76,7 +76,6 @@ __FBSDID("$FreeBSD$"); #include <nfsclient/nfsdiskless.h> #include <machine/md_var.h> -#include <amd64/isa/icu.h> #ifdef DEV_ISA #include <isa/isavar.h> @@ -109,23 +108,11 @@ configure(dummy) { /* - * Activate the ICU's. Note that we are explicitly at splhigh() - * at present as we have no way to disable stray PCI level triggered - * interrupts until the devices have had a driver attached. This - * is particularly a problem when the interrupts are shared. For - * example, if IRQ 10 is shared between a disk and network device - * and the disk device generates an interrupt, if we "activate" - * IRQ 10 when the network driver is set up, then we will get - * recursive interrupt 10's as nothing will know how to turn off - * the disk device's interrupt. - * - * Having the ICU's active means we can probe interrupt routing to - * see if a device causes the corresponding pending bit to be set. - * - * This is all rather inconvenient. + * Enable interrupts on the processor. The interrupts are still + * disabled in the interrupt controllers until interrupt handlers + * are registered. 
*/ enable_intr(); - INTREN(IRQ_SLAVE); /* nexus0 is the top of the i386 device tree */ device_add_child(root_bus, "nexus", 0); @@ -141,12 +128,6 @@ configure(dummy) if (isa_bus_device) isa_probe_children(isa_bus_device); #endif - - /* - * Now we're ready to handle (pending) interrupts. - * XXX this is slightly misplaced. - */ - spl0(); } static void diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 56f0c84..3bfcfc8 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -59,14 +59,16 @@ * %rsi = newtd */ ENTRY(cpu_throw) - xorq %rax, %rax movl PCPU(CPUID), %eax testq %rdi,%rdi /* no thread? */ jz 1f /* release bit from old pm_active */ movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ - btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ +#ifdef SMP + lock +#endif + btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ 1: movq TD_PCB(%rsi),%rdx /* newtd->td_proc */ movq PCB_CR3(%rdx),%rdx @@ -74,7 +76,10 @@ ENTRY(cpu_throw) /* set bit in new pm_active */ movq TD_PROC(%rsi),%rdx movq P_VMSPACE(%rdx), %rdx - btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ +#ifdef SMP + lock +#endif + btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ jmp sw1 /* @@ -143,7 +148,6 @@ ENTRY(cpu_switch) jz badsw3 /* no, panic */ #endif movq TD_PCB(%rsi),%r8 - xorq %rax, %rax movl PCPU(CPUID), %eax /* switch address space */ @@ -153,12 +157,18 @@ ENTRY(cpu_switch) /* Release bit from old pmap->pm_active */ movq TD_PROC(%rdi), %rdx /* oldproc */ movq P_VMSPACE(%rdx), %rdx - btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ +#ifdef SMP + lock +#endif + btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ /* Set bit in new pmap->pm_active */ movq TD_PROC(%rsi),%rdx /* newproc */ movq P_VMSPACE(%rdx), %rdx - btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ +#ifdef SMP + lock +#endif + btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ sw1: /* @@ -191,8 +201,11 @@ sw1: wrmsr /* Update the TSS_RSP0 pointer 
for the next interrupt */ + movq PCPU(TSSP), %rax + addq $COMMON_TSS_RSP0, %rax leaq -16(%r8), %rbx - movq %rbx, common_tss + COMMON_TSS_RSP0 + movq %rbx, (%rax) + movq %rbx, PCPU(RSP0) /* Restore context. */ movq PCB_RBX(%r8),%rbx diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c index 3dd6a8a..077c914 100644 --- a/sys/amd64/amd64/db_interface.c +++ b/sys/amd64/amd64/db_interface.c @@ -98,6 +98,22 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) ef = read_rflags(); disable_intr(); +#ifdef SMP + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf("\nCPU%d stopping CPUs: 0x%08x...", PCPU_GET(cpuid), + PCPU_GET(other_cpus)); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + + /* We stop all CPUs except ourselves (obviously) */ + stop_cpus(PCPU_GET(other_cpus)); + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf(" stopped.\n"); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + +#endif /* SMP */ + switch (type) { case T_BPTFLT: /* breakpoint */ case T_TRCTRAP: /* debug exception */ @@ -192,6 +208,27 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) regs->tf_ds = ddb_regs.tf_ds & 0xffff; #endif +#ifdef SMP + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf("\nCPU%d restarting CPUs: 0x%08x...", PCPU_GET(cpuid), + stopped_cpus); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + + /* Restart all the CPUs we previously stopped */ + if (stopped_cpus != PCPU_GET(other_cpus) && smp_started != 0) { + db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", + PCPU_GET(other_cpus), stopped_cpus); + panic("stop_cpus() failed"); + } + restart_cpus(stopped_cpus); + +#if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) + db_printf(" restarted.\n"); +#endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ + +#endif /* SMP */ + write_rflags(ef); return (1); diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c index a05348a..7dba9bb 100644 --- a/sys/amd64/amd64/db_trace.c +++ b/sys/amd64/amd64/db_trace.c @@ -245,8 +245,9 @@ db_nextframe(fp, 
ip, p) if (strcmp(name, "calltrap") == 0 || strcmp(name, "fork_trampoline") == 0) frame_type = TRAP; - else if (strncmp(name, "Xintr", 5) == 0 || - strncmp(name, "Xfastintr", 9) == 0) + else if (strncmp(name, "Xatpic_intr", 11) == 0 || + strncmp(name, "Xatpic_fastintr", 15) == 0 || + strncmp(name, "Xapic_isr", 9) == 0) frame_type = INTERRUPT; else if (strcmp(name, "Xfast_syscall") == 0) frame_type = SYSCALL; diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 3d2eaa6..972f19c 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -35,14 +35,11 @@ */ #include <machine/asmacros.h> -#include <sys/mutex.h> #include <machine/psl.h> #include <machine/trap.h> #include "assym.s" -#define SEL_RPL_MASK 0x0003 - .text /*****************************************************************************/ @@ -72,8 +69,6 @@ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we * must load them with appropriate values for supervisor mode operation. */ -#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ - .type __CONCAT(X,name),@function; __CONCAT(X,name): MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) @@ -223,7 +218,7 @@ IDTVEC(page) IDTVEC(fast_syscall) swapgs movq %rsp,PCPU(SCRATCH_RSP) - movq common_tss+COMMON_TSS_RSP0,%rsp + movq PCPU(RSP0),%rsp /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */ subq $TF_SIZE,%rsp /* defer TF_RSP till we have a spare register */ @@ -297,14 +292,6 @@ ENTRY(fork_trampoline) call fork_exit jmp doreti /* Handle any ASTs */ - -/* - * Include what was once config+isa-dependent code. - * XXX it should be in a stand-alone file. It's still icu-dependent and - * belongs in i386/isa. 
- */ -#include "amd64/isa/vector.S" - .data ALIGN_DATA @@ -406,5 +393,3 @@ doreti_iret_fault: movq $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ERR(%rsp) /* XXX should be the error code */ jmp alltraps_with_regs_pushed - -#include "amd64/isa/icu_ipl.S" diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 1e4890c..1acb931 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cputypes.h> #include <machine/frame.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/pcb.h> #include <machine/psl.h> @@ -63,8 +64,6 @@ __FBSDID("$FreeBSD$"); #include <machine/segments.h> #include <machine/ucontext.h> -#include <amd64/isa/intr_machdep.h> - /* * Floating point support. */ diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 6a017e5..27a1a12 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -69,10 +69,12 @@ __FBSDID("$FreeBSD$"); #include <nfs/rpcv2.h> #include <nfsclient/nfs.h> #include <nfsclient/nfsdiskless.h> +#include <machine/apicreg.h> #include <machine/cpu.h> #include <machine/sigframe.h> #include <machine/proc.h> #include <machine/specialreg.h> +#include <machine/segments.h> ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -83,11 +85,6 @@ ASSYM(P_UAREA, offsetof(struct proc, p_uarea)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); -ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level)); -ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest)); -ASSYM(TD_MD, offsetof(struct thread, td_md)); - -ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); @@ -180,6 +177,7 @@ ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_rflags)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); 
ASSYM(ENAMETOOLONG, ENAMETOOLONG); +ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(MAXPATHLEN, MAXPATHLEN); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); @@ -189,12 +187,24 @@ ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp)); +ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); +ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp)); +ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0)); + +ASSYM(LA_VER, offsetof(struct LAPIC, version)); +ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); +ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); +ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); +ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); +ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); +ASSYM(LA_ISR, offsetof(struct LAPIC, isr0)); ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL)); +ASSYM(SEL_RPL_MASK, SEL_RPL_MASK); ASSYM(MSR_FSBASE, MSR_FSBASE); ASSYM(MSR_GSBASE, MSR_GSBASE); diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index ba8e58e..f3d70c2 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -55,12 +55,13 @@ __FBSDID("$FreeBSD$"); #include <machine/asmacros.h> #include <machine/clock.h> #include <machine/cputypes.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> #include <machine/segments.h> #include <machine/specialreg.h> #include <machine/md_var.h> #include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> /* XXX - should be in header file: */ void printcpuinfo(void); diff --git a/sys/amd64/amd64/io_apic.c b/sys/amd64/amd64/io_apic.c index 4af70fa..b620440 100644 --- a/sys/amd64/amd64/io_apic.c +++ b/sys/amd64/amd64/io_apic.c @@ -30,6 +30,7 @@ #include 
<sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_atpic.h" #include "opt_isa.h" #include "opt_no_mixed_mode.h" @@ -50,8 +51,8 @@ __FBSDID("$FreeBSD$"); #include <machine/apicvar.h> #include <machine/segments.h> -#if defined(DEV_ISA) && !defined(NO_MIXED_MODE) -#define MIXED_MODE +#if defined(DEV_ISA) && defined(DEV_ATPIC) && !defined(NO_MIXED_MODE) +#define MIXED_MODE #endif #define IOAPIC_ISA_INTS 16 diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c index 6f942bf..bdff518 100644 --- a/sys/amd64/amd64/local_apic.c +++ b/sys/amd64/amd64/local_apic.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/kernel.h> #include <sys/pcpu.h> +#include <sys/proc.h> #include <vm/vm.h> #include <vm/pmap.h> @@ -171,8 +172,7 @@ lapic_init(uintptr_t addr) KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t)); - setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); /* Perform basic initialization of the BSP's local APIC. 
*/ value = lapic->svr; @@ -242,8 +242,7 @@ lapic_enable_intr(u_int irq) KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for IRQ %u", irq)); - setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(vector, ioint_handlers[vector / 32], SDT_SYSIGT, SEL_KPL, 0); } void @@ -478,13 +477,14 @@ lapic_eoi(void) } void -lapic_handle_intr(struct intrframe frame) +lapic_handle_intr(void *cookie, struct intrframe frame) { struct intsrc *isrc; + int vec = (uintptr_t)cookie; - if (frame.if_vec == -1) + if (vec == -1) panic("Couldn't get vector from ISR!"); - isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec)); + isrc = intr_lookup_source(apic_idt_to_irq(vec)); intr_execute_handlers(isrc, &frame); } @@ -589,21 +589,9 @@ static void apic_setup_local(void *dummy __unused) { int retval; - uint64_t apic_base; if (best_enum == NULL) return; - /* - * To work around an errata, we disable the local APIC on some - * CPUs during early startup. We need to turn the local APIC back - * on on such CPUs now. 
- */ - if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && - (cpu_id & 0xff0) == 0x610) { - apic_base = rdmsr(MSR_APICBASE); - apic_base |= APICBASE_ENABLED; - wrmsr(MSR_APICBASE, apic_base); - } retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e32d82a..2140d7a 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include "opt_atalk.h" +#include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" @@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$"); #include <machine/reg.h> #include <machine/clock.h> #include <machine/specialreg.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/metadata.h> #include <machine/proc.h> @@ -108,9 +110,13 @@ __FBSDID("$FreeBSD$"); #include <machine/perfmon.h> #endif #include <machine/tss.h> +#ifdef SMP +#include <machine/smp.h> +#endif #include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> + +#include <isa/isareg.h> #include <isa/rtc.h> #include <sys/ptrace.h> #include <machine/sigframe.h> @@ -146,7 +152,9 @@ vm_paddr_t phys_avail[10]; struct kva_md_info kmi; static struct trapframe proc0_tf; -static struct pcpu __pcpu; +struct region_descriptor r_gdt, r_idt; + +struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; @@ -196,7 +204,6 @@ cpu_startup(dummy) bufinit(); vm_pager_bufferinit(); - /* For SMP, we delay the cpu_setregs() until after SMP startup. 
*/ cpu_setregs(); } @@ -589,13 +596,13 @@ SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, * Initialize segments & interrupt table */ -struct user_segment_descriptor gdt[NGDT];/* global descriptor table */ +struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); -struct amd64tss common_tss; +struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { @@ -755,6 +762,15 @@ ssdtosyssd(ssd, sd) sd->sd_gran = ssd->ssd_gran; } +#if !defined(DEV_ATPIC) && defined(DEV_ISA) +#include <isa/isavar.h> +u_int +isa_irq_pending(void) +{ + + return (0); +} +#endif #define PHYSMAP_SIZE (2 * 8) @@ -783,7 +799,6 @@ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx; - u_int extmem; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; @@ -802,12 +817,9 @@ getmemsize(caddr_t kmdp, u_int64_t first) * ie: an int32_t immediately precedes smap. 
*/ smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); - if (smapbase == 0) - smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | 0x0009); /* Old value for MODINFOMD_SMAP */ - if (smapbase == 0) { + if (smapbase == NULL) panic("No BIOS smap info from loader!"); - goto deep_shit; - } + smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); @@ -816,14 +828,11 @@ getmemsize(caddr_t kmdp, u_int64_t first) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); - if (smap->type != 0x01) { + if (smap->type != 0x01) continue; - } - if (smap->length == 0) { -next_run: + if (smap->length == 0) continue; - } for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { @@ -836,6 +845,7 @@ next_run: if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; +next_run: continue; } @@ -850,69 +860,23 @@ next_run: } /* - * Perform "base memory" related probes & setup based on SMAP + * Find the 'base memory' segment for SMP */ -deep_shit: - if (basemem == 0) { - for (i = 0; i <= physmap_idx; i += 2) { - if (physmap[i] == 0x00000000) { - basemem = physmap[i + 1] / 1024; - break; - } - } - - if (basemem == 0) { - basemem = rtcin(RTC_BASELO) + (rtcin(RTC_BASEHI) << 8); - } - - if (basemem == 0) { - basemem = 640; - } - - if (basemem > 640) { - printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", - basemem); - basemem = 640; + basemem = 0; + for (i = 0; i <= physmap_idx; i += 2) { + if (physmap[i] == 0x00000000) { + basemem = physmap[i + 1] / 1024; + break; } - -#if 0 - for (pa = trunc_page(basemem * 1024); - pa < ISA_HOLE_START; pa += PAGE_SIZE) - pmap_kenter(KERNBASE + pa, pa); -#endif } + if (basemem == 0) + panic("BIOS smap did not include a basemem segment!"); - if (physmap[1] != 0) - goto physmap_done; - - /* - * Prefer the RTC value for extended memory. 
- */ - extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); - - /* - * Special hack for chipsets that still remap the 384k hole when - * there's 16MB of memory - this really confuses people that - * are trying to use bus mastering ISA controllers with the - * "16MB limit"; they only have 16MB, but the remapping puts - * them beyond the limit. - * - * If extended memory is between 15-16MB (16-17MB phys address range), - * chop it to 15MB. - */ - if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) - extmem = 15 * 1024; - - physmap[0] = 0; - physmap[1] = basemem * 1024; - physmap_idx = 2; - physmap[physmap_idx] = 0x100000; - physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; +#ifdef SMP + /* make hole for AP bootstrap code */ + physmap[1] = mp_bootaddress(physmap[1] / 1024); +#endif -physmap_done: - /* - * Now, physmap contains a map of physical memory. - */ /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be @@ -929,7 +893,8 @@ physmap_done: * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. 
*/ - if ((cp = getenv("hw.physmem")) != NULL) { + cp = getenv("hw.physmem"); + if (cp != NULL) { u_int64_t AllowMem, sanity; char *ep; @@ -1106,11 +1071,18 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, off, x; - struct region_descriptor r_gdt, r_idt; struct pcpu *pc; u_int64_t msr; char *env; +#ifdef DEV_ISA + /* Preemptively mask the atpics and leave them shut down */ + outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); + outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); +#else +#error "have you forgotten the isa device?"; +#endif + /* Turn on PTE NX (no execute) bit */ msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); @@ -1146,7 +1118,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) /* * make gdt memory segments */ - gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss; + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) @@ -1157,7 +1129,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); - pc = &__pcpu; + pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); @@ -1166,6 +1138,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); + PCPU_SET(tssp, &common_tss[0]); /* * Initialize mutexes. @@ -1211,8 +1184,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) */ cninit(); -#ifdef DEV_ISA - isa_defaultirq(); +#ifdef DEV_ATPIC + atpic_startup(); #endif #ifdef DDB @@ -1225,12 +1198,14 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ - common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); + common_tss[0].tss_rsp0 = thread0.td_kstack + \ + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); /* Ensure the stack is aligned to 16 bytes */ - common_tss.tss_rsp0 &= ~0xF; + common_tss[0].tss_rsp0 &= ~0xF; + PCPU_SET(rsp0, common_tss[0].tss_rsp0); /* doublefault stack space, runs on ist1 */ - common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index 56d268c..aeb2e35 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -323,6 +323,15 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg) return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg)); } +#ifdef SMP +void +mem_range_AP_init(void) +{ + if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) + (mem_range_softc.mr_op->initAP(&mem_range_softc)); +} +#endif + static int mem_modevent(module_t mod, int type, void *data) { diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index f58a94f..fcd478b 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 1996, by Steve Passe + * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,30 +27,12 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" -#if !defined(lint) -#if !defined(SMP) -#error How did you get here? -#endif - -#if defined(I386_CPU) && !defined(COMPILING_LINT) -#error SMP not supported with I386_CPU -#endif -#ifndef DEV_APIC -#error The apic device is required for SMP, add "device apic" to your config file. 
-#endif -#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) -#error SMP not supported with CPU_DISABLE_CMPXCHG -#endif -#endif /* not lint */ - #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> -#include <sys/cons.h> /* cngetc() */ #ifdef GPROF #include <sys/gmon.h> #endif @@ -75,9 +58,8 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/pcb.h> #include <machine/smp.h> -#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */ #include <machine/specialreg.h> -#include <machine/privatespace.h> +#include <machine/tss.h> #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) @@ -88,67 +70,9 @@ __FBSDID("$FreeBSD$"); #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) -/* - * this code MUST be enabled here and in mpboot.s. - * it follows the very early stages of AP boot by placing values in CMOS ram. - * it NORMALLY will never be needed and thus the primitive method for enabling. - * -#define CHECK_POINTS - */ - -#if defined(CHECK_POINTS) && !defined(PC98) -#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) -#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) - -#define CHECK_INIT(D); \ - CHECK_WRITE(0x34, (D)); \ - CHECK_WRITE(0x35, (D)); \ - CHECK_WRITE(0x36, (D)); \ - CHECK_WRITE(0x37, (D)); \ - CHECK_WRITE(0x38, (D)); \ - CHECK_WRITE(0x39, (D)); - -#define CHECK_PRINT(S); \ - printf("%s: %d, %d, %d, %d, %d, %d\n", \ - (S), \ - CHECK_READ(0x34), \ - CHECK_READ(0x35), \ - CHECK_READ(0x36), \ - CHECK_READ(0x37), \ - CHECK_READ(0x38), \ - CHECK_READ(0x39)); - -#else /* CHECK_POINTS */ - -#define CHECK_INIT(D) -#define CHECK_PRINT(S) -#define CHECK_WRITE(A, D) - -#endif /* CHECK_POINTS */ - -/* - * Values to send to the POST hardware. 
- */ -#define MP_BOOTADDRESS_POST 0x10 -#define MP_PROBE_POST 0x11 -#define MPTABLE_PASS1_POST 0x12 - -#define MP_START_POST 0x13 -#define MP_ENABLE_POST 0x14 -#define MPTABLE_PASS2_POST 0x15 - -#define START_ALL_APS_POST 0x16 -#define INSTALL_AP_TRAMP_POST 0x17 -#define START_AP_POST 0x18 - -#define MP_ANNOUNCE_POST 0x19 - /* lock region used by kernel profiling */ int mcount_lock; -/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ -int current_postcode; - int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern int nkpt; @@ -164,6 +88,9 @@ struct cpu_top *smp_topology; char *bootSTK; static int bootAP; +/* Free these after use */ +void *bootstacks[MAXCPU]; + /* Hotwire a 0->4MB V==P mapping */ extern pt_entry_t *KPTphys; @@ -178,6 +105,8 @@ vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; struct mtx smp_tlb_mtx; +extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); + /* * Local data and functions. */ @@ -201,17 +130,17 @@ struct cpu_info { } static cpu_info[MAXCPU]; static int cpu_apic_ids[MAXCPU]; -static u_int boot_address; +static u_int boot_address; static void set_logical_apic_ids(void); static int start_all_aps(void); -static void install_ap_tramp(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static int hlt_cpus_mask; static int hlt_logical_cpus; static struct sysctl_ctx_list logical_cpu_clist; +static u_int bootMP_size; /* * Calculate usable address in base memory for AP trampoline code. 
@@ -219,13 +148,15 @@ static struct sysctl_ctx_list logical_cpu_clist; u_int mp_bootaddress(u_int basemem) { - POSTCODE(MP_BOOTADDRESS_POST); - boot_address = trunc_page(basemem); /* round down to 4k boundary */ + bootMP_size = mptramp_end - mptramp_start; + boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ if ((basemem - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ + /* 3 levels of page table pages */ + mptramp_pagetables = boot_address - (PAGE_SIZE * 3); - return boot_address; + return mptramp_pagetables; } void @@ -302,43 +233,34 @@ cpu_mp_start(void) { int i; - POSTCODE(MP_START_POST); - /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) cpu_apic_ids[i] = -1; /* Install an inter-CPU IPI for TLB invalidation */ - setidt(IPI_INVLTLB, IDTVEC(invltlb), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IPI_INVLPG, IDTVEC(invlpg), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IPI_INVLRNG, IDTVEC(invlrng), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); + setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for forwarding hardclock() */ - setidt(IPI_HARDCLOCK, IDTVEC(hardclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_HARDCLOCK, IDTVEC(hardclock), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for forwarding statclock() */ - setidt(IPI_STATCLOCK, IDTVEC(statclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_STATCLOCK, IDTVEC(statclock), SDT_SYSIGT, SEL_KPL, 0); +#ifdef LAZY_SWITCH /* Install an inter-CPU IPI for lazy pmap release */ - setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYSIGT, SEL_KPL, 0); +#endif /* Install an inter-CPU IPI for 
all-CPU rendezvous */ - setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for forcing an additional software trap */ - setidt(IPI_AST, IDTVEC(cpuast), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_AST, IDTVEC(cpuast), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ - setidt(IPI_STOP, IDTVEC(cpustop), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN); @@ -371,8 +293,6 @@ cpu_mp_announce(void) { int i, x; - POSTCODE(MP_ANNOUNCE_POST); - /* List CPUs */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1, x = 0; x < MAXCPU; x++) { @@ -390,38 +310,41 @@ cpu_mp_announce(void) void init_secondary(void) { - int gsel_tss; - int x, myid; - u_int cr0; - - /* bootAP is set in start_ap() to our ID. 
*/ - myid = bootAP; - gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid]; - gdt_segs[GPROC0_SEL].ssd_base = - (int) &SMP_prvspace[myid].pcpu.pc_common_tss; - SMP_prvspace[myid].pcpu.pc_prvspace = - &SMP_prvspace[myid].pcpu; - - for (x = 0; x < NGDT; x++) { - ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); - } + struct pcpu *pc; + u_int64_t msr, cr0; + int cpu, gsel_tss; + + /* Set by the startup code for us to use */ + cpu = bootAP; + + /* Init tss */ + common_tss[cpu] = common_tss[0]; + common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ + + gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; + ssdtosyssd(&gdt_segs[GPROC0_SEL], + (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); - r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; - r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ - lidt(&r_idt); + /* Get per-cpu data */ + pc = &__pcpu[cpu]; + + /* prime data page for it to use */ + pcpu_init(pc, cpu, sizeof(struct pcpu)); + pc->pc_apic_id = cpu_apic_ids[cpu]; + pc->pc_prvspace = pc; + pc->pc_curthread = 0; + pc->pc_tssp = &common_tss[cpu]; + pc->pc_rsp0 = 0; - lldt(_default_ldt); - PCPU_SET(currentldt, _default_ldt); + wrmsr(MSR_FSBASE, 0); /* User value */ + wrmsr(MSR_GSBASE, (u_int64_t)pc); + wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ + + lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); - gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; - PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); - PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); - PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); - PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); /* @@ -432,32 +355,32 @@ init_secondary(void) cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); - CHECK_WRITE(0x38, 5); - - /* Disable local APIC just to be sure. 
*/ + + /* Set up the fast syscall stuff */ + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, msr); + wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + + /* Disable local apic just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; - CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); - /* BSP may have changed PTD while we were waiting */ - invltlb(); - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - lidt(&r_idt); -#endif - /* set up CPU registers and state */ cpu_setregs(); /* set up FPU state on the AP */ - npxinit(__INITIAL_NPXCW__); + fpuinit(); /* set up SSE registers */ enable_sse(); @@ -467,7 +390,6 @@ init_secondary(void) printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); - printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]); panic("cpuid mismatch! 
boom!!"); } @@ -559,39 +481,51 @@ set_logical_apic_ids(void) static int start_all_aps(void) { -#ifndef PC98 u_char mpbiosreason; -#endif - u_long mpbioswarmvec; - struct pcpu *pc; - char *stack; - uintptr_t kptbase; - int i, pg, apic_id, cpu; - - POSTCODE(START_ALL_APS_POST); + u_int32_t mpbioswarmvec; + int apic_id, cpu, i; + u_int64_t *pt4, *pt3, *pt2; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ - install_ap_tramp(); + pmap_kenter(boot_address + KERNBASE, boot_address); + bcopy(mptramp_start, (void *)((uintptr_t)boot_address + KERNBASE), bootMP_size); + + /* Locate the page tables, they'll be below the trampoline */ + pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); + pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); + pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); + + /* Create the initial 1GB replicated page tables */ + for (i = 0; i < 512; i++) { + /* Each slot of the level 4 pages points to the same level 3 page */ + pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); + pt4[i] |= PG_V | PG_RW | PG_U; + + /* Each slot of the level 3 pages points to the same level 2 page */ + pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); + pt3[i] |= PG_V | PG_RW | PG_U; + + /* The level 2 page slots are mapped with 2MB pages for 1GB. 
*/ + pt2[i] = i * (2 * 1024 * 1024); + pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; + } /* save the current value of the warm-start vector */ - mpbioswarmvec = *((u_long *) WARMBOOT_OFF); -#ifndef PC98 + mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); -#endif - /* set up temporary P==V mapping for AP boot */ - /* XXX this is a hack, we should boot the AP on its own stack/PTD */ - kptbase = (uintptr_t)(void *)KPTphys; - for (i = 0; i < NKPT; i++) - PTD[i] = (pd_entry_t)(PG_V | PG_RW | - ((kptbase + i * PAGE_SIZE) & PG_FRAME)); - invltlb(); + /* setup a vector to our boot code */ + *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* start each AP */ - for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { + cpu = 0; + for (apic_id = 0; apic_id < MAXCPU; apic_id++) { if (!cpu_info[apic_id].cpu_present || cpu_info[apic_id].cpu_bsp) continue; @@ -600,48 +534,18 @@ start_all_aps(void) /* save APIC ID for this logical ID */ cpu_apic_ids[cpu] = apic_id; - /* first page of AP's private space */ - pg = cpu * i386_btop(sizeof(struct privatespace)); - - /* allocate a new private data page */ - pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE); - - /* wire it into the private page table page */ - SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc)); - /* allocate and set up an idle stack data page */ - stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */ - for (i = 0; i < KSTACK_PAGES; i++) - SMPpt[pg + 1 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - - /* prime data page for it to use */ - pcpu_init(pc, cpu, sizeof(struct pcpu)); - pc->pc_apic_id = apic_id; - - /* setup a vector to our boot code */ - *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; - *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 
-#ifndef PC98 - outb(CMOS_REG, BIOS_RESET); - outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ -#endif + bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES * - PAGE_SIZE]; + bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ - CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { - printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); - CHECK_PRINT("trace"); /* show checkpoints */ - /* better panic as the AP may be running loose */ - printf("panic y/n? [y] "); - if (cngetc() != 'n') - panic("bye-bye"); + /* restore the warmstart vector */ + *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; + panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } - CHECK_PRINT("trace"); /* show checkpoints */ all_cpus |= (1 << cpu); /* record AP in CPU map */ } @@ -650,92 +554,15 @@ start_all_aps(void) PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); /* restore the warmstart vector */ - *(u_long *) WARMBOOT_OFF = mpbioswarmvec; -#ifndef PC98 + *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; + outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); -#endif - - /* - * Set up the idle context for the BSP. Similar to above except - * that some was done by locore, some by pmap.c and some is implicit - * because the BSP is cpu#0 and the page is initially zero and also - * because we can refer to variables by name on the BSP.. - */ - - /* Allocate and setup BSP idle stack */ - stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - for (i = 0; i < KSTACK_PAGES; i++) - SMPpt[1 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - - for (i = 0; i < NKPT; i++) - PTD[i] = 0; - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } -/* - * load the 1st level AP boot code into base memory. 
- */ - -/* targets for relocation */ -extern void bigJump(void); -extern void bootCodeSeg(void); -extern void bootDataSeg(void); -extern void MPentry(void); -extern u_int MP_GDT; -extern u_int mp_gdtbase; - -static void -install_ap_tramp(void) -{ - int x; - int size = *(int *) ((u_long) & bootMP_size); - u_char *src = (u_char *) ((u_long) bootMP); - u_char *dst = (u_char *) boot_address + KERNBASE; - u_int boot_base = (u_int) bootMP; - u_int8_t *dst8; - u_int16_t *dst16; - u_int32_t *dst32; - - POSTCODE(INSTALL_AP_TRAMP_POST); - - pmap_kenter(boot_address + KERNBASE, boot_address); - for (x = 0; x < size; ++x) - *dst++ = *src++; - - /* - * modify addresses in code we just moved to basemem. unfortunately we - * need fairly detailed info about mpboot.s for this to work. changes - * to mpboot.s might require changes here. - */ - - /* boot code is located in KERNEL space */ - dst = (u_char *) boot_address + KERNBASE; - - /* modify the lgdt arg */ - dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); - *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); - - /* modify the ljmp target for MPentry() */ - dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); - *dst32 = ((u_int) MPentry - KERNBASE); - - /* modify the target for boot code segment */ - dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); - dst8 = (u_int8_t *) (dst16 + 1); - *dst16 = (u_int) boot_address & 0xffff; - *dst8 = ((u_int) boot_address >> 16) & 0xff; - - /* modify the target for boot data segment */ - dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); - dst8 = (u_int8_t *) (dst16 + 1); - *dst16 = (u_int) boot_address & 0xffff; - *dst8 = ((u_int) boot_address >> 16) & 0xff; -} /* * This function starts the AP (application processor) identified @@ -750,8 +577,6 @@ start_ap(int apic_id) int vector, ms; int cpus; - POSTCODE(START_AP_POST); - /* calculate the vector */ vector = (boot_address >> 12) & 0xff; @@ -810,50 +635,14 @@ start_ap(int apic_id) 
DELAY(200); /* wait ~200uS */ /* Wait up to 5 seconds for it to start. */ - for (ms = 0; ms < 5000; ms++) { + for (ms = 0; ms < 50; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ - DELAY(1000); + DELAY(100000); } return 0; /* return FAILURE */ } -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif /* COUNT_XINVLTLB_HITS */ - /* * Flush the TLB on all other CPU's */ @@ -966,69 +755,49 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse void smp_invltlb(void) { - if (smp_started) { + + if (smp_started) smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } } void smp_invlpg(vm_offset_t 
addr) { - if (smp_started) { + + if (smp_started) smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { - if (smp_started) { + + if (smp_started) smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } } void smp_masked_invltlb(u_int mask) { - if (smp_started) { + + if (smp_started) smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } } void smp_masked_invlpg(u_int mask, vm_offset_t addr) { - if (smp_started) { + + if (smp_started) smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } } void smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) { - if (smp_started) { + + if (smp_started) smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } } diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S index 8f42f6b..ca53a87 100644 --- a/sys/amd64/amd64/mpboot.S +++ b/sys/amd64/amd64/mpboot.S @@ -1,5 +1,5 @@ -/* - * Copyright (c) 1995, Jack F. Vogel +/*- + * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -10,16 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Jack F. Vogel - * 4. 
The name of the developer may be used to endorse or promote products - * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -28,257 +23,214 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * mpboot.s: FreeBSD machine support for the Intel MP Spec - * multiprocessor systems. - * * $FreeBSD$ */ -#include "opt_pmap.h" - #include <machine/asmacros.h> /* miscellaneous asm macros */ -#include <machine/apicreg.h> #include <machine/specialreg.h> #include "assym.s" -#define R(x) ((x)-KERNBASE) - -/* - * this code MUST be enabled here and in mp_machdep.c - * it follows the very early stages of AP boot by placing values in CMOS ram. - * it NORMALLY will never be needed and thus the primitive method for enabling. - * -#define CHECK_POINTS - */ - -#if defined(CHECK_POINTS) && !defined(PC98) - -#define CMOS_REG (0x70) -#define CMOS_DATA (0x71) - -#define CHECKPOINT(A,D) \ - movb $(A),%al ; \ - outb %al,$CMOS_REG ; \ - movb $(D),%al ; \ - outb %al,$CMOS_DATA + .data /* So we can modify it */ -#else - -#define CHECKPOINT(A,D) - -#endif /* CHECK_POINTS */ - - -/* - * the APs enter here from their trampoline code (bootMP, below) - */ - .p2align 4 - -NON_GPROF_ENTRY(MPentry) - CHECKPOINT(0x36, 3) + .p2align 4,0 + .globl mptramp_start +mptramp_start: + .code16 /* - * Enable features on this processor. 
We don't support SMP on - * CPUs older than a Pentium, so we know that we can use the cpuid - * instruction. + * The AP enters here in response to the startup IPI. + * We are in real mode. %cs is the only segment register set. */ - movl $1,%eax - cpuid /* Retrieve features */ - movl %cr4,%eax -#ifndef DISABLE_PSE - testl $CPUID_PSE,%edx - jz 1f - orl $CR4_PSE,%eax /* Enable PSE */ -1: -#endif -#ifndef DISABLE_PG_G - testl $CPUID_PGE,%edx - jz 1f - orl $CR4_PGE,%eax /* Enable PGE */ -1: -#endif - testl $CPUID_VME,%edx - jz 1f - orl $CR4_VME,%eax /* Enable VME */ -1: - movl %eax,%cr4 - - /* Now enable paging mode */ -#ifdef PAE - movl R(IdlePDPT), %eax - movl %eax, %cr3 - movl %cr4, %eax - orl $CR4_PAE, %eax - movl %eax, %cr4 -#else - movl R(IdlePTD), %eax - movl %eax,%cr3 -#endif - movl %cr0,%eax - orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl %eax,%cr0 /* let the games begin! */ - movl bootSTK,%esp /* boot stack end loc. */ - - pushl $mp_begin /* jump to high mem */ - ret + cli /* make sure no interrupts */ + mov %cs, %ax /* copy %cs to %ds. Remember these */ + mov %ax, %ds /* are offsets rather than selectors */ + mov %ax, %ss /* - * Wait for the booting CPU to signal startup + * Find relocation base and patch the gdt descriptor and ljmp targets */ -mp_begin: /* now running relocated at KERNBASE */ - CHECKPOINT(0x37, 4) - call init_secondary /* load i386 tables */ - -/* - * This is the embedded trampoline or bootstrap that is - * copied into 'real-mode' low memory, it is where the - * secondary processor "wakes up". When it is executed - * the processor will eventually jump into the routine - * MPentry, which resides in normal kernel text above - * 1Meg. -jackv - */ + xorl %ebx,%ebx + mov %cs, %bx + sall $4, %ebx /* %ebx is now our relocation base */ + orl %ebx, lgdt_desc-mptramp_start+2 + orl %ebx, jmp_32-mptramp_start+2 + orl %ebx, jmp_64-mptramp_start+1 - .data - ALIGN_DATA /* just to be sure */ + /* + * Load the descriptor table pointer.
We'll need it when running + * in 16 bit protected mode. + */ + lgdt lgdt_desc-mptramp_start -BOOTMP1: + /* Enable protected mode */ + movl $CR0_PE, %eax + mov %eax, %cr0 -NON_GPROF_ENTRY(bootMP) - .code16 - cli - CHECKPOINT(0x34, 1) - /* First guarantee a 'clean slate' */ - xorl %eax, %eax - movl %eax, %ebx - movl %eax, %ecx - movl %eax, %edx - movl %eax, %esi - movl %eax, %edi + /* + * Now execute a far jump to turn on protected mode. This + * causes the segment registers to turn into selectors and causes + * %cs to be loaded from the gdt. + * + * The following instruction is: + * ljmpl $bootcode-gdt, $protmode-mptramp_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and it's a little clearer what we are patching. + */ +jmp_32: + .byte 0x66 /* size override to 32 bits */ + .byte 0xea /* opcode for far jump */ + .long protmode-mptramp_start /* offset in segment */ + .word bootcode-gdt /* index in gdt for 32 bit code */ - /* set up data segments */ - mov %cs, %ax + /* + * At this point, we are running in 32 bit legacy protected mode. + */ + .code32 +protmode: + mov $bootdata-gdt, %eax mov %ax, %ds - mov %ax, %es - mov %ax, %fs - mov %ax, %gs - mov %ax, %ss - mov $(boot_stk-bootMP), %esp - /* Now load the global descriptor table */ - lgdt MP_GDTptr-bootMP + /* Turn on the PAE and PSE bits for when paging is enabled */ + mov %cr4, %eax + orl $(CR4_PAE | CR4_PSE), %eax + mov %eax, %cr4 - /* Enable protected mode */ - movl %cr0, %eax - orl $CR0_PE, %eax - movl %eax, %cr0 + /* + * Enable EFER.LME so that we get long mode when all the prereqs are + * in place. In this case, it turns on when CR0_PG is finally enabled. + * Pick up a few other EFER bits that we'll need while we're here. + */ + movl $MSR_EFER, %ecx + rdmsr + orl $EFER_LME | EFER_SCE | EFER_NXE, %eax + wrmsr /* - * make intrasegment jump to flush the processor pipeline and - * reload CS register + * Point to the embedded page tables for startup.
Note that this + * only gets accessed after we're actually in 64 bit mode, however + * we can only set the bottom 32 bits of %cr3 in this state. This + * means we are required to use a temporary page table that is below + * the 4GB limit. %ebx is still our relocation base. We could just + * subtract 3 * PAGE_SIZE, but that would be too easy. */ - pushl $0x18 - pushl $(protmode-bootMP) - lretl + leal mptramp_pagetables-mptramp_start(%ebx),%eax + movl (%eax), %eax + mov %eax, %cr3 - .code32 -protmode: - CHECKPOINT(0x35, 2) + /* + * Finally, switch to long mode by enabling paging. We have + * to be very careful here because all the segmentation disappears + * out from underneath us. The spec says we can depend on the + * subsequent pipelined branch to execute, but *only if* everything + * is still identity mapped. If any mappings change, the pipeline + * will flush. + */ + mov %cr0, %eax + orl $CR0_PG, %eax + mov %eax, %cr0 /* - * we are NOW running for the first time with %eip - * having the full physical address, BUT we still - * are using a segment descriptor with the origin - * not matching the booting kernel. + * At this point paging is enabled, and we are in "compatibility" mode. + * We do another far jump to reload %cs with the 64 bit selector. + * %cr3 points to a 4-level page table page. + * We cannot yet jump all the way to the kernel because we can only + * specify a 32 bit linear address. So, yet another trampoline. * - * SO NOW... for the BIG Jump into kernel's segment - * and physical text above 1 Meg. + * The following instruction is: + * ljmp $kernelcode-gdt, $tramp_64-mptramp_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and it's a little clearer what we are patching.
*/ - mov $0x10, %ebx - movw %bx, %ds - movw %bx, %es - movw %bx, %fs - movw %bx, %gs - movw %bx, %ss - - .globl bigJump -bigJump: - /* this will be modified by mpInstallTramp() */ - ljmp $0x08, $0 /* far jmp to MPentry() */ - -dead: hlt /* We should never get here */ - jmp dead - -/* - * MP boot strap Global Descriptor Table - */ - .p2align 4 - .globl MP_GDT - .globl bootCodeSeg - .globl bootDataSeg -MP_GDT: - -nulldesc: /* offset = 0x0 */ +jmp_64: + .byte 0xea /* opcode for far jump */ + .long tramp_64-mptramp_start /* offset in segment */ + .word kernelcode-gdt /* index in gdt for 64 bit code */ - .word 0x0 - .word 0x0 - .byte 0x0 - .byte 0x0 - .byte 0x0 - .byte 0x0 - -kernelcode: /* offset = 0x08 */ - - .word 0xffff /* segment limit 0..15 */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x0 /* segment base 16..23; set for 0K */ - .byte 0x9f /* flags; Type */ - .byte 0xcf /* flags; Limit */ - .byte 0x0 /* segment base 24..32 */ - -kerneldata: /* offset = 0x10 */ - - .word 0xffff /* segment limit 0..15 */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x0 /* segment base 16..23; set for 0k */ - .byte 0x93 /* flags; Type */ - .byte 0xcf /* flags; Limit */ - .byte 0x0 /* segment base 24..32 */ + /* + * Yeehar! We're running in 64 bit mode! We can mostly ignore our + * segment registers, and get on with it. + * Note that we are running at the correct virtual address, but with + * a 1:1 1GB mirrored mapping over entire address space. We had better + * switch to a real %cr3 promptly so that we can get to the direct map + * space. Remember that jmp is relative and that we've been relocated, + * so use an indirect jump. 
+ */ + .code64 +tramp_64: + movabsq $entry_64,%rax /* 64 bit immediate load */ + jmp *%rax -bootcode: /* offset = 0x18 */ + .p2align 4,0 +gdt: + /* + * All segment descriptor tables start with a null descriptor + */ + .long 0x00000000 + .long 0x00000000 - .word 0xffff /* segment limit 0..15 */ -bootCodeSeg: /* this will be modified by mpInstallTramp() */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ - .byte 0x9e /* flags; Type */ - .byte 0xcf /* flags; Limit */ - .byte 0x0 /*segment base 24..32 */ + /* + * This is the 64 bit long mode code descriptor. There is no + * 64 bit data descriptor. + */ +kernelcode: + .long 0x00000000 + .long 0x00209800 -bootdata: /* offset = 0x20 */ + /* + * This is the descriptor for the 32 bit boot code. + * %cs: +A, +R, -C, DPL=0, +P, +D, +G + * Accessed, Readable, Present, 32 bit, 4G granularity + */ +bootcode: + .long 0x0000ffff + .long 0x00cf9b00 - .word 0xffff -bootDataSeg: /* this will be modified by mpInstallTramp() */ - .word 0x0000 /* segment base 0..15 */ - .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ - .byte 0x92 - .byte 0xcf - .byte 0x0 + /* + * This is the descriptor for the 32 bit boot data. + * We load it into %ds and %ss. The bits for each selector + * are interpreted slightly differently. + * %ds: +A, +W, -E, DPL=0, +P, +D, +G + * %ss: +A, +W, -E, DPL=0, +P, +B, +G + * Accessed, Writeable, Expand up, Present, 32 bit, 4GB + * For %ds, +D means 'default operand size is 32 bit'. + * For %ss, +B means the stack register is %esp rather than %sp. + */ +bootdata: + .long 0x0000ffff + .long 0x00cf9300 -/* - * GDT pointer for the lgdt call - */ - .globl mp_gdtbase +gdtend: -MP_GDTptr: -mp_gdtlimit: - .word 0x0028 -mp_gdtbase: /* this will be modified by mpInstallTramp() */ + /* + * The address of our page table pages that the boot code + * uses to trampoline up to kernel address space. 
+ */ + .globl mptramp_pagetables +mptramp_pagetables: .long 0 - .space 0x100 /* space for boot_stk - 1st temporary stack */ -boot_stk: + /* + * The pseudo descriptor for lgdt to use. + */ +lgdt_desc: + .word gdtend-gdt /* Length */ + .long gdt-mptramp_start /* Offset plus %ds << 4 */ + + .globl mptramp_end +mptramp_end: -BOOTMP2: - .globl bootMP_size -bootMP_size: - .long BOOTMP2 - BOOTMP1 + /* + * From here on down is executed in the kernel .text section. + * + * Load a real %cr3 that has all the direct map stuff and switches + * off the 1GB replicated mirror. Load a stack pointer and jump + * into AP startup code in C. + */ + .text + .code64 + .p2align 4,0 +entry_64: + movq KPML4phys, %rax + movq %rax, %cr3 + movq bootSTK, %rsp + jmp init_secondary diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 908e65a..f0a9883 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -27,7 +27,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_mptable_force_htt.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> @@ -57,13 +56,8 @@ __FBSDID("$FreeBSD$"); #define NAPICID 32 /* Max number of I/O APIC's */ -#ifdef PC98 -#define BIOS_BASE (0xe8000) -#define BIOS_SIZE (0x18000) -#else #define BIOS_BASE (0xf0000) #define BIOS_SIZE (0x10000) -#endif #define BIOS_COUNT (BIOS_SIZE/4) typedef void mptable_entry_handler(u_char *entry, void *arg); @@ -226,11 +220,12 @@ static int mptable_probe(void) { int x; - u_long segment; + u_int32_t segment; u_int32_t target; /* see if EBDA exists */ - if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { + segment = (u_int32_t) *(u_short *)(KERNBASE + 0x40e); + if (segment != 0) { /* search first 1K of EBDA */ target = (u_int32_t) (segment << 4); if ((x = search_for_sig(target, 1024 / 4)) >= 0) diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c index 952ceaf..1fab16b 100644 --- a/sys/amd64/amd64/nexus.c +++ b/sys/amd64/amd64/nexus.c @@ -50,6 +50,7 @@ 
__FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/module.h> #include <machine/bus.h> +#include <machine/intr_machdep.h> #include <sys/rman.h> #include <sys/interrupt.h> @@ -64,8 +65,6 @@ __FBSDID("$FreeBSD$"); #include <isa/isavar.h> #include <amd64/isa/isa.h> #endif -#include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> #include <sys/rtprio.h> static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); @@ -156,14 +155,11 @@ nexus_probe(device_t dev) * multi-ISA-bus systems. PCI interrupts are routed to the ISA * component, so in a way, PCI can be a partial child of an ISA bus(!). * APIC interrupts are global though. - * - * XXX We depend on the AT PIC driver correctly claiming IRQ 2 - * to prevent its reuse elsewhere. */ irq_rman.rm_start = 0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupt request lines"; - irq_rman.rm_end = 15; + irq_rman.rm_end = NUM_IO_INTS - 1; if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, irq_rman.rm_start, irq_rman.rm_end)) @@ -428,7 +424,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, if (error) return (error); - error = inthand_add(device_get_nameunit(child), irq->r_start, + error = intr_add_handler(device_get_nameunit(child), irq->r_start, ihand, arg, flags, cookiep); return (error); @@ -437,7 +433,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { - return (inthand_remove(ih)); + return (intr_remove_handler(ih)); } static int diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 4e4c124..b2f0c18 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -118,6 +118,9 @@ __FBSDID("$FreeBSD$"); #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> +#ifdef SMP +#include <sys/smp.h> +#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -134,6 +137,9 @@ __FBSDID("$FreeBSD$"); #include <machine/cputypes.h> #include 
<machine/md_var.h> #include <machine/specialreg.h> +#ifdef SMP +#include <machine/smp.h> +#endif #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC @@ -163,6 +169,11 @@ struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; +#ifdef LAZY_SWITCH +#ifdef SMP +static struct mtx lazypmap_lock; +#endif +#endif vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ @@ -477,6 +488,11 @@ pmap_bootstrap(firstaddr) kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); +#ifdef LAZY_SWITCH +#ifdef SMP + mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN); +#endif +#endif mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); @@ -630,8 +646,121 @@ pmap_track_modified(vm_offset_t va) return 0; } +#ifdef SMP /* - * Normal invalidation functions. + * For SMP, these functions have to use the IPI mechanism for coherence. + */ +void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + u_int cpumask; + u_int other_cpus; + + if (smp_started) { + if (!(read_rflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_tlb_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invlpg(va); + smp_invlpg(va); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invlpg(va); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg(pmap->pm_active & other_cpus, va); + } + if (smp_started) + mtx_unlock_spin(&smp_tlb_mtx); + else + critical_exit(); +} + +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + u_int cpumask; + u_int other_cpus; + vm_offset_t addr; + + if (smp_started) { + if (!(read_rflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_tlb_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. + * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + smp_invlpg_range(sva, eva); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg_range(pmap->pm_active & other_cpus, + sva, eva); + } + if (smp_started) + mtx_unlock_spin(&smp_tlb_mtx); + else + critical_exit(); +} + +void +pmap_invalidate_all(pmap_t pmap) +{ + u_int cpumask; + u_int other_cpus; + + if (smp_started) { + if (!(read_rflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_tlb_mtx); + } else + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + * XXX critical sections disable interrupts again + */ + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invltlb(); + smp_invltlb(); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invltlb(); + if (pmap->pm_active & other_cpus) + smp_masked_invltlb(pmap->pm_active & other_cpus); + } + if (smp_started) + mtx_unlock_spin(&smp_tlb_mtx); + else + critical_exit(); +} +#else /* !SMP */ +/* + * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void @@ -659,6 +788,7 @@ pmap_invalidate_all(pmap_t pmap) if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } +#endif /* !SMP */ /* * Are we current address space or kernel? @@ -1208,6 +1338,93 @@ retry: * Pmap allocation/deallocation routines. ***************************************************/ +#ifdef LAZY_SWITCH +#ifdef SMP +/* + * Deal with a SMP shootdown of other users of the pmap that we are + * trying to dispose of. This can be a bit hairy. 
+ */ +static u_int *lazymask; +static register_t lazyptd; +static volatile u_int lazywait; + +void pmap_lazyfix_action(void); + +void +pmap_lazyfix_action(void) +{ + u_int mymask = PCPU_GET(cpumask); + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); + atomic_store_rel_int(&lazywait, 1); +} + +static void +pmap_lazyfix_self(u_int mymask) +{ + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); +} + + +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int mymask = PCPU_GET(cpumask); + u_int mask; + register u_int spins; + + while ((mask = pmap->pm_active) != 0) { + spins = 50000000; + mask = mask & -mask; /* Find least significant set bit */ + mtx_lock_spin(&lazypmap_lock); + lazyptd = vtophys(pmap->pm_pml4); + if (mask == mymask) { + lazymask = &pmap->pm_active; + pmap_lazyfix_self(mymask); + } else { + atomic_store_rel_long((u_long *)&lazymask, + (u_long)&pmap->pm_active); + atomic_store_rel_int(&lazywait, 0); + ipi_selected(mask, IPI_LAZYPMAP); + while (lazywait == 0) { + ia32_pause(); + if (--spins == 0) + break; + } + } + mtx_unlock_spin(&lazypmap_lock); + if (spins == 0) + printf("pmap_lazyfix: spun for 50000000\n"); + } +} + +#else /* SMP */ + +/* + * Cleaning up on uniprocessor is easy. For various reasons, we're + * unlikely to have to even execute this code, including the fact + * that the cleanup is deferred until the parent does a wait(2), which + * means that another userland process has run. + */ +static void +pmap_lazyfix(pmap_t pmap) +{ + u_long cr3; + + cr3 = vtophys(pmap->pm_pml4); + if (cr3 == rcr3()) { + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + pmap->pm_active &= ~(PCPU_GET(cpumask)); + } +} +#endif /* SMP */ +#endif + /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. 
@@ -1222,6 +1439,9 @@ pmap_release(pmap_t pmap) ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); +#ifdef LAZY_SWITCH + pmap_lazyfix(pmap); +#endif mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); @@ -2777,12 +2997,21 @@ void pmap_activate(struct thread *td) { struct proc *p = td->td_proc; - pmap_t pmap; + pmap_t pmap, oldpmap; u_int64_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); + oldpmap = PCPU_GET(curpmap); +#ifdef SMP +if (oldpmap) /* XXX FIXME */ + atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); + atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); +#else +if (oldpmap) /* XXX FIXME */ + oldpmap->pm_active &= ~PCPU_GET(cpumask); pmap->pm_active |= PCPU_GET(cpumask); +#endif cr3 = vtophys(pmap->pm_pml4); /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 94d7bba..2c0ddf1 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -37,10 +37,21 @@ #include "opt_ddb.h" #include <machine/asmacros.h> +#include <machine/intr_machdep.h> #include <machine/pmap.h> #include "assym.s" + ALIGN_DATA + .globl intrcnt, eintrcnt +intrcnt: + .space INTRCNT_COUNT * 4 +eintrcnt: + + .globl intrnames, eintrnames +intrnames: + .space INTRCNT_COUNT * (MAXCOMLEN + 1) +eintrnames: .text @@ -302,6 +313,9 @@ ENTRY(casuptr) ja fusufault movq %rsi, %rax /* old */ +#ifdef SMP + lock +#endif cmpxchgq %rdx, (%rdi) /* new = %rdx */ /* diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 65ae8f9..0e6b95b 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -80,13 +80,14 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <machine/cpu.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/pcb.h> +#ifdef SMP +#include <machine/smp.h> +#endif #include <machine/tss.h> -#include <amd64/isa/icu.h> -#include 
<amd64/isa/intr_machdep.h> - #include <ddb/ddb.h> extern void trap(struct trapframe frame); @@ -564,6 +565,11 @@ trap_fatal(frame, eva) printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + printf("cpuid = %d; ", PCPU_GET(cpuid)); + printf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%lx\n", eva); printf("fault code = %s %s, %s\n", @@ -631,6 +637,11 @@ void dblfault_handler() { printf("\nFatal double fault\n"); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + printf("cpuid = %d; ", PCPU_GET(cpuid)); + printf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif panic("double fault"); } diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 5485511..6a5b17c 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -77,14 +77,26 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) printf("TSC clock: %lu Hz\n", tsc_freq); - - return; } + void init_TSC_tc(void) { +#ifdef SMP + /* + * We can not use the TSC in SMP mode unless the TSCs on all CPUs + * are somehow synchronized. Some hardware configurations do + * this, but we have no way of determining whether this is the + * case, so we do not use the TSC in multi-processor systems + * unless the user indicated (by setting kern.timecounter.smp_tsc + * to 1) that he believes that his TSCs are synchronized. 
+ */ + if (mp_ncpus > 1 && !smp_tsc) + tsc_timecounter.tc_quality = -100; +#endif + if (tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 9b6bc1f..c4f583f 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/mutex.h> #include <sys/sf_buf.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/unistd.h> @@ -80,6 +81,11 @@ __FBSDID("$FreeBSD$"); #include <amd64/isa/isa.h> static void cpu_reset_real(void); +#ifdef SMP +static void cpu_reset_proxy(void); +static u_int cpu_reset_proxyid; +static volatile u_int cpu_reset_proxy_active; +#endif static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) @@ -336,10 +342,69 @@ cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) * Force reset the processor by invalidating the entire address space! 
*/ +#ifdef SMP +static void +cpu_reset_proxy() +{ + + cpu_reset_proxy_active = 1; + while (cpu_reset_proxy_active == 1) + ; /* Wait for other cpu to see that we've started */ + stop_cpus((1<<cpu_reset_proxyid)); + printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); + DELAY(1000000); + cpu_reset_real(); +} +#endif + void cpu_reset() { +#ifdef SMP + if (smp_active == 0) { + cpu_reset_real(); + /* NOTREACHED */ + } else { + + u_int map; + int cnt; + printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid)); + + map = PCPU_GET(other_cpus) & ~ stopped_cpus; + + if (map != 0) { + printf("cpu_reset: Stopping other CPUs\n"); + stop_cpus(map); /* Stop all other CPUs */ + } + + if (PCPU_GET(cpuid) == 0) { + DELAY(1000000); + cpu_reset_real(); + /* NOTREACHED */ + } else { + /* We are not BSP (CPU #0) */ + + cpu_reset_proxyid = PCPU_GET(cpuid); + cpustop_restartfunc = cpu_reset_proxy; + cpu_reset_proxy_active = 0; + printf("cpu_reset: Restarting BSP\n"); + started_cpus = (1<<0); /* Restart CPU #0 */ + + cnt = 0; + while (cpu_reset_proxy_active == 0 && cnt < 10000000) + cnt++; /* Wait for BSP to announce restart */ + if (cpu_reset_proxy_active == 0) + printf("cpu_reset: Failed to restart BSP\n"); + enable_intr(); + cpu_reset_proxy_active = 2; + + while (1); + /* NOTREACHED */ + } + } +#else cpu_reset_real(); +#endif } static void diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c index 991c85b..e32cfcd 100644 --- a/sys/amd64/ia32/ia32_syscall.c +++ b/sys/amd64/ia32/ia32_syscall.c @@ -76,11 +76,9 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <machine/cpu.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> -#include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> - #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(rsvd); diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index 9d5cdff..4dea040 100644 --- a/sys/amd64/include/apicvar.h +++ 
b/sys/amd64/include/apicvar.h @@ -32,6 +32,8 @@ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ +#include <machine/segments.h> + /* * Local && I/O APIC variable definitions. */ @@ -156,7 +158,7 @@ int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); -void lapic_handle_intr(struct intrframe frame); +void lapic_handle_intr(void *cookie, struct intrframe frame); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h index 9893e5f..404287e 100644 --- a/sys/amd64/include/asmacros.h +++ b/sys/amd64/include/asmacros.h @@ -138,4 +138,13 @@ #define MEXITCOUNT #endif /* GPROF */ +#ifdef LOCORE +/* + * Convenience macros for declaring interrupt entry points. + */ +#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ + .type __CONCAT(X,name),@function; __CONCAT(X,name): + +#endif /* LOCORE */ + #endif /* !_MACHINE_ASMACROS_H_ */ diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index 0c4d9b0..bc10631 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -82,7 +82,7 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) * For userland, assume the SMP case and use lock prefixes so that * the binaries will run on both types of systems. */ -#if !defined(_KERNEL) +#if defined(SMP) || !defined(_KERNEL) #define MPLOCKED lock ; #else #define MPLOCKED diff --git a/sys/amd64/include/clock.h b/sys/amd64/include/clock.h index 359e173..9e3e4e5 100644 --- a/sys/amd64/include/clock.h +++ b/sys/amd64/include/clock.h @@ -15,6 +15,7 @@ * XXX large parts of the driver and its interface are misplaced. 
*/ extern int adjkerntz; +extern int clkintr_pending; extern int disable_rtc_set; extern int pscnt; extern int psdiv; diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 38b07eb..2f32cfd 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -38,6 +38,7 @@ extern long Maxmem; extern u_long atdevbase; /* offset in virtual memory of ISA io mem */ +extern u_int basemem; extern int busdma_swi_pending; extern u_int cpu_exthigh; extern u_int cpu_feature; @@ -50,7 +51,8 @@ extern uint16_t *elan_mmcr; extern char kstack[]; extern char sigcode[]; extern int szsigcode; -extern u_int basemem; + +extern struct pcpu __pcpu[]; typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); struct thread; @@ -66,6 +68,7 @@ void enable_sse(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); void pagezero(void *addr); int is_physical_memory(vm_offset_t addr); +int isa_nmi(int cd); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); void fpstate_drop(struct thread *td); diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 293f909..7c3a831 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -83,7 +83,11 @@ #define MACHINE_ARCH "amd64" #endif +#ifdef SMP +#define MAXCPU 8 +#else #define MAXCPU 1 +#endif #define ALIGNBYTES _ALIGNBYTES #define ALIGN(p) _ALIGN(p) diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 7ebfbba..9e0383a 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -40,7 +40,11 @@ */ #define PCPU_MD_FIELDS \ struct pcpu *pc_prvspace; /* Self-reference */ \ + struct pmap *pc_curpmap; \ + struct amd64tss *pc_tssp; \ + register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ + u_int pc_apic_id; \ u_int pc_acpi_id /* ACPI CPU id */ #if defined(lint) diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 77a25a4..d6ba652 100644 --- a/sys/amd64/include/pmap.h +++ 
b/sys/amd64/include/pmap.h @@ -221,7 +221,8 @@ struct md_page { struct pmap { pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - u_long pm_active; /* active on cpus */ + u_int pm_active; /* active on cpus */ + /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ }; diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h index d924eb3..806e9d5 100644 --- a/sys/amd64/include/segments.h +++ b/sys/amd64/include/segments.h @@ -49,6 +49,7 @@ * Selectors */ +#define SEL_RPL_MASK 3 /* requester priv level */ #define ISPL(s) ((s)&3) /* what is the priority level of a selector */ #define SEL_KPL 0 /* kernel priority level */ #define SEL_UPL 3 /* user priority level */ @@ -191,6 +192,7 @@ struct region_descriptor { #define IDT_AC 17 /* #AC: Alignment Check */ #define IDT_MC 18 /* #MC: Machine Check */ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ +#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ /* @@ -210,6 +212,7 @@ struct region_descriptor { extern struct user_segment_descriptor gdt[]; extern struct soft_segment_descriptor gdt_segs[]; extern struct gate_descriptor *idt; +extern struct region_descriptor r_gdt, r_idt; void lgdt(struct region_descriptor *rdp); void sdtossd(struct user_segment_descriptor *sdp, diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 3d8d117..c6e7fcf 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -13,4 +13,63 @@ #ifndef _MACHINE_SMP_H_ #define _MACHINE_SMP_H_ +#ifdef _KERNEL + +#ifdef SMP + +#ifndef LOCORE + +#include <sys/bus.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> + +/* global symbols in mpboot.S */ +extern char mptramp_start[]; +extern char mptramp_end[]; +extern u_int32_t mptramp_pagetables; + +/* global data in mp_machdep.c */ +extern int mp_naps; +extern int boot_cpu_id; +extern struct pcb stoppcbs[]; +extern struct mtx smp_tlb_mtx; + +/* IPI handlers */ +inthand_t + IDTVEC(invltlb), /* TLB shootdowns - global */ + IDTVEC(invlpg), /* TLB shootdowns - 1 page */ + IDTVEC(invlrng), /* TLB shootdowns - page range */ + IDTVEC(hardclock), /* Forward hardclock() */ + IDTVEC(statclock), /* Forward statclock() */ + IDTVEC(cpuast), /* Additional software trap on other cpu */ + IDTVEC(cpustop), /* CPU stops & waits to be restarted */ + IDTVEC(rendezvous), /* handle CPU rendezvous */ + IDTVEC(lazypmap); /* handle lazy pmap release */ + +/* functions in mp_machdep.c */ +void cpu_add(u_int apic_id, char boot_cpu); +void init_secondary(void); +void ipi_selected(u_int cpus, u_int ipi); +void ipi_all(u_int ipi); +void ipi_all_but_self(u_int ipi); +void ipi_self(u_int ipi); +void forward_statclock(void); +void forwarded_statclock(struct clockframe frame); +void forward_hardclock(void); +void forwarded_hardclock(struct clockframe frame); +u_int mp_bootaddress(u_int); +int 
mp_grab_cpu_hlt(void); +void smp_invlpg(vm_offset_t addr); +void smp_masked_invlpg(u_int mask, vm_offset_t addr); +void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); +void smp_masked_invlpg_range(u_int mask, vm_offset_t startva, + vm_offset_t endva); +void smp_invltlb(void); +void smp_masked_invltlb(u_int mask); + +#endif /* !LOCORE */ +#endif /* SMP */ + +#endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ diff --git a/sys/amd64/include/tss.h b/sys/amd64/include/tss.h index aa60ba0..24d31b3 100644 --- a/sys/amd64/include/tss.h +++ b/sys/amd64/include/tss.h @@ -69,7 +69,7 @@ struct amd64tss { }; #ifdef _KERNEL -extern struct amd64tss common_tss; +extern struct amd64tss common_tss[]; #endif #endif /* _MACHINE_TSS_H_ */ diff --git a/sys/amd64/isa/atpic.c b/sys/amd64/isa/atpic.c index ba44094..64fa45e3 100644 --- a/sys/amd64/isa/atpic.c +++ b/sys/amd64/isa/atpic.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/proc.h> #include <machine/cpufunc.h> #include <machine/frame.h> @@ -52,26 +53,14 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> #include <machine/segments.h> -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> + #include <isa/isavar.h> #define MASTER 0 #define SLAVE 1 -/* XXX: Magic numbers */ -#ifdef PC98 -#ifdef AUTO_EOI_1 -#define MASTER_MODE 0x1f /* Master auto EOI, 8086 mode */ -#else -#define MASTER_MODE 0x1d /* Master 8086 mode */ -#endif -#define SLAVE_MODE 9 /* 8086 mode */ -#else /* IBM-PC */ #ifdef AUTO_EOI_1 #define MASTER_MODE (ICW4_8086 | ICW4_AEOI) #else @@ -82,7 +71,6 @@ __FBSDID("$FreeBSD$"); #else #define SLAVE_MODE ICW4_8086 #endif -#endif /* PC98 */ static void atpic_init(void *dummy); @@ -252,13 +240,7 @@ i8259_init(struct atpic *pic, int slave) /* Reset the PIC and program with next four bytes. 
*/ mtx_lock_spin(&icu_lock); -#ifdef DEV_MCA - /* MCA uses level triggered interrupts. */ - if (MCA_system) - outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4 | ICW1_LTIM); - else -#endif - outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); + outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); imr_addr = pic->at_ioaddr + ICU_IMR_OFFSET; /* Start vector. */ @@ -286,11 +268,9 @@ i8259_init(struct atpic *pic, int slave) /* Reset is finished, default to IRR on read. */ outb(pic->at_ioaddr, OCW3_SEL | OCW3_RR); -#ifndef PC98 /* OCW2_L1 sets priority order to 3-7, 0-2 (com2 first). */ if (!slave) outb(pic->at_ioaddr, OCW2_R | OCW2_SL | OCW2_L1); -#endif mtx_unlock_spin(&icu_lock); } @@ -317,21 +297,36 @@ atpic_init(void *dummy __unused) continue; ai = &atintrs[i]; setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase + - ai->at_irq, ai->at_intr, SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + ai->at_irq, ai->at_intr, SDT_SYSIGT, SEL_KPL, 0); intr_register_source(&ai->at_intsrc); } } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_SECOND + 1, atpic_init, NULL) void -atpic_handle_intr(struct intrframe iframe) +atpic_handle_intr(void *cookie, struct intrframe iframe) { struct intsrc *isrc; - - KASSERT((uint)iframe.if_vec < ICU_LEN, - ("unknown int %d\n", iframe.if_vec)); - isrc = &atintrs[iframe.if_vec].at_intsrc; + int vec = (uintptr_t)cookie; + + KASSERT(vec < ICU_LEN, ("unknown int %d\n", vec)); + isrc = &atintrs[vec].at_intsrc; + if (vec == 7 || vec == 15) { + int port, isr; + + /* + * Read the ISR register to see if IRQ 7/15 is really + * pending. Reset read register back to IRR when done. 
+ */ + port = ((struct atpic *)isrc->is_pic)->at_ioaddr; + mtx_lock_spin(&icu_lock); + outb(port, OCW3_SEL | OCW3_RR | OCW3_RIS); + isr = inb(port); + outb(port, OCW3_SEL | OCW3_RR); + mtx_unlock_spin(&icu_lock); + if ((isr & IRQ7) == 0) + return; + } intr_execute_handlers(isrc, &iframe); } @@ -398,9 +393,7 @@ static driver_t atpic_driver = { static devclass_t atpic_devclass; DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); -#ifndef PC98 DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); -#endif /* * Return a bitmap of the current interrupt requests. This is 8259-specific diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S index e3dc38f..e41071df 100644 --- a/sys/amd64/isa/atpic_vector.S +++ b/sys/amd64/isa/atpic_vector.S @@ -41,12 +41,8 @@ */ #include <machine/asmacros.h> -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> #include "assym.s" @@ -57,23 +53,28 @@ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; /* 8 ints */ \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ -; \ + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ + jz 1f ; /* Yes, dont swapgs again */ \ + swapgs ; \ +1: movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ + movq $irq_num, %rdi; /* pass the IRQ */ \ call atpic_handle_intr ; \ - addl $4, %esp ; /* discard the parameter */ \ -; \ MEXITCOUNT ; \ jmp doreti diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c index a11afb4..a2e751e 100644 --- a/sys/amd64/isa/clock.c +++ b/sys/amd64/isa/clock.c @@ -69,8 +69,12 @@ __FBSDID("$FreeBSD$"); #include <machine/clock.h> #include <machine/frame.h> +#include <machine/intr_machdep.h> #include <machine/md_var.h> #include <machine/psl.h> +#ifdef SMP +#include <machine/smp.h> +#endif #include <machine/specialreg.h> #include <amd64/isa/icu.h> @@ -81,8 +85,6 @@ __FBSDID("$FreeBSD$"); #endif #include <amd64/isa/timerreg.h> -#include <amd64/isa/intr_machdep.h> - /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. 
@@ -112,6 +114,7 @@ static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; +static struct intsrc *i8254_intsrc; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; @@ -122,7 +125,6 @@ static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; #define ACQUIRE_PENDING 3 static u_char timer2_state; -static void (*timer_func)(struct clockframe *frame) = hardclock; static unsigned i8254_get_timecount(struct timecounter *tc); static void set_timer_freq(u_int freq, int intr_freq); @@ -137,7 +139,7 @@ static struct timecounter i8254_timecounter = { }; static void -clkintr(struct clockframe frame) +clkintr(struct clockframe *frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { @@ -151,7 +153,10 @@ clkintr(struct clockframe frame) clkintr_pending = 0; mtx_unlock_spin(&clock_lock); } - timer_func(&frame); + hardclock(frame); +#ifdef SMP + forward_hardclock(); +#endif } int @@ -207,16 +212,19 @@ release_timer2() * in the statistics, but the stat clock will no longer stop. */ static void -rtcintr(struct clockframe frame) +rtcintr(struct clockframe *frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) { if (profprocs != 0) { if (--pscnt == 0) pscnt = psdiv; - profclock(&frame); + profclock(frame); } if (pscnt == psdiv) - statclock(&frame); + statclock(frame); +#ifdef SMP + forward_statclock(); +#endif } } @@ -719,7 +727,6 @@ void cpu_initclocks() { int diag; - register_t crit; if (statclock_disable) { /* @@ -735,19 +742,9 @@ cpu_initclocks() profhz = RTC_PROFRATE; } - /* Finish initializing 8253 timer 0. */ - /* - * XXX Check the priority of this interrupt handler. I - * couldn't find anything suitable in the BSD/OS code (grog, - * 19 July 2000). - */ - inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, + /* Finish initializing 8254 timer 0. 
*/ + intr_add_handler("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(IRQ0); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); @@ -760,14 +757,9 @@ cpu_initclocks() if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); - inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, + intr_add_handler("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(IRQ8); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); + i8254_intsrc = intr_lookup_source(8); writertc(RTC_STATUSB, rtc_statusb); @@ -833,8 +825,8 @@ i8254_get_timecount(struct timecounter *tc) if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(rflags & PSL_I) && count < timer0_max_count / 2u)) && - (inb(IO_ICU1) & 1))) - )) { + i8254_intsrc != NULL && + i8254_intsrc->is_pic->pic_source_pending(i8254_intsrc))))) { i8254_ticked = 1; i8254_offset += timer0_max_count; } diff --git a/sys/amd64/isa/icu.h b/sys/amd64/isa/icu.h deleted file mode 100644 index 06b8955..0000000 --- a/sys/amd64/isa/icu.h +++ /dev/null @@ -1,152 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)icu.h 5.6 (Berkeley) 5/9/91 - * $FreeBSD$ - */ - -/* - * AT/386 Interrupt Control constants - * W. Jolitz 8/89 - */ - -#ifndef _I386_ISA_ICU_H_ -#define _I386_ISA_ICU_H_ - -#ifndef LOCORE - -/* - * Note: - * Most of the SMP equivilants of the icu macros are coded - * elsewhere in an MP-safe fashion. - * In particular note that the 'imen' variable is opaque. - * DO NOT access imen directly, use INTREN()/INTRDIS(). 
- */ - -void INTREN(u_int); -void INTRDIS(u_int); - -extern unsigned imen; /* interrupt mask enable */ - -#endif /* LOCORE */ - - -/* - * Interrupt enable bits - in normal order of priority (which we change) - */ -#define IRQ0 0x0001 /* highest priority - timer */ -#define IRQ1 0x0002 -#define IRQ_SLAVE 0x0004 -#define IRQ8 0x0100 -#define IRQ9 0x0200 -#define IRQ2 IRQ9 -#define IRQ10 0x0400 -#define IRQ11 0x0800 -#define IRQ12 0x1000 -#define IRQ13 0x2000 -#define IRQ14 0x4000 -#define IRQ15 0x8000 -#define IRQ3 0x0008 /* this is highest after rotation */ -#define IRQ4 0x0010 -#define IRQ5 0x0020 -#define IRQ6 0x0040 -#define IRQ7 0x0080 /* lowest - parallel printer */ - -/* Initialization control word 1. Written to even address. */ -#define ICW1_IC4 0x01 /* ICW4 present */ -#define ICW1_SNGL 0x02 /* 1 = single, 0 = cascaded */ -#define ICW1_ADI 0x04 /* 1 = 4, 0 = 8 byte vectors */ -#define ICW1_LTIM 0x08 /* 1 = level trigger, 0 = edge */ -#define ICW1_RESET 0x10 /* must be 1 */ -/* 0x20 - 0x80 - in 8080/8085 mode only */ - -/* Initialization control word 2. Written to the odd address. */ -/* No definitions, it is the base vector of the IDT for 8086 mode */ - -/* Initialization control word 3. Written to the odd address. */ -/* For a master PIC, bitfield indicating a slave 8259 on given input */ -/* For slave, lower 3 bits are the slave's ID binary id on master */ - -/* Initialization control word 4. Written to the odd address. */ -#define ICW4_8086 0x01 /* 1 = 8086, 0 = 8080 */ -#define ICW4_AEOI 0x02 /* 1 = Auto EOI */ -#define ICW4_MS 0x04 /* 1 = buffered master, 0 = slave */ -#define ICW4_BUF 0x08 /* 1 = enable buffer mode */ -#define ICW4_SFNM 0x10 /* 1 = special fully nested mode */ - -/* Operation control words. Written after initialization. */ - -/* Operation control word type 1 */ -/* - * No definitions. Written to the odd address. Bitmask for interrupts. - * 1 = disabled. - */ - -/* Operation control word type 2. Bit 3 (0x08) must be zero. Even address. 
*/ -#define OCW2_L0 0x01 /* Level */ -#define OCW2_L1 0x02 -#define OCW2_L2 0x04 -/* 0x08 must be 0 to select OCW2 vs OCW3 */ -/* 0x10 must be 0 to select OCW2 vs ICW1 */ -#define OCW2_EOI 0x20 /* 1 = EOI */ -#define OCW2_SL 0x40 /* EOI mode */ -#define OCW2_R 0x80 /* EOI mode */ - -/* Operation control word type 3. Bit 3 (0x08) must be set. Even address. */ -#define OCW3_RIS 0x01 -#define OCW3_RR 0x02 -#define OCW3_P 0x04 -/* 0x08 must be 1 to select OCW3 vs OCW2 */ -#define OCW3_SEL 0x08 /* must be 1 */ -/* 0x10 must be 0 to select OCW3 vs ICW1 */ -#define OCW3_SMM 0x20 /* special mode mask */ -#define OCW3_ESMM 0x40 /* enable SMM */ - -/* - * Interrupt Control offset into Interrupt descriptor table (IDT) - */ -#define ICU_OFFSET 32 /* 0-31 are processor exceptions */ -#define ICU_LEN 16 /* 32-47 are ISA interrupts */ -#define HWI_MASK 0xffff /* bits for h/w interrupts */ -#define NHWI 16 - -#define ICU_IMR_OFFSET 1 -#define ICU_SLAVEID 2 -#define ICU_EOI (OCW2_EOI) /* non-specific EOI */ -#define ICU_SETPRI (OCW2_R | OCW2_SL) /* set rotation priority */ - -#define INTRCNT_COUNT (1 + ICU_LEN + 2 * ICU_LEN) - -#endif /* !_I386_ISA_ICU_H_ */ diff --git a/sys/amd64/isa/icu_ipl.S b/sys/amd64/isa/icu_ipl.S deleted file mode 100644 index ad883fe..0000000 --- a/sys/amd64/isa/icu_ipl.S +++ /dev/null @@ -1,81 +0,0 @@ -/*- - * Copyright (c) 1989, 1990 William F. Jolitz. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - - .data - ALIGN_DATA - -/* interrupt mask enable (all h/w off) */ - .globl imen -imen: .long HWI_MASK - - .text - SUPERALIGN_TEXT - -ENTRY(INTREN) - movq %rdi, %rax - movl %eax, %ecx - notl %eax - andl %eax, imen - movl imen, %eax - testb %cl, %cl - je 1f - outb %al, $(IO_ICU1 + ICU_IMR_OFFSET) -1: - testb %ch, %ch - je 2f - shrl $8, %eax - outb %al, $(IO_ICU2 + ICU_IMR_OFFSET) -2: - ret - -ENTRY(INTRDIS) - movq %rdi, %rax - movl %eax, %ecx - orl %eax, imen - movl imen, %eax - testb %cl, %cl - je 1f - outb %al, $(IO_ICU1 + ICU_IMR_OFFSET) -1: - testb %ch, %ch - je 2f - shrl $8, %eax - outb %al, $(IO_ICU2 + ICU_IMR_OFFSET) -2: - ret diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S deleted file mode 100644 index 123a731..0000000 --- a/sys/amd64/isa/icu_vector.S +++ /dev/null @@ -1,144 +0,0 @@ -/* - * from: vector.s, 386BSD 0.1 unknown origin - * $FreeBSD$ - */ - -#define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) -#define IRQ_BYTE(irq_num) ((irq_num) >> 3) - -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ - outb %al,$IO_ICU1 /* ... to clear in service bit */ - -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ - outb %al,$IO_ICU1 /* ... then first icu */ - - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ - testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ - jz 1f ; /* Yes, dont swapgs again */ \ - swapgs ; \ -1: movq %rdi,TF_RDI(%rsp) ; \ - movq %rsi,TF_RSI(%rsp) ; \ - movq %rdx,TF_RDX(%rsp) ; \ - movq %rcx,TF_RCX(%rsp) ; \ - movq %r8,TF_R8(%rsp) ; \ - movq %r9,TF_R9(%rsp) ; \ - movq %rax,TF_RAX(%rsp) ; \ - movq %rbx,TF_RBX(%rsp) ; \ - movq %rbp,TF_RBP(%rsp) ; \ - movq %r10,TF_R10(%rsp) ; \ - movq %r11,TF_R11(%rsp) ; \ - movq %r12,TF_R12(%rsp) ; \ - movq %r13,TF_R13(%rsp) ; \ - movq %r14,TF_R14(%rsp) ; \ - movq %r15,TF_R15(%rsp) ; \ - call critical_enter ; \ - movq PCPU(CURTHREAD),%rbx ; \ - incl TD_INTR_NESTING_LEVEL(%rbx) ; \ - movq intr_unit + (irq_num) * 8, %rdi ; \ - call *intr_handler + (irq_num) * 8 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movq intr_countp + (irq_num) * 8,%rax ; \ - incq (%rax) ; \ - decl TD_INTR_NESTING_LEVEL(%rbx) ; \ - call critical_exit ; \ - jmp doreti - -/* - * Slow, threaded interrupts. - * - * XXX Most of the parameters here are obsolete. Fix this when we're - * done. - * XXX we really shouldn't return via doreti if we just schedule the - * interrupt handler and don't run anything. We could just do an - * iret. FIXME. - */ -#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ - testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? 
*/ \ - jz 1f ; /* Yes, dont swapgs again */ \ - swapgs ; \ -1: movq %rdi,TF_RDI(%rsp) ; \ - movq %rsi,TF_RSI(%rsp) ; \ - movq %rdx,TF_RDX(%rsp) ; \ - movq %rcx,TF_RCX(%rsp) ; \ - movq %r8,TF_R8(%rsp) ; \ - movq %r9,TF_R9(%rsp) ; \ - movq %rax,TF_RAX(%rsp) ; \ - movq %rbx,TF_RBX(%rsp) ; \ - movq %rbp,TF_RBP(%rsp) ; \ - movq %r10,TF_R10(%rsp) ; \ - movq %r11,TF_R11(%rsp) ; \ - movq %r12,TF_R12(%rsp) ; \ - movq %r13,TF_R13(%rsp) ; \ - movq %r14,TF_R14(%rsp) ; \ - movq %r15,TF_R15(%rsp) ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movq PCPU(CURTHREAD),%rbx ; \ - incl TD_INTR_NESTING_LEVEL(%rbx) ; \ - movq $irq_num, %rdi; /* pass the IRQ */ \ - call sched_ithd ; \ - decl TD_INTR_NESTING_LEVEL(%rbx) ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti - -MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) - -#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) -/* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) - 
INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) - -MCOUNT_LABEL(eintr) - diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c deleted file mode 100644 index 30c12e7..0000000 --- a/sys/amd64/isa/intr_machdep.c +++ /dev/null @@ -1,525 +0,0 @@ -/*- - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)isa.c 7.2 (Berkeley) 5/13/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "opt_isa.h" - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/errno.h> -#include <sys/interrupt.h> -#include <sys/kernel.h> -#include <sys/kthread.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/syslog.h> -#include <sys/systm.h> -#include <sys/unistd.h> - -#include <machine/md_var.h> -#include <machine/segments.h> - -#include <amd64/isa/isa.h> -#include <amd64/isa/icu.h> - -#ifdef DEV_ISA -#include <isa/isavar.h> -#endif -#include <amd64/isa/intr_machdep.h> -#include <sys/interrupt.h> - -/* - * Per-interrupt data. 
- */ -u_long *intr_countp[ICU_LEN]; /* pointers to interrupt counters */ -driver_intr_t *intr_handler[ICU_LEN]; /* first level interrupt handler */ -struct ithd *ithds[ICU_LEN]; /* real interrupt handler */ -void *intr_unit[ICU_LEN]; - -static struct mtx ithds_table_lock; /* protect the ithds table */ - -static inthand_t *fastintr[ICU_LEN] = { - IDTVEC(fastintr0), IDTVEC(fastintr1), - IDTVEC(fastintr2), IDTVEC(fastintr3), - IDTVEC(fastintr4), IDTVEC(fastintr5), - IDTVEC(fastintr6), IDTVEC(fastintr7), - IDTVEC(fastintr8), IDTVEC(fastintr9), - IDTVEC(fastintr10), IDTVEC(fastintr11), - IDTVEC(fastintr12), IDTVEC(fastintr13), - IDTVEC(fastintr14), IDTVEC(fastintr15), -}; - -static inthand_t *slowintr[ICU_LEN] = { - IDTVEC(intr0), IDTVEC(intr1), IDTVEC(intr2), IDTVEC(intr3), - IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), - IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), - IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15), -}; - -static driver_intr_t isa_strayintr; - -static void ithds_init(void *dummy); -static void ithread_enable(uintptr_t vector); -static void ithread_disable(uintptr_t vector); -static void init_i8259(void); - -#define NMI_PARITY (1 << 7) -#define NMI_IOCHAN (1 << 6) -#define ENMI_WATCHDOG (1 << 7) -#define ENMI_BUSTIMER (1 << 6) -#define ENMI_IOSTATUS (1 << 5) - -#ifdef DEV_ISA -/* - * Bus attachment for the ISA PIC. - */ -static struct isa_pnp_id atpic_ids[] = { - { 0x0000d041 /* PNP0000 */, "AT interrupt controller" }, - { 0 } -}; - -static int -atpic_probe(device_t dev) -{ - int result; - - if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, atpic_ids)) <= 0) - device_quiet(dev); - return(result); -} - -/* - * The generic ISA attachment code will handle allocating any other resources - * that we don't explicitly claim here. 
- */ -static int -atpic_attach(device_t dev) -{ - return(0); -} - -static device_method_t atpic_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, atpic_probe), - DEVMETHOD(device_attach, atpic_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - { 0, 0 } -}; - -static driver_t atpic_driver = { - "atpic", - atpic_methods, - 1, /* no softc */ -}; - -static devclass_t atpic_devclass; - -DRIVER_MODULE(atpic, isa, atpic_driver, atpic_devclass, 0, 0); -DRIVER_MODULE(atpic, acpi, atpic_driver, atpic_devclass, 0, 0); -#endif /* DEV_ISA */ - -/* - * Handle a NMI, possibly a machine check. - * return true to panic system, false to ignore. - */ -int -isa_nmi(cd) - int cd; -{ - int retval = 0; - int isa_port = inb(0x61); - - log(LOG_CRIT, "NMI ISA STATUS 0x%02x", isa_port); - - if (isa_port & NMI_PARITY) { - log(LOG_CRIT, "RAM parity error, likely hardware failure."); - retval = 1; - } - - if (isa_port & NMI_IOCHAN) { - log(LOG_CRIT, "I/O channel check, likely hardware failure."); - retval = 1; - } - - return (retval); -} - -/* - * ICU reinitialize when ICU configuration has lost. - */ -void icu_reinit() -{ - int i; - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - init_i8259(); - for(i=0;i<ICU_LEN;i++) - if(intr_handler[i] != isa_strayintr) - INTREN(1<<i); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - -/* - * Create a default interrupt table to avoid problems caused by - * spurious interrupts during configuration of kernel, then setup - * interrupt control unit. 
- */ -void -isa_defaultirq() -{ - int i; - register_t crit; - - /* icu vectors */ - for (i = 0; i < ICU_LEN; i++) - icu_unset(i, (driver_intr_t *)NULL); - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - init_i8259(); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - - -/* - *initialize 8259's - */ -static void init_i8259() -{ - - outb(IO_ICU1, ICW1_RESET | ICW1_IC4); /* reset; program device, four bytes */ - - outb(IO_ICU1+ICU_IMR_OFFSET, NRSVIDT); /* starting at this vector index */ - outb(IO_ICU1+ICU_IMR_OFFSET, IRQ_SLAVE);/* slave on line 2 */ - outb(IO_ICU1+ICU_IMR_OFFSET, ICW4_8086);/* 8086 mode */ - outb(IO_ICU1+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ - outb(IO_ICU1, OCW3_SEL | OCW3_RR); /* default to IRR on read */ - outb(IO_ICU1, ICU_SETPRI | 0x2);/* pri order 3-7, 0-2 (com2 first) */ - - outb(IO_ICU2, ICW1_RESET | ICW1_IC4); /* reset; program device, four bytes */ - - outb(IO_ICU2+ICU_IMR_OFFSET, NRSVIDT+8); /* staring at this vector index */ - outb(IO_ICU2+ICU_IMR_OFFSET, ICU_SLAVEID); /* my slave id is 2 */ - outb(IO_ICU2+ICU_IMR_OFFSET, ICW4_8086); /* 8086 mode */ - outb(IO_ICU2+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ - outb(IO_ICU2, OCW3_SEL | OCW3_RR); /* default to IRR on read */ -} - -/* - * Caught a stray interrupt, notify - */ -static int isaglitch7; -static int isaglitch15; - -static void -isa_strayintr(vcookiep) - void *vcookiep; -{ - int intr = (void **)vcookiep - &intr_unit[0]; - int isr; - - /* Determine if it is a stray interrupt or simply a glitch */ - if (intr == 7) { - outb(IO_ICU1, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU1); - outb(IO_ICU1, OCW3_SEL | OCW3_RR | OCW3_RIS); /* reselect IIR */ - if ((isr & 0x80) == 0) { - isaglitch7++; - return; - } - } - if (intr == 15) { - outb(IO_ICU2, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU2); - outb(IO_ICU2, OCW3_SEL | OCW3_RR); /* reselect IIR */ - if ((isr & 0x80) == 0) { - isaglitch15++; - return; - } - } - if (intrcnt[1 + intr] <= 
5) - log(LOG_ERR, "stray irq %d\n", intr); - if (intrcnt[1 + intr] == 5) - log(LOG_CRIT, - "too many stray irq %d's; not logging any more\n", intr); -} - -#ifdef DEV_ISA -/* - * Return a bitmap of the current interrupt requests. This is 8259-specific - * and is only suitable for use at probe time. - */ -intrmask_t -isa_irq_pending() -{ - u_char irr1; - u_char irr2; - - irr1 = inb(IO_ICU1); - irr2 = inb(IO_ICU2); - return ((irr2 << 8) | irr1); -} -#endif - -/* - * Update intrnames array with the specified name. This is used by - * vmstat(8) and the like. - */ -static void -update_intrname(int intr, const char *name) -{ - char buf[32]; - char *cp; - int name_index, off, strayintr; - - /* - * Initialise strings for bitbucket and stray interrupt counters. - * These have statically allocated indices 0 and 1 through ICU_LEN. - */ - if (intrnames[0] == '\0') { - off = sprintf(intrnames, "???") + 1; - for (strayintr = 0; strayintr < ICU_LEN; strayintr++) - off += sprintf(intrnames + off, "stray irq%d", - strayintr) + 1; - } - - if (name == NULL) - name = "???"; - if (snprintf(buf, sizeof(buf), "%s irq%d", name, intr) >= sizeof(buf)) - goto use_bitbucket; - - /* - * Search for `buf' in `intrnames'. In the usual case when it is - * not found, append it to the end if there is enough space (the \0 - * terminator for the previous string, if any, becomes a separator). 
- */ - for (cp = intrnames, name_index = 0; - cp != eintrnames && name_index < NR_INTRNAMES; - cp += strlen(cp) + 1, name_index++) { - if (*cp == '\0') { - if (strlen(buf) >= eintrnames - cp) - break; - strcpy(cp, buf); - goto found; - } - if (strcmp(cp, buf) == 0) - goto found; - } - -use_bitbucket: - printf("update_intrname: counting %s irq%d as %s\n", name, intr, - intrnames); - name_index = 0; -found: - intr_countp[intr] = &intrcnt[name_index]; -} - -int -icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) -{ - register_t crit; - - if ((u_int)intr >= ICU_LEN || intr == ICU_SLAVEID) - return (EINVAL); -#if 0 - if (intr_handler[intr] != isa_strayintr) - return (EBUSY); -#endif - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - intr_handler[intr] = handler; - intr_unit[intr] = arg; - setidt(ICU_OFFSET + intr, - flags & INTR_FAST ? fastintr[intr] : slowintr[intr], - SDT_SYSIGT, SEL_KPL, 0); - INTREN(1 << intr); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - return (0); -} - -/* - * Dissociate an interrupt handler from an IRQ and set the handler to - * the stray interrupt handler. The 'handler' parameter is used only - * for consistency checking. 
- */ -int -icu_unset(intr, handler) - int intr; - driver_intr_t *handler; -{ - register_t crit; - - if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) - return (EINVAL); - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << intr); - intr_countp[intr] = &intrcnt[1 + intr]; - intr_handler[intr] = isa_strayintr; - intr_unit[intr] = &intr_unit[intr]; - setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYSIGT, SEL_KPL, 0); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - return (0); -} - -static void -ithds_init(void *dummy) -{ - - mtx_init(&ithds_table_lock, "ithread table lock", NULL, MTX_SPIN); -} -SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); - -static void -ithread_enable(uintptr_t vector) -{ - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(1 << vector); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - -static void -ithread_disable(uintptr_t vector) -{ - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << vector); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); -} - -int -inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, - enum intr_type flags, void **cookiep) -{ - struct ithd *ithd; /* descriptor for the IRQ */ - int errcode = 0; - int created_ithd = 0; - - /* - * Work around a race where more than one CPU may be registering - * handlers on the same IRQ at the same time. 
- */ - mtx_lock_spin(&ithds_table_lock); - ithd = ithds[irq]; - mtx_unlock_spin(&ithds_table_lock); - if (ithd == NULL) { - errcode = ithread_create(&ithd, irq, 0, ithread_disable, - ithread_enable, "irq%d:", irq); - if (errcode) - return (errcode); - mtx_lock_spin(&ithds_table_lock); - if (ithds[irq] == NULL) { - ithds[irq] = ithd; - created_ithd++; - mtx_unlock_spin(&ithds_table_lock); - } else { - struct ithd *orphan; - - orphan = ithd; - ithd = ithds[irq]; - mtx_unlock_spin(&ithds_table_lock); - ithread_destroy(orphan); - } - } - - errcode = ithread_add_handler(ithd, name, handler, arg, - ithread_priority(flags), flags, cookiep); - - if ((flags & INTR_FAST) == 0 || errcode) - /* - * The interrupt process must be in place, but - * not necessarily schedulable, before we - * initialize the ICU, since it may cause an - * immediate interrupt. - */ - if (icu_setup(irq, sched_ithd, arg, flags) != 0) - panic("inthand_add: Can't initialize ICU"); - - if (errcode) - return (errcode); - - if (flags & INTR_FAST) { - errcode = icu_setup(irq, handler, arg, flags); - if (errcode && bootverbose) - printf("\tinthand_add(irq%d) failed, result=%d\n", - irq, errcode); - if (errcode) - return (errcode); - } - - update_intrname(irq, name); - return (0); -} - -/* - * Deactivate and remove linked list the interrupt handler descriptor - * data connected created by an earlier call of inthand_add(), then - * adjust the interrupt masks if necessary. - * - * Return the memory held by the interrupt handler descriptor data - * structure to the system. First ensure the handler is not actively - * in use. - */ -int -inthand_remove(void *cookie) -{ - - return (ithread_remove_handler(cookie)); -} diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h deleted file mode 100644 index 26f500e..0000000 --- a/sys/amd64/isa/intr_machdep.h +++ /dev/null @@ -1,118 +0,0 @@ -/*- - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _I386_ISA_INTR_MACHDEP_H_ -#define _I386_ISA_INTR_MACHDEP_H_ - -/* - * Low level interrupt code. 
- */ - -#ifdef _KERNEL - -#ifdef LOCORE - -/* - * Protects the IO APIC, 8259 PIC, imen, and apic_imen - */ -#define ICU_LOCK MTX_LOCK_SPIN(icu_lock, 0) -#define ICU_UNLOCK MTX_UNLOCK_SPIN(icu_lock) - -#else /* LOCORE */ - -/* - * Type of the first (asm) part of an interrupt handler. - */ -typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); -typedef void unpendhand_t(void); - -#define IDTVEC(name) __CONCAT(X,name) - -extern u_long *intr_countp[]; /* pointers into intrcnt[] */ -extern driver_intr_t *intr_handler[]; /* C entry points of intr handlers */ -extern struct ithd *ithds[]; -extern void *intr_unit[]; /* cookies to pass to intr handlers */ -extern struct mtx icu_lock; - -inthand_t - IDTVEC(fastintr0), IDTVEC(fastintr1), - IDTVEC(fastintr2), IDTVEC(fastintr3), - IDTVEC(fastintr4), IDTVEC(fastintr5), - IDTVEC(fastintr6), IDTVEC(fastintr7), - IDTVEC(fastintr8), IDTVEC(fastintr9), - IDTVEC(fastintr10), IDTVEC(fastintr11), - IDTVEC(fastintr12), IDTVEC(fastintr13), - IDTVEC(fastintr14), IDTVEC(fastintr15); -inthand_t - IDTVEC(intr0), IDTVEC(intr1), IDTVEC(intr2), IDTVEC(intr3), - IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), - IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), - IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15); -unpendhand_t - IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2), - IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5), - IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8), - IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11), - IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14), - IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17), - IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20), - IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23), - IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26), - IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), - 
IDTVEC(fastunpend30), IDTVEC(fastunpend31); - -#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) - -void isa_defaultirq(void); -int isa_nmi(int cd); -int icu_setup(int intr, driver_intr_t *func, void *arg, int flags); -int icu_unset(int intr, driver_intr_t *handler); -void icu_reinit(void); - -/* - * WARNING: These are internal functions and not to be used by device drivers! - * They are subject to change without notice. - */ -int inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, - enum intr_type flags, void **cookiep); -int inthand_remove(void *cookie); -void sched_ithd(void *dummy); -void call_fast_unpend(int irq); - -#endif /* LOCORE */ - -#endif /* _KERNEL */ - -#endif /* !_I386_ISA_INTR_MACHDEP_H_ */ diff --git a/sys/amd64/isa/ithread.c b/sys/amd64/isa/ithread.c deleted file mode 100644 index e36e866..0000000 --- a/sys/amd64/isa/ithread.c +++ /dev/null @@ -1,115 +0,0 @@ -/*- - * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Berkeley Software Design Inc's name may not be used to endorse or - * promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * BSDI Id: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* Interrupt thread code. */ - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/interrupt.h> -#include <sys/systm.h> -#include <sys/vmmeter.h> - -#include <amd64/isa/icu.h> -#include <amd64/isa/intr_machdep.h> -#include <amd64/isa/isa.h> - -struct int_entropy { - struct proc *p; - int irq; -}; - -static u_int straycount[ICU_LEN]; -static u_int glitchcount7; -static u_int glitchcount15; - -#define MAX_STRAY_LOG 5 - -/* - * Schedule a heavyweight interrupt process. This function is called - * from the interrupt handlers Xintr<num>. - */ -void -sched_ithd(void *cookie) -{ - int irq = (uintptr_t) cookie; /* IRQ we're handling */ - struct ithd *ithd = ithds[irq]; /* and the process that does it */ - int error, isr; - - /* This used to be in icu_vector.s */ - /* - * We count software interrupts when we process them. The - * code here follows previous practice, but there's an - * argument for counting hardware interrupts when they're - * processed too. - */ - atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */ - atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */ - - /* - * Schedule the interrupt thread to run if needed and switch to it - * if we schedule it if !cold. - */ - error = ithread_schedule(ithd, !cold); - - /* - * Log stray interrupts. 
- */ - if (error == EINVAL) { - /* Determine if it is a stray interrupt or simply a glitch */ - if (irq == 7) { - outb(IO_ICU1, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU1); - outb(IO_ICU1, OCW3_SEL | OCW3_RIS); /* reselect IIR */ - if ((isr & 0x80) == 0) { - glitchcount7++; - return; - } - } - if (irq == 15) { - outb(IO_ICU2, OCW3_SEL); /* select IS register */ - isr = inb(IO_ICU2); - outb(IO_ICU2, OCW3_SEL | OCW3_RIS); /* reselect IIR */ - if ((isr & 0x80) == 0) { - glitchcount15++; - return; - } - } - if (straycount[irq] < MAX_STRAY_LOG) { - printf("stray irq %d\n", irq); - if (++straycount[irq] == MAX_STRAY_LOG) - printf( - "got %d stray irq %d's: not logging anymore\n", - MAX_STRAY_LOG, irq); - } - } -} diff --git a/sys/amd64/isa/vector.S b/sys/amd64/isa/vector.S deleted file mode 100644 index 2d7be50..0000000 --- a/sys/amd64/isa/vector.S +++ /dev/null @@ -1,76 +0,0 @@ -/* - * from: vector.s, 386BSD 0.1 unknown origin - * $FreeBSD$ - */ - -#include <amd64/isa/icu.h> -#include <amd64/isa/isa.h> -#include <amd64/isa/intr_machdep.h> - - .data - ALIGN_DATA - -/* - * Interrupt counters and names for export to vmstat(8) and friends. - * - * XXX this doesn't really belong here; everything except the labels - * for the endpointers is almost machine-independent. - */ - - .globl intrcnt, eintrcnt -intrcnt: - .space INTRCNT_COUNT * 8 -eintrcnt: - - .globl intrnames, eintrnames -intrnames: - .space INTRCNT_COUNT * 32 -eintrnames: - .text - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - * - * XXX - the interrupt frame is set up to look like a trap frame. This is - * usually a waste of time. The only interrupt handlers that want a frame - * are the clock handler (it wants a clock frame), the fpu handler (it's - * easier to do right all in assembler). The interrupt return routine - * needs a trap frame for rare AST's (it could easily convert the frame). 
- * The direct costs of setting up a trap frame are two pushl's (error - * code and trap number), an addl to get rid of these, and pushing and - * popping the call-saved regs %esi, %edi and %ebp twice. The indirect - * costs are making the driver interface nonuniform so unpending of - * interrupts is more complicated and slower (call_driver(unit) would - * be easier than ensuring an interrupt frame for all handlers). Finally, - * there are some struct copies in the fpu handler and maybe in the clock - * handler that could be avoided by working more with pointers to frames - * instead of frames. - * - * XXX - should we do a cld on every system entry to avoid the requirement - * for scattered cld's? - * - * Coding notes for *.s: - * - * If possible, avoid operations that involve an operand size override. - * Word-sized operations might be smaller, but the operand size override - * makes them slower on 486's and no faster on 386's unless perhaps - * the instruction pipeline is depleted. E.g., - * - * Use movl to seg regs instead of the equivalent but more descriptive - * movw - gas generates an irrelevant (slower) operand size override. - * - * Use movl to ordinary regs in preference to movw and especially - * in preference to movz[bw]l. Use unsigned (long) variables with the - * top bits clear instead of unsigned short variables to provide more - * opportunities for movl. - * - * If possible, use byte-sized operations. They are smaller and no slower. - * - * Use (%reg) instead of 0(%reg) - gas generates larger code for the latter. - * - * If the interrupt frame is made more flexible, INTR can push %eax first - * and decide the ipending case with less overhead, e.g., by avoiding - * loading segregs. 
- */ - -#include "amd64/isa/icu_vector.S" diff --git a/sys/amd64/pci/pci_bus.c b/sys/amd64/pci/pci_bus.c index 3f0b9dc..6be70b7 100644 --- a/sys/amd64/pci/pci_bus.c +++ b/sys/amd64/pci/pci_bus.c @@ -426,7 +426,7 @@ legacy_pcib_probe(device_t dev) if (pci_cfgregopen() == 0) return ENXIO; - return 0; + return -100; } int diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 0af5b67..82ace4c 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -8,48 +8,26 @@ # dependency lines other than the first are silently ignored. # -ia32_genassym.o optional ia32 \ +ia32_genassym.o standard \ dependency "$S/compat/ia32/ia32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "ia32_genassym.o" # -ia32_assym.h optional ia32 \ +ia32_assym.h standard \ dependency "$S/kern/genassym.sh ia32_genassym.o" \ compile-with "env NM=${NM} sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "ia32_assym.h" # - -dev/kbd/atkbd.c optional atkbd -dev/kbd/atkbdc.c optional atkbdc -dev/kbd/kbd.c optional atkbd -dev/kbd/kbd.c optional kbd -dev/kbd/kbd.c optional sc -dev/kbd/kbd.c optional ukbd -dev/kbd/kbd.c optional vt -dev/sio/sio.c optional sio -dev/sio/sio_isa.c optional sio isa -dev/syscons/apm/apm_saver.c optional apm_saver apm -dev/syscons/schistory.c optional sc -dev/syscons/scmouse.c optional sc -dev/syscons/scterm.c optional sc -dev/syscons/scterm-dumb.c optional sc -dev/syscons/scterm-sc.c optional sc -dev/syscons/scvesactl.c optional sc vga vesa -dev/syscons/scvgarndr.c optional sc vga -dev/syscons/scvidctl.c optional sc -dev/syscons/scvtb.c optional sc -dev/syscons/syscons.c optional sc -dev/syscons/sysmouse.c optional sc -dev/uart/uart_cpu_amd64.c optional uart -isa/atkbd_isa.c optional atkbd - amd64/acpica/OsdEnvironment.c optional acpi amd64/acpica/acpi_machdep.c optional acpi amd64/acpica/acpi_wakeup.c optional acpi +amd64/acpica/madt.c optional acpi 
amd64/amd64/amd64-gdbstub.c optional ddb amd64/amd64/amd64_mem.c standard +amd64/amd64/apic_vector.S standard +amd64/amd64/atomic.c standard amd64/amd64/autoconf.c standard amd64/amd64/busdma_machdep.c standard amd64/amd64/cpu_switch.S standard @@ -60,12 +38,21 @@ amd64/amd64/db_trace.c optional ddb amd64/amd64/dump_machdep.c standard amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard +amd64/amd64/fpu.c standard amd64/amd64/identcpu.c standard +amd64/amd64/in_cksum.c optional inet amd64/amd64/initcpu.c standard +amd64/amd64/intr_machdep.c standard +amd64/amd64/io_apic.c standard amd64/amd64/legacy.c standard +amd64/amd64/local_apic.c standard amd64/amd64/locore.S standard no-obj amd64/amd64/machdep.c standard amd64/amd64/mem.c standard +amd64/amd64/mp_machdep.c optional smp +amd64/amd64/mpboot.S optional smp +amd64/amd64/mptable.c optional mptable +amd64/amd64/mptable_pci.c optional mptable pci amd64/amd64/nexus.c standard amd64/amd64/pmap.c standard amd64/amd64/sigtramp.S standard @@ -75,72 +62,64 @@ amd64/amd64/trap.c standard amd64/amd64/tsc.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard -amd64/amd64/in_cksum.c optional inet -amd64/amd64/fpu.c standard - +amd64/isa/atpic.c optional atpic isa +amd64/isa/atpic_vector.S optional atpic isa amd64/isa/clock.c standard -amd64/isa/intr_machdep.c standard amd64/isa/isa.c standard amd64/isa/isa_dma.c standard -amd64/isa/ithread.c standard - -amd64/pci/pci_cfgreg.c optional pci +amd64/isa/nmi.c standard amd64/pci/pci_bus.c optional pci - -compat/freebsd32/freebsd32_misc.c optional ia32 -compat/freebsd32/freebsd32_syscalls.c optional ia32 -compat/freebsd32/freebsd32_sysent.c optional ia32 -compat/ia32/ia32_sigtramp.S optional ia32 -compat/ia32/ia32_sysvec.c optional ia32 -amd64/ia32/ia32_signal.c optional ia32 -amd64/ia32/ia32_exception.S optional ia32 -amd64/ia32/ia32_syscall.c optional ia32 -kern/imgact_elf32.c optional ia32 - -# This file tells config what files go into 
building a kernel, -# files marked standard are always included. -# +amd64/pci/pci_cfgreg.c optional pci dev/fb/fb.c optional fb dev/fb/fb.c optional vga dev/fb/splash.c optional splash dev/fb/vga.c optional vga - dev/kbd/atkbd.c optional atkbd dev/kbd/atkbdc.c optional atkbdc dev/kbd/kbd.c optional atkbd dev/kbd/kbd.c optional kbd dev/kbd/kbd.c optional sc dev/kbd/kbd.c optional ukbd -dev/kbd/kbd.c optional vt - dev/ppc/ppc.c optional ppc - +dev/sio/sio.c optional sio +dev/sio/sio_isa.c optional sio isa +dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/schistory.c optional sc dev/syscons/scmouse.c optional sc -dev/syscons/scterm.c optional sc dev/syscons/scterm-dumb.c optional sc dev/syscons/scterm-sc.c optional sc +dev/syscons/scterm.c optional sc dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvidctl.c optional sc dev/syscons/scvtb.c optional sc dev/syscons/syscons.c optional sc dev/syscons/sysmouse.c optional sc +dev/uart/uart_cpu_amd64.c optional uart geom/geom_bsd.c standard geom/geom_bsd_enc.c standard geom/geom_mbr.c standard geom/geom_mbr_enc.c standard - -#i386/isa/pmtimer.c optional pmtimer -# isa/atkbd_isa.c optional atkbd isa/atkbdc_isa.c optional atkbdc isa/fd.c optional fdc isa/psm.c optional psm isa/syscons_isa.c optional sc isa/vga_isa.c optional vga -pci/agp_intel.c optional agp -pci/agp_via.c optional agp -pci/agp_sis.c optional agp pci/agp_ali.c optional agp pci/agp_amd.c optional agp pci/agp_i810.c optional agp +pci/agp_intel.c optional agp +pci/agp_sis.c optional agp +pci/agp_via.c optional agp +# +# IA32 binary support +# +amd64/ia32/ia32_exception.S optional ia32 +amd64/ia32/ia32_signal.c optional ia32 +amd64/ia32/ia32_syscall.c optional ia32 +compat/freebsd32/freebsd32_misc.c optional ia32 +compat/freebsd32/freebsd32_syscalls.c optional ia32 +compat/freebsd32/freebsd32_sysent.c optional ia32 +compat/ia32/ia32_sigtramp.S optional ia32 +compat/ia32/ia32_sysvec.c optional ia32 +kern/imgact_elf32.c optional ia32 diff --git 
a/sys/conf/options.amd64 b/sys/conf/options.amd64 index af1adb7..ccbe0f6 100644 --- a/sys/conf/options.amd64 +++ b/sys/conf/options.amd64 @@ -1,6 +1,8 @@ # $FreeBSD$ # Options specific to AMD64 platform kernels +AUTO_EOI_1 opt_auto_eoi.h +AUTO_EOI_2 opt_auto_eoi.h MAXMEM PERFMON opt_perfmon.h @@ -29,3 +31,5 @@ PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h IA32 +NO_MIXED_MODE +DEV_ATPIC opt_atpic.h diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 0190b2b..28c3224 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -816,7 +816,7 @@ pci_add_resources(device_t pcib, device_t bus, device_t dev) } if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) { -#if defined(__ia64__) || defined(__i386__) +#if defined(__ia64__) || defined(__i386__) || defined(__amd64__) /* * Try to re-route interrupts. Sometimes the BIOS or * firmware may leave bogus values in these registers. diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 3a71f6e..2dee8e3 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -97,7 +97,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/queue.h> #include <sys/sched.h> -#if defined(SMP) && defined(__i386__) +#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) #include <sys/smp.h> #endif #include <machine/critical.h> @@ -124,7 +124,7 @@ choosethread(void) struct thread *td; struct ksegrp *kg; -#if defined(SMP) && defined(__i386__) +#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) if (smp_active == 0 && PCPU_GET(cpuid) != 0) { /* Shutting down, run idlethread on AP's */ td = PCPU_GET(idlethread); diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 6cb5dba..28d5605 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -296,7 +296,7 @@ static struct witness_order_list_entry order_lists[] = { { "icu", &lock_class_mtx_spin }, #ifdef SMP { "smp rendezvous", &lock_class_mtx_spin }, -#ifdef __i386__ +#if defined(__i386__) || 
defined(__amd64__) { "tlb", &lock_class_mtx_spin }, { "lazypmap", &lock_class_mtx_spin }, #endif |