34 files changed, 2134 insertions, 920 deletions
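The heart of this change is the handshake in smp_tlb_shootdown()/smp_targeted_tlb_shootdown(): the initiator publishes smp_tlb_addr1/smp_tlb_addr2 under smp_tlb_mtx, sends the IPI, and spins until smp_tlb_wait reaches the number of target CPUs, while each Xinvltlb/Xinvlpg/Xinvlrng handler acknowledges with a locked increment of smp_tlb_wait. Below is a minimal userspace sketch of that wait protocol only, not the kernel code: pthreads stand in for the other CPUs, a shared flag stands in for IPI delivery, and the names here (NCPUS, ap_loop, shootdown, ipi_pending) are purely illustrative.

/*
 * Sketch of the shootdown handshake added in this diff, simulated with
 * C11 atomics and pthreads.  Compile with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4		/* assumption: 1 initiator + 3 responders */

static _Atomic unsigned int tlb_addr1, tlb_addr2;	/* shootdown arguments */
static _Atomic int tlb_wait;				/* acks, cf. smp_tlb_wait */
static _Atomic int ipi_pending;				/* stands in for the IPI */

/* Responder loop: plays the role of the Xinvltlb/Xinvlpg/Xinvlrng handlers. */
static void *
ap_loop(void *arg)
{
	(void)arg;
	for (;;) {
		while (!atomic_load(&ipi_pending))
			;			/* wait for the "IPI" */
		/* a real handler would invalidate tlb_addr1..tlb_addr2 here */
		atomic_fetch_add(&tlb_wait, 1);	/* cf. "lock; incl smp_tlb_wait" */
		while (atomic_load(&ipi_pending))
			;			/* wait for this round to finish */
	}
	return (NULL);
}

/* Initiator: plays the role of smp_tlb_shootdown(). */
static void
shootdown(unsigned int a1, unsigned int a2)
{
	atomic_store(&tlb_addr1, a1);
	atomic_store(&tlb_addr2, a2);
	atomic_store(&tlb_wait, 0);
	atomic_store(&ipi_pending, 1);		/* cf. ipi_all_but_self(vector) */
	while (atomic_load(&tlb_wait) < NCPUS - 1)
		;				/* spin until every other cpu acks */
	atomic_store(&ipi_pending, 0);
	printf("shootdown of %#x-%#x acked by %d cpus\n", a1, a2, NCPUS - 1);
}

int
main(void)
{
	pthread_t tid[NCPUS - 1];
	int i;

	for (i = 0; i < NCPUS - 1; i++)
		pthread_create(&tid[i], NULL, ap_loop, NULL);
	shootdown(0x1000, 0x5000);
	return (0);
}

The kernel version adds two safeguards that the sketch omits: rounds are serialized by the smp_tlb_mtx spin mutex, and the caller panics if it is entered with interrupts disabled, since a CPU that cannot take the IPI while another CPU spins on smp_tlb_wait would deadlock the handshake.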
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 95c9133..e3a37e1 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -181,30 +181,108 @@ Xspuriousint: iret /* - * Handle TLB shootdowns. + * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT .globl Xinvltlb Xinvltlb: pushl %eax + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds #ifdef COUNT_XINVLTLB_HITS pushl %fs - movl $KPSEL, %eax + movl $KPSEL, %eax /* Private space selector */ mov %ax, %fs movl PCPU(CPUID), %eax popl %fs - ss - incl _xhits(,%eax,4) + incl xhits_gbl(,%eax,4) #endif /* COUNT_XINVLTLB_HITS */ movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 - ss /* stack segment, avoid %ds load */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + lock + incl smp_tlb_wait + + popl %ds + popl %eax + iret + +/* + * Single page TLB shootdown + */ + .text + SUPERALIGN_TEXT + .globl Xinvlpg +Xinvlpg: + pushl %eax + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds + +#ifdef COUNT_XINVLTLB_HITS + pushl %fs + movl $KPSEL, %eax /* Private space selector */ + mov %ax, %fs + movl PCPU(CPUID), %eax + popl %fs + ss + incl xhits_pg(,%eax,4) +#endif /* COUNT_XINVLTLB_HITS */ + + movl smp_tlb_addr1, %eax + invlpg (%eax) /* invalidate single page */ + + movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + + lock + incl smp_tlb_wait + + popl %ds + popl %eax + iret + +/* + * Page range TLB shootdown. + */ + .text + SUPERALIGN_TEXT + .globl Xinvlrng +Xinvlrng: + pushl %eax + pushl %edx + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds + +#ifdef COUNT_XINVLTLB_HITS + pushl %fs + movl $KPSEL, %eax /* Private space selector */ + mov %ax, %fs + movl PCPU(CPUID), %eax + popl %fs + incl xhits_rng(,%eax,4) +#endif /* COUNT_XINVLTLB_HITS */ + + movl smp_tlb_addr1, %edx + movl smp_tlb_addr2, %eax +1: invlpg (%edx) /* invalidate single page */ + addl $PAGE_SIZE, %edx + cmpl %edx, %eax + jb 1b + + movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + + lock + incl smp_tlb_wait + + popl %ds + popl %edx popl %eax iret @@ -443,12 +521,6 @@ Xrendezvous: .data -#ifdef COUNT_XINVLTLB_HITS - .globl _xhits -_xhits: - .space (NCPU * 4), 0 -#endif /* COUNT_XINVLTLB_HITS */ - .globl apic_pin_trigger apic_pin_trigger: .long 0 diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index 4fff220..299bc3e 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -381,12 +381,6 @@ begin: movl IdlePTD,%esi movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - testl $CPUID_PGE, R(cpu_feature) - jz 1f - movl %cr4, %eax - orl $CR4_PGE, %eax - movl %eax, %cr4 -1: pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ @@ -809,14 +803,7 @@ no_kernend: jne map_read_write #endif xorl %edx,%edx - -#if !defined(SMP) - testl $CPUID_PGE, R(cpu_feature) - jz 2f - orl $PG_G,%edx -#endif - -2: movl $R(etext),%ecx + movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) @@ -827,13 +814,7 @@ no_kernend: andl $~PAGE_MASK, %eax map_read_write: movl $PG_RW,%edx -#if !defined(SMP) - testl $CPUID_PGE, R(cpu_feature) - jz 1f - orl $PG_G,%edx -#endif - -1: movl R(KERNend),%ecx + movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index 4fff220..299bc3e 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -381,12 +381,6 @@ begin: movl 
IdlePTD,%esi movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - testl $CPUID_PGE, R(cpu_feature) - jz 1f - movl %cr4, %eax - orl $CR4_PGE, %eax - movl %eax, %cr4 -1: pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ @@ -809,14 +803,7 @@ no_kernend: jne map_read_write #endif xorl %edx,%edx - -#if !defined(SMP) - testl $CPUID_PGE, R(cpu_feature) - jz 2f - orl $PG_G,%edx -#endif - -2: movl $R(etext),%ecx + movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) @@ -827,13 +814,7 @@ no_kernend: andl $~PAGE_MASK, %eax map_read_write: movl $PG_RW,%edx -#if !defined(SMP) - testl $CPUID_PGE, R(cpu_feature) - jz 1f - orl $PG_G,%edx -#endif - -1: movl R(KERNend),%ecx + movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 27ee7ae..008dfc5 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; +#ifdef APIC_IO +/* Variables needed for SMP tlb shootdown. */ +u_int smp_tlb_addr1; +u_int smp_tlb_addr2; +volatile int smp_tlb_wait; +static struct mtx smp_tlb_mtx; +#endif + /* * Local data and functions. */ @@ -335,6 +343,9 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ +#ifdef APIC_IO + mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); +#endif } /* @@ -604,6 +615,10 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLPG_OFFSET, Xinvlpg, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLRNG_OFFSET, Xinvlrng, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } +#if defined(APIC_IO) + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif + /* * Flush the TLB on all other CPU's - * - * XXX: Needs to handshake and wait for completion before 
proceding. */ +static void +smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) +{ + u_int ncpu; + register_t eflags; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + ipi_all_but_self(vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) +{ + u_int m; + int i, ncpu, othercpus; + register_t eflags; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + /* XXX there should be a pcpu self mask */ + mask &= ~(1 << PCPU_GET(cpuid)); + if (mask == 0) + return; + /* Count the target cpus */ + ncpu = 0; + m = mask; + while ((i = ffs(m)) != 0) { + m >>= i; + ncpu++; + } + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} +#endif + void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) - ipi_all_but_self(IPI_INVLTLB); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } #endif /* APIC_IO */ } void -invlpg(u_int addr) +smp_invlpg(u_int addr) { - __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_invlpg_range(u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } void -invltlb(void) +smp_masked_invltlb(u_int mask) { - u_long temp; +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +#endif /* APIC_IO */ +} - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() is - * inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); +void +smp_masked_invlpg(u_int mask, u_int addr) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } @@ -2280,6 +2451,9 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + if (bootverbose) + apic_dump("ap_init()"); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), + TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 27ee7ae..008dfc5 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; +#ifdef APIC_IO +/* Variables needed for SMP tlb shootdown. */ +u_int smp_tlb_addr1; +u_int smp_tlb_addr2; +volatile int smp_tlb_wait; +static struct mtx smp_tlb_mtx; +#endif + /* * Local data and functions. */ @@ -335,6 +343,9 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ +#ifdef APIC_IO + mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); +#endif } /* @@ -604,6 +615,10 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLPG_OFFSET, Xinvlpg, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLRNG_OFFSET, Xinvlrng, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } +#if defined(APIC_IO) + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); 
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif + /* * Flush the TLB on all other CPU's - * - * XXX: Needs to handshake and wait for completion before proceding. */ +static void +smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) +{ + u_int ncpu; + register_t eflags; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + ipi_all_but_self(vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) +{ + u_int m; + int i, ncpu, othercpus; + register_t eflags; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + /* XXX there should be a pcpu self mask */ + mask &= ~(1 << PCPU_GET(cpuid)); + if (mask == 0) + return; + /* Count the target cpus */ + ncpu = 0; + m = mask; + while ((i = ffs(m)) != 0) { + m >>= i; + ncpu++; + } + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} +#endif + void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) - ipi_all_but_self(IPI_INVLTLB); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } #endif /* APIC_IO */ } void -invlpg(u_int addr) +smp_invlpg(u_int addr) { - __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_invlpg_range(u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } void -invltlb(void) +smp_masked_invltlb(u_int mask) { - u_long temp; +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +#endif /* APIC_IO */ +} - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() is - * inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); +void +smp_masked_invlpg(u_int mask, u_int addr) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } @@ -2280,6 +2451,9 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + if (bootverbose) + apic_dump("ap_init()"); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), + TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f12cb0b..ba3ee22 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -85,6 +85,9 @@ #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> +#if defined(SMP) +#include <sys/smp.h> +#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -101,7 +104,6 @@ #include <machine/md_var.h> #include <machine/specialreg.h> #if defined(SMP) || defined(APIC_IO) -#include <machine/smp.h> #include <machine/apic.h> #include <machine/segments.h> #include <machine/tss.h> @@ -259,10 +261,10 @@ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; + #ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) { + if (cpu_feature & CPUID_PSE) newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); - } #endif return newaddr; } @@ -367,10 +369,9 @@ pmap_bootstrap(firstaddr, loadaddr) PTD[i] = 0; pgeflag = 0; -#if !defined(SMP) /* XXX - see also mp_machdep.c */ - if (cpu_feature & CPUID_PGE) { +#if !defined(SMP) || defined(ENABLE_PG_G) + if (cpu_feature & CPUID_PGE) pgeflag = PG_G; - } #endif /* @@ -383,7 +384,7 @@ pmap_bootstrap(firstaddr, loadaddr) */ pdir4mb = 0; -#if !defined(DISABLE_PSE) +#ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) { pd_entry_t ptditmp; /* @@ -394,57 +395,64 @@ pmap_bootstrap(firstaddr, loadaddr) ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; - -#if !defined(SMP) - /* - * Enable the PSE mode. - */ - load_cr4(rcr4() | CR4_PSE); - - /* - * We can do the mapping here for the single processor - * case. We simply ignore the old page table page from - * now on. - */ - /* - * For SMP, we still need 4K pages to bootstrap APs, - * PSE will be enabled as soon as all APs are up. - */ - PTD[KPTDI] = (pd_entry_t) ptditmp; - kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; - invltlb(); -#endif } #endif - +#ifndef SMP + /* + * Turn on PGE/PSE. SMP does this later on since the + * 4K page tables are required for AP boot (for now). + * XXX fixme. + */ + pmap_set_opt(); +#endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); - /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif - - invltlb(); + cpu_invltlb(); } -#ifdef SMP /* - * Set 4mb pdir for mp startup + * Enable 4MB page mode for MP startup. Turn on PG_G support. + * BSP will run this after all the AP's have started up. */ void pmap_set_opt(void) { - if (pseflag && (cpu_feature & CPUID_PSE)) { + pt_entry_t *pte; + vm_offset_t va; + + if (pgeflag && (cpu_feature & CPUID_PGE)) + load_cr4(rcr4() | CR4_PGE); +#ifndef DISABLE_PSE + if (pseflag && (cpu_feature & CPUID_PSE)) load_cr4(rcr4() | CR4_PSE); - if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ +#endif + if (PCPU_GET(cpuid) == 0) { +#ifndef DISABLE_PSE + if (pdir4mb) kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; - cpu_invltlb(); +#endif + if (pgeflag) { + /* XXX see earlier comments about virtual_avail */ + for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE) + { + pte = vtopte(va); + if (*pte) + *pte |= pgeflag; + } } - } + /* + * for SMP, this will cause all cpus to reload again, which + * is actually what we want since they now have CR4_PGE on. + */ + invltlb(); + } else + cpu_invltlb(); } -#endif /* * Initialize the pmap module. @@ -552,27 +560,37 @@ pmap_track_modified(vm_offset_t va) return 0; } -static PMAP_INLINE void -invltlb_1pg(vm_offset_t va) -{ -#ifdef I386_CPU - invltlb(); -#else - invlpg(va); -#endif -} - static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { #if defined(SMP) - if (pmap->pm_active & PCPU_GET(cpumask)) - cpu_invlpg((void *)va); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); + u_int cpumask; + u_int other_cpus; + struct thread *td; + + td = curthread; + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. + * XXX we may need to hold schedlock to get a coherent pm_active + */ + if (td->td_critnest == 1) + cpu_critical_exit(td->td_savecrit); + if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { + invlpg(va); /* global */ + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + cpu_invlpg(va); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg(pmap->pm_active & other_cpus, va); + } + critical_exit(); #else if (pmap->pm_active) - invltlb_1pg(va); + cpu_invlpg(va); #endif } @@ -580,10 +598,30 @@ static __inline void pmap_invalidate_all(pmap_t pmap) { #if defined(SMP) - if (pmap->pm_active & PCPU_GET(cpumask)) - cpu_invltlb(); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); + u_int cpumask; + u_int other_cpus; + struct thread *td; + + td = curthread; + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + */ + if (td->td_critnest == 1) + cpu_critical_exit(td->td_savecrit); + if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { + invltlb(); /* global */ + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + cpu_invltlb(); + if (pmap->pm_active & other_cpus) + smp_masked_invltlb(pmap->pm_active & other_cpus); + } + critical_exit(); #else if (pmap->pm_active) invltlb(); @@ -609,12 +647,7 @@ get_ptbase(pmap) /* otherwise, we are alternate address space */ if (frame != (APTDpde & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); -#if defined(SMP) - /* The page directory is not shared between CPUs */ - cpu_invltlb(); -#else invltlb(); -#endif } return APTmap; } @@ -643,7 +676,7 @@ pmap_pte_quick(pmap, va) newpf = pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; - invltlb_1pg((vm_offset_t) PADDR1); + pmap_invalidate_page(pmap, (vm_offset_t) PADDR1); } return PADDR1 + (index & (NPTEPG - 1)); } @@ -689,20 +722,17 @@ pmap_extract(pmap, va) /* * add a wired page to the kva - * note that in order for the mapping to take effect -- you - * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_offset_t pa) { pt_entry_t *pte; - pt_entry_t npte, opte; + pt_entry_t npte; npte = pa | PG_RW | PG_V | pgeflag; pte = vtopte(va); - opte = *pte; *pte = npte; - invltlb_1pg(va); + invlpg(va); } /* @@ -715,7 +745,7 @@ pmap_kremove(vm_offset_t va) pte = vtopte(va); *pte = 0; - invltlb_1pg(va); + invlpg(va); } /* @@ -733,13 +763,17 @@ pmap_kremove(vm_offset_t va) vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { - vm_offset_t sva = *virt; - vm_offset_t va = sva; + vm_offset_t va, sva; + pt_entry_t *pte; + + va = sva = *virt; while (start < end) { - pmap_kenter(va, start); + pte = vtopte(va); + *pte = start | PG_RW | PG_V | pgeflag; va += PAGE_SIZE; start += PAGE_SIZE; } + invlpg_range(sva, end); *virt = va; return (sva); } @@ -754,28 +788,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) * over. The page *must* be wired. */ void -pmap_qenter(vm_offset_t va, vm_page_t *m, int count) +pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) { - vm_offset_t end_va; + vm_offset_t va, end_va; + pt_entry_t *pte; + va = sva; end_va = va + count * PAGE_SIZE; - - while (va < end_va) { - pt_entry_t *pte; + while (va < end_va) { pte = vtopte(va); *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; -#ifdef SMP - cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; m++; } -#ifdef SMP - smp_invltlb(); -#endif + invlpg_range(sva, end_va); } /* @@ -783,27 +810,20 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count) * kernel -- it is meant only for temporary mappings. 
*/ void -pmap_qremove(vm_offset_t va, int count) +pmap_qremove(vm_offset_t sva, int count) { - vm_offset_t end_va; + pt_entry_t *pte; + vm_offset_t va, end_va; - end_va = va + count*PAGE_SIZE; + va = sva; + end_va = va + count * PAGE_SIZE; while (va < end_va) { - pt_entry_t *pte; - pte = vtopte(va); *pte = 0; -#ifdef SMP - cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; } -#ifdef SMP - smp_invltlb(); -#endif + invlpg_range(sva, end_va); } static vm_page_t @@ -824,9 +844,6 @@ retry: void pmap_new_proc(struct proc *p) { -#ifdef I386_CPU - int updateneeded = 0; -#endif int i; vm_object_t upobj; vm_offset_t up; @@ -870,23 +887,14 @@ pmap_new_proc(struct proc *p) * Enter the page into the kernel address space. */ *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; - if (oldpte) { -#ifdef I386_CPU - updateneeded = 1; -#else + if (oldpte) invlpg(up + i * PAGE_SIZE); -#endif - } vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } -#ifdef I386_CPU - if (updateneeded) - invltlb(); -#endif } /* @@ -901,7 +909,7 @@ pmap_dispose_proc(p) vm_object_t upobj; vm_offset_t up; vm_page_t m; - pt_entry_t *ptek, oldpte; + pt_entry_t *ptek; upobj = p->p_upages_obj; up = (vm_offset_t)p->p_uarea; @@ -911,17 +919,11 @@ pmap_dispose_proc(p) if (m == NULL) panic("pmap_dispose_proc: upage already missing?"); vm_page_busy(m); - oldpte = *(ptek + i); *(ptek + i) = 0; -#ifndef I386_CPU invlpg(up + i * PAGE_SIZE); -#endif vm_page_unwire(m, 0); vm_page_free(m); } -#ifdef I386_CPU - invltlb(); -#endif } /* @@ -986,9 +988,6 @@ pmap_swapin_proc(p) void pmap_new_thread(struct thread *td) { -#ifdef I386_CPU - int updateneeded = 0; -#endif int i; vm_object_t ksobj; vm_page_t m; @@ -1019,13 +1018,8 @@ pmap_new_thread(struct thread *td) ptek = vtopte(ks - PAGE_SIZE); oldpte = *ptek; *ptek = 0; - if (oldpte) { -#ifdef I386_CPU - updateneeded = 1; -#else + if (oldpte) invlpg(ks - PAGE_SIZE); -#endif - } ptek++; #else /* get a kernel virtual address for the kstack for this thread */ @@ -1055,23 +1049,14 @@ pmap_new_thread(struct thread *td) * Enter the page into the kernel address space. 
*/ *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; - if (oldpte) { -#ifdef I386_CPU - updateneeded = 1; -#else + if (oldpte) invlpg(ks + i * PAGE_SIZE); -#endif - } vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } -#ifdef I386_CPU - if (updateneeded) - invltlb(); -#endif } /* @@ -1086,7 +1071,7 @@ pmap_dispose_thread(td) vm_object_t ksobj; vm_offset_t ks; vm_page_t m; - pt_entry_t *ptek, oldpte; + pt_entry_t *ptek; ksobj = td->td_kstack_obj; ks = td->td_kstack; @@ -1096,17 +1081,11 @@ pmap_dispose_thread(td) if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_busy(m); - oldpte = *(ptek + i); *(ptek + i) = 0; -#ifndef I386_CPU invlpg(ks + i * PAGE_SIZE); -#endif vm_page_unwire(m, 0); vm_page_free(m); } -#ifdef I386_CPU - invltlb(); -#endif } /* @@ -2207,13 +2186,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif + pmap_invalidate_page(pmap, va); } return; } @@ -2281,13 +2254,7 @@ validate: if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif + pmap_invalidate_page(pmap, va); } } } @@ -2710,7 +2677,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t pdnxt; pd_entry_t src_frame, dst_frame; vm_page_t m; - pd_entry_t saved_pde; if (dst_addr != src_addr) return; @@ -2720,17 +2686,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, return; dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; - if (dst_frame != (APTDpde & PG_FRAME)) { - APTDpde = dst_frame | PG_RW | PG_V; -#if defined(SMP) - /* The page directory is not shared between CPUs */ - cpu_invltlb(); -#else - invltlb(); -#endif - } - saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V); - for(addr = src_addr; addr < end_addr; addr = pdnxt) { + for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; @@ -2771,6 +2727,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, if (pdnxt > end_addr) pdnxt = end_addr; + /* + * Have to recheck this before every avtopte() call below + * in case we have blocked and something else used APTDpde. + */ + if (dst_frame != (APTDpde & PG_FRAME)) { + APTDpde = dst_frame | PG_RW | PG_V; + invltlb(); + } src_pte = vtopte(addr); dst_pte = avtopte(addr); while (addr < pdnxt) { @@ -2786,16 +2750,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); - if ((APTDpde & PG_FRAME) != - (saved_pde & PG_FRAME)) { - APTDpde = saved_pde; -printf ("IT HAPPENNED!"); -#if defined(SMP) - cpu_invltlb(); -#else - invltlb(); -#endif - } if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and @@ -2839,12 +2793,15 @@ void pmap_zero_page(vm_offset_t phys) { +#ifdef SMP + /* XXX overkill, we only want to disable migration here */ + /* XXX or maybe not. 
down the track we have reentrancy issues */ + critical_enter(); +#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); - *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - invltlb_1pg((vm_offset_t)CADDR2); - + cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); @@ -2852,6 +2809,9 @@ pmap_zero_page(vm_offset_t phys) #endif bzero(CADDR2, PAGE_SIZE); *CMAP2 = 0; +#ifdef SMP + critical_exit(); +#endif } /* @@ -2864,12 +2824,15 @@ void pmap_zero_page_area(vm_offset_t phys, int off, int size) { +#ifdef SMP + /* XXX overkill, we only want to disable migration here */ + /* XXX or maybe not. down the track we have reentrancy issues */ + critical_enter(); +#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); - *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - invltlb_1pg((vm_offset_t)CADDR2); - + cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); @@ -2877,6 +2840,9 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size) #endif bzero((char *)CADDR2 + off, size); *CMAP2 = 0; +#ifdef SMP + critical_exit(); +#endif } /* @@ -2889,6 +2855,11 @@ void pmap_copy_page(vm_offset_t src, vm_offset_t dst) { +#ifdef SMP + /* XXX overkill, we only want to disable migration here */ + /* XXX or maybe not. down the track we have reentrancy issues */ + critical_enter(); +#endif if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) @@ -2896,17 +2867,14 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst) *CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; -#ifdef I386_CPU - invltlb(); -#else - invlpg((u_int)CADDR1); - invlpg((u_int)CADDR2); -#endif - + cpu_invlpg((u_int)CADDR1); /* SMP: local only */ + cpu_invlpg((u_int)CADDR2); /* SMP: local only */ bcopy(CADDR1, CADDR2, PAGE_SIZE); - *CMAP1 = 0; *CMAP2 = 0; +#ifdef SMP + critical_exit(); +#endif } @@ -3322,14 +3290,13 @@ pmap_mapdev(pa, size) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; - for (tmpva = va; size > 0;) { + for (tmpva = va; size > 0; ) { pte = vtopte(tmpva); *pte = pa | PG_RW | PG_V | pgeflag; size -= PAGE_SIZE; tmpva += PAGE_SIZE; - pa += PAGE_SIZE; } - invltlb(); + invlpg_range(va, tmpva); return ((void *)(va + offset)); } @@ -3339,11 +3306,20 @@ pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { - vm_offset_t base, offset; + vm_offset_t base, offset, tmpva; + pt_entry_t *pte; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); + + for (tmpva = base; size > 0; ) { + pte = vtopte(tmpva); + *pte = 0; + size -= PAGE_SIZE; + tmpva += PAGE_SIZE; + } + invlpg_range(va, tmpva); kmem_free(kernel_map, base, size); } diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index bc58672..0649009 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -1591,42 +1591,6 @@ ENTRY(ssdtosd) popl %ebx ret -/* load_cr0(cr0) */ -ENTRY(load_cr0) - movl 4(%esp),%eax - movl %eax,%cr0 - ret - -/* rcr0() */ -ENTRY(rcr0) - movl %cr0,%eax - ret - -/* rcr3() */ -ENTRY(rcr3) - movl %cr3,%eax - ret - -/* void load_cr3(caddr_t cr3) */ -ENTRY(load_cr3) -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl 4(%esp),%eax - movl %eax,%cr3 - ret - -/* rcr4() */ -ENTRY(rcr4) - movl %cr4,%eax - ret - -/* void load_cr4(caddr_t cr4) */ -ENTRY(load_cr4) - movl 4(%esp),%eax - movl %eax,%cr4 - ret - /* void 
reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index bc58672..0649009 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -1591,42 +1591,6 @@ ENTRY(ssdtosd) popl %ebx ret -/* load_cr0(cr0) */ -ENTRY(load_cr0) - movl 4(%esp),%eax - movl %eax,%cr0 - ret - -/* rcr0() */ -ENTRY(rcr0) - movl %cr0,%eax - ret - -/* rcr3() */ -ENTRY(rcr3) - movl %cr3,%eax - ret - -/* void load_cr3(caddr_t cr3) */ -ENTRY(load_cr3) -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl 4(%esp),%eax - movl %eax,%cr3 - ret - -/* rcr4() */ -ENTRY(rcr4) - movl %cr4,%eax - ret - -/* void load_cr4(caddr_t cr4) */ -ENTRY(load_cr4) - movl 4(%esp),%eax - movl %eax,%cr4 - ret - /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 969541f..94d5c3a 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -227,62 +227,6 @@ invd(void) __asm __volatile("invd"); } -#if defined(SMP) && defined(_KERNEL) - -/* - * When using APIC IPI's, invlpg() is not simply the invlpg instruction - * (this is a bug) and the inlining cost is prohibitive since the call - * executes into the IPI transmission system. - */ -void invlpg __P((u_int addr)); -void invltlb __P((void)); - -static __inline void -cpu_invlpg(void *addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -} - -static __inline void -cpu_invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. - */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); -#if defined(SWTCH_OPTIM_STATS) - ++tlb_flush_count; -#endif -} - -#else /* !(SMP && _KERNEL) */ - -static __inline void -invlpg(u_int addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -} - -static __inline void -invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); -#ifdef SWTCH_OPTIM_STATS - ++tlb_flush_count; -#endif -} - -#endif /* SMP && _KERNEL */ - static __inline u_short inw(u_int port) { @@ -348,15 +292,6 @@ outw(u_int port, u_short data) } static __inline u_int -rcr2(void) -{ - u_int data; - - __asm __volatile("movl %%cr2,%0" : "=r" (data)); - return (data); -} - -static __inline u_int read_eflags(void) { u_int ef; @@ -420,6 +355,162 @@ wrmsr(u_int msr, u_int64_t newval) __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } +static __inline void +load_cr0(u_int data) +{ + + __asm __volatile("movl %0,%%cr0" : : "r" (data)); +} + +static __inline u_int +rcr0(void) +{ + u_int data; + + __asm __volatile("movl %%cr0,%0" : "=r" (data)); + return (data); +} + +static __inline u_int +rcr2(void) +{ + u_int data; + + __asm __volatile("movl %%cr2,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_cr3(u_int data) +{ + + __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); +#if defined(SWTCH_OPTIM_STATS) + ++tlb_flush_count; +#endif +} + +static __inline u_int +rcr3(void) +{ + u_int data; + + __asm __volatile("movl %%cr3,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_cr4(u_int data) +{ + __asm __volatile("movl %0,%%cr4" : : "r" (data)); +} + +static __inline u_int +rcr4(void) +{ + u_int data; + + __asm __volatile("movl %%cr4,%0" : "=r" (data)); + return (data); +} + +/* + * Global TLB flush (except for thise for pages marked PG_G) + */ +static __inline void +cpu_invltlb(void) +{ + + load_cr3(rcr3()); +} + +/* + * TLB flush for an individual page (even if it has PG_G). + * Only works on 486+ CPUs (i386 does not have PG_G). + */ +static __inline void +cpu_invlpg(u_int addr) +{ + +#ifndef I386_CPU + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +#else + cpu_invltlb(); +#endif +} + +#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ +/* + * Same as above but for a range of pages. + */ +static __inline void +cpu_invlpg_range(u_int startva, u_int endva) +{ +#ifndef I386_CPU + u_int addr; + + for (addr = startva; addr < endva; addr += PAGE_SIZE) + __asm __volatile("invlpg %0" : : "m" (*(char *)addr)); + __asm __volatile("" : : : "memory"); +#else + cpu_invltlb(); +#endif +} +#endif + +#ifdef SMP +extern void smp_invlpg(u_int addr); +extern void smp_masked_invlpg(u_int mask, u_int addr); +#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ +extern void smp_invlpg_range(u_int startva, u_int endva); +extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva); +#endif +extern void smp_invltlb(void); +extern void smp_masked_invltlb(u_int mask); +#endif + +/* + * Generic page TLB flush. Takes care of SMP. + */ +static __inline void +invlpg(u_int addr) +{ + + cpu_invlpg(addr); +#ifdef SMP + smp_invlpg(addr); +#endif +} + +#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ +/* + * Generic TLB flush for a range of pages. Takes care of SMP. + * Saves many IPIs for SMP mode. 
+ */ +static __inline void +invlpg_range(u_int startva, u_int endva) +{ + + cpu_invlpg_range(startva, endva); +#ifdef SMP + smp_invlpg_range(startva, endva); +#endif +} +#endif + +/* + * Generic global TLB flush (except for thise for pages marked PG_G) + */ +static __inline void +invltlb(void) +{ + + cpu_invltlb(); +#ifdef SMP + smp_invltlb(); +#endif +} + static __inline u_int rfs(void) { @@ -581,6 +672,8 @@ cpu_critical_exit(critical_t eflags) int breakpoint __P((void)); u_int bsfl __P((u_int mask)); u_int bsrl __P((u_int mask)); +void cpu_invlpg __P((u_int addr)); +void cpu_invlpg_range __P((u_int start, u_int end)); void disable_intr __P((void)); void do_cpuid __P((u_int ax, u_int *p)); void enable_intr __P((void)); @@ -591,15 +684,26 @@ void insl __P((u_int port, void *addr, size_t cnt)); void insw __P((u_int port, void *addr, size_t cnt)); void invd __P((void)); void invlpg __P((u_int addr)); +void invlpg_range __P((u_int start, u_int end)); void invltlb __P((void)); u_short inw __P((u_int port)); +void load_cr0 __P((u_int cr0)); +void load_cr3 __P((u_int cr3)); +void load_cr4 __P((u_int cr4)); +void load_fs __P((u_int sel)); +void load_gs __P((u_int sel)); void outb __P((u_int port, u_char data)); void outl __P((u_int port, u_int data)); void outsb __P((u_int port, void *addr, size_t cnt)); void outsl __P((u_int port, void *addr, size_t cnt)); void outsw __P((u_int port, void *addr, size_t cnt)); void outw __P((u_int port, u_short data)); +u_int rcr0 __P((void)); u_int rcr2 __P((void)); +u_int rcr3 __P((void)); +u_int rcr4 __P((void)); +u_int rfs __P((void)); +u_int rgs __P((void)); u_int64_t rdmsr __P((u_int msr)); u_int64_t rdpmc __P((u_int pmc)); u_int64_t rdtsc __P((void)); @@ -607,22 +711,12 @@ u_int read_eflags __P((void)); void wbinvd __P((void)); void write_eflags __P((u_int ef)); void wrmsr __P((u_int msr, u_int64_t newval)); -u_int rfs __P((void)); -u_int rgs __P((void)); -void load_fs __P((u_int sel)); -void load_gs __P((u_int sel)); critical_t cpu_critical_enter __P((void)); void cpu_critical_exit __P((critical_t eflags)); #endif /* __GNUC__ */ -void load_cr0 __P((u_int cr0)); -void load_cr3 __P((u_int cr3)); -void load_cr4 __P((u_int cr4)); void ltr __P((u_short sel)); -u_int rcr0 __P((void)); -u_int rcr3 __P((void)); -u_int rcr4 __P((void)); void reset_dbregs __P((void)); __END_DECLS diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index 27ee7ae..008dfc5 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; +#ifdef APIC_IO +/* Variables needed for SMP tlb shootdown. */ +u_int smp_tlb_addr1; +u_int smp_tlb_addr2; +volatile int smp_tlb_wait; +static struct mtx smp_tlb_mtx; +#endif + /* * Local data and functions. 
*/ @@ -335,6 +343,9 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ +#ifdef APIC_IO + mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); +#endif } /* @@ -604,6 +615,10 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLPG_OFFSET, Xinvlpg, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLRNG_OFFSET, Xinvlrng, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } +#if defined(APIC_IO) + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif + /* * Flush the TLB on all other CPU's - * - * XXX: Needs to handshake and wait for completion before proceding. 
*/ +static void +smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) +{ + u_int ncpu; + register_t eflags; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + ipi_all_but_self(vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) +{ + u_int m; + int i, ncpu, othercpus; + register_t eflags; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + /* XXX there should be a pcpu self mask */ + mask &= ~(1 << PCPU_GET(cpuid)); + if (mask == 0) + return; + /* Count the target cpus */ + ncpu = 0; + m = mask; + while ((i = ffs(m)) != 0) { + m >>= i; + ncpu++; + } + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} +#endif + void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) - ipi_all_but_self(IPI_INVLTLB); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } #endif /* APIC_IO */ } void -invlpg(u_int addr) +smp_invlpg(u_int addr) { - __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_invlpg_range(u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } void -invltlb(void) +smp_masked_invltlb(u_int mask) { - u_long temp; +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +#endif /* APIC_IO */ +} - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() is - * inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); +void +smp_masked_invlpg(u_int mask, u_int addr) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } @@ -2280,6 +2451,9 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + if (bootverbose) + apic_dump("ap_init()"); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), + TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index cb5a24d..618bb3f 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -267,9 +267,7 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t)); void pmap_unmapdev __P((vm_offset_t, vm_size_t)); pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2; vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); -#ifdef SMP void pmap_set_opt __P((void)); -#endif #endif /* _KERNEL */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 34228e2..4136c20 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */ * Interprocessor interrupts for SMP. */ #define IPI_INVLTLB XINVLTLB_OFFSET +#define IPI_INVLPG XINVLPG_OFFSET +#define IPI_INVLRNG XINVLRNG_OFFSET #define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET #define IPI_AST XCPUAST_OFFSET #define IPI_STOP XCPUSTOP_OFFSET @@ -107,7 +109,6 @@ void assign_apic_irq __P((int apic, int intpin, int irq)); void revoke_apic_irq __P((int irq)); void bsp_apic_configure __P((void)); void init_secondary __P((void)); -void smp_invltlb __P((void)); void forward_statclock __P((void)); void forwarded_statclock __P((struct trapframe frame)); void forward_hardclock __P((void)); diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c index cfc162b..92bf581 100644 --- a/sys/amd64/isa/intr_machdep.c +++ b/sys/amd64/isa/intr_machdep.c @@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) } else { vector = TPR_SLOW_INTS + intr; -#ifdef APIC_INTR_REORDER -#ifdef APIC_INTR_HIGHPRI_CLOCK - /* XXX: Hack (kludge?) for more accurate clock. */ - if (intr == apic_8254_intr || intr == 8) { - vector = TPR_FAST_INTS + intr; - } -#endif -#endif setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h index 1726635..789b02b 100644 --- a/sys/amd64/isa/intr_machdep.h +++ b/sys/amd64/isa/intr_machdep.h @@ -88,6 +88,7 @@ /* IDT vector base for regular (aka. 
slow) and fast interrupts */ #define TPR_SLOW_INTS 0x20 #define TPR_FAST_INTS 0x60 +/* XXX note that the AST interrupt is at 0x50 */ /* blocking values for local APIC Task Priority Register */ #define TPR_BLOCK_HWI 0x4f /* hardware INTs */ @@ -104,20 +105,23 @@ #endif /** TEST_TEST1 */ /* TLB shootdowns */ -#define XINVLTLB_OFFSET (ICU_OFFSET + 112) +#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */ +#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */ +#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */ /* inter-cpu clock handling */ -#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113) -#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114) +#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */ +#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */ /* inter-CPU rendezvous */ -#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115) +#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */ /* IPI to generate an additional software trap at the target CPU */ -#define XCPUAST_OFFSET (ICU_OFFSET + 48) +/* XXX in the middle of the interrupt range, overlapping IRQ48 */ +#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */ /* IPI to signal CPUs to stop and wait for another CPU to restart them */ -#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) +#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */ /* * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: @@ -181,7 +185,9 @@ inthand_t IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31); inthand_t - Xinvltlb, /* TLB shootdowns */ + Xinvltlb, /* TLB shootdowns - global */ + Xinvlpg, /* TLB shootdowns - 1 page */ + Xinvlrng, /* TLB shootdowns - page range */ Xhardclock, /* Forward hardclock() */ Xstatclock, /* Forward statclock() */ Xcpuast, /* Additional software trap on other cpu */ diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c index cfc162b..92bf581 100644 --- a/sys/amd64/isa/nmi.c +++ b/sys/amd64/isa/nmi.c @@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) } else { vector = TPR_SLOW_INTS + intr; -#ifdef APIC_INTR_REORDER -#ifdef APIC_INTR_HIGHPRI_CLOCK - /* XXX: Hack (kludge?) for more accurate clock. */ - if (intr == apic_8254_intr || intr == 8) { - vector = TPR_FAST_INTS + intr; - } -#endif -#endif setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } diff --git a/sys/conf/options.i386 b/sys/conf/options.i386 index b505dbd..3848b0a 100644 --- a/sys/conf/options.i386 +++ b/sys/conf/options.i386 @@ -5,6 +5,7 @@ DISABLE_PSE MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h PMAP_SHPGPERPROC opt_pmap.h +ENABLE_PG_G opt_pmap.h PPC_PROBE_CHIPSET opt_ppc.h PPC_DEBUG opt_ppc.h SHOW_BUSYBUFS diff --git a/sys/conf/options.pc98 b/sys/conf/options.pc98 index 6cf1db2..9b71078 100644 --- a/sys/conf/options.pc98 +++ b/sys/conf/options.pc98 @@ -5,6 +5,7 @@ DISABLE_PSE MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h PMAP_SHPGPERPROC opt_pmap.h +ENABLE_PG_G opt_pmap.h PPC_PROBE_CHIPSET opt_ppc.h PPC_DEBUG opt_ppc.h SHOW_BUSYBUFS diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 95c9133..e3a37e1 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -181,30 +181,108 @@ Xspuriousint: iret /* - * Handle TLB shootdowns. + * Global address space TLB shootdown. 
*/ .text SUPERALIGN_TEXT .globl Xinvltlb Xinvltlb: pushl %eax + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds #ifdef COUNT_XINVLTLB_HITS pushl %fs - movl $KPSEL, %eax + movl $KPSEL, %eax /* Private space selector */ mov %ax, %fs movl PCPU(CPUID), %eax popl %fs - ss - incl _xhits(,%eax,4) + incl xhits_gbl(,%eax,4) #endif /* COUNT_XINVLTLB_HITS */ movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 - ss /* stack segment, avoid %ds load */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + lock + incl smp_tlb_wait + + popl %ds + popl %eax + iret + +/* + * Single page TLB shootdown + */ + .text + SUPERALIGN_TEXT + .globl Xinvlpg +Xinvlpg: + pushl %eax + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds + +#ifdef COUNT_XINVLTLB_HITS + pushl %fs + movl $KPSEL, %eax /* Private space selector */ + mov %ax, %fs + movl PCPU(CPUID), %eax + popl %fs + ss + incl xhits_pg(,%eax,4) +#endif /* COUNT_XINVLTLB_HITS */ + + movl smp_tlb_addr1, %eax + invlpg (%eax) /* invalidate single page */ + + movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + + lock + incl smp_tlb_wait + + popl %ds + popl %eax + iret + +/* + * Page range TLB shootdown. + */ + .text + SUPERALIGN_TEXT + .globl Xinvlrng +Xinvlrng: + pushl %eax + pushl %edx + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds + +#ifdef COUNT_XINVLTLB_HITS + pushl %fs + movl $KPSEL, %eax /* Private space selector */ + mov %ax, %fs + movl PCPU(CPUID), %eax + popl %fs + incl xhits_rng(,%eax,4) +#endif /* COUNT_XINVLTLB_HITS */ + + movl smp_tlb_addr1, %edx + movl smp_tlb_addr2, %eax +1: invlpg (%edx) /* invalidate single page */ + addl $PAGE_SIZE, %edx + cmpl %edx, %eax + jb 1b + + movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + + lock + incl smp_tlb_wait + + popl %ds + popl %edx popl %eax iret @@ -443,12 +521,6 @@ Xrendezvous: .data -#ifdef COUNT_XINVLTLB_HITS - .globl _xhits -_xhits: - .space (NCPU * 4), 0 -#endif /* COUNT_XINVLTLB_HITS */ - .globl apic_pin_trigger apic_pin_trigger: .long 0 diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index 4fff220..299bc3e 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -381,12 +381,6 @@ begin: movl IdlePTD,%esi movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - testl $CPUID_PGE, R(cpu_feature) - jz 1f - movl %cr4, %eax - orl $CR4_PGE, %eax - movl %eax, %cr4 -1: pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ @@ -809,14 +803,7 @@ no_kernend: jne map_read_write #endif xorl %edx,%edx - -#if !defined(SMP) - testl $CPUID_PGE, R(cpu_feature) - jz 2f - orl $PG_G,%edx -#endif - -2: movl $R(etext),%ecx + movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) @@ -827,13 +814,7 @@ no_kernend: andl $~PAGE_MASK, %eax map_read_write: movl $PG_RW,%edx -#if !defined(SMP) - testl $CPUID_PGE, R(cpu_feature) - jz 1f - orl $PG_G,%edx -#endif - -1: movl R(KERNend),%ecx + movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 27ee7ae..008dfc5 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; +#ifdef APIC_IO +/* Variables needed for SMP tlb shootdown. */ +u_int smp_tlb_addr1; +u_int smp_tlb_addr2; +volatile int smp_tlb_wait; +static struct mtx smp_tlb_mtx; +#endif + /* * Local data and functions. 
*/ @@ -335,6 +343,9 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ +#ifdef APIC_IO + mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); +#endif } /* @@ -604,6 +615,10 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLPG_OFFSET, Xinvlpg, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLRNG_OFFSET, Xinvlrng, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } +#if defined(APIC_IO) + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif + /* * Flush the TLB on all other CPU's - * - * XXX: Needs to handshake and wait for completion before proceding. 
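A note on the COUNT_XINVLTLB_HITS instrumentation added in the hunk above: the per-CPU hit counters and the per-kind IPI counters are exported under a new debug.xhits sysctl tree, so they can be inspected from userland. A minimal sketch of reading one of them (illustrative only, error handling trimmed):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		u_int n;
		size_t len = sizeof(n);

		/* Only present in kernels built with COUNT_XINVLTLB_HITS. */
		if (sysctlbyname("debug.xhits.ipi_global", &n, &len, NULL, 0) == 0)
			printf("global shootdown IPIs sent: %u\n", n);
		return (0);
	}

The xhits_gbl/xhits_pg/xhits_rng arrays themselves are exported as opaque per-CPU arrays under the same debug.xhits node.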
*/ +static void +smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) +{ + u_int ncpu; + register_t eflags; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + ipi_all_but_self(vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) +{ + u_int m; + int i, ncpu, othercpus; + register_t eflags; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + /* XXX there should be a pcpu self mask */ + mask &= ~(1 << PCPU_GET(cpuid)); + if (mask == 0) + return; + /* Count the target cpus */ + ncpu = 0; + m = mask; + while ((i = ffs(m)) != 0) { + m >>= i; + ncpu++; + } + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} +#endif + void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) - ipi_all_but_self(IPI_INVLTLB); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } #endif /* APIC_IO */ } void -invlpg(u_int addr) +smp_invlpg(u_int addr) { - __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_invlpg_range(u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } void -invltlb(void) +smp_masked_invltlb(u_int mask) { - u_long temp; +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +#endif /* APIC_IO */ +} - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() is - * inlined. 
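Putting the two halves together: the Xinvltlb/Xinvlpg/Xinvlrng handlers earlier in this diff are the responder side of the handshake that smp_tlb_shootdown() and smp_targeted_tlb_shootdown() spin on. In rough C (a sketch only; lapic_eoi() is an invented stand-in for the 'movl $0, lapic+LA_EOI' store, and the loop shows the intended page walk):

	/* What each target CPU does when an IPI_INVLRNG arrives. */
	static void
	xinvlrng_responder(void)
	{
		vm_offset_t va;

		for (va = smp_tlb_addr1; va < smp_tlb_addr2; va += PAGE_SIZE)
			cpu_invlpg(va);		/* this CPU only */
		lapic_eoi();			/* end of interrupt to the local APIC */
		smp_tlb_wait++;			/* 'lock; incl smp_tlb_wait' in the assembly */
	}

The PSL_I panic at the top of both initiator functions is presumably there because the wait loop can only finish once every other CPU has taken the IPI: if two CPUs entered this path with interrupts already disabled, whichever one loses the race for smp_tlb_mtx could never service the winner's IPI, smp_tlb_wait would never reach ncpu, and the winner would spin forever.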
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); +void +smp_masked_invlpg(u_int mask, u_int addr) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } @@ -2280,6 +2451,9 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + if (bootverbose) + apic_dump("ap_init()"); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), + TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c index dc7861f..d3f4d3d 100644 --- a/sys/i386/i386/mpapic.c +++ b/sys/i386/i386/mpapic.c @@ -101,9 +101,6 @@ apic_initialize(void) #endif /** TEST_TEST1 */ lapic.svr = temp; - - if (bootverbose) - apic_dump("apic_initialize()"); } diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index 27ee7ae..008dfc5 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; +#ifdef APIC_IO +/* Variables needed for SMP tlb shootdown. */ +u_int smp_tlb_addr1; +u_int smp_tlb_addr2; +volatile int smp_tlb_wait; +static struct mtx smp_tlb_mtx; +#endif + /* * Local data and functions. 
*/ @@ -335,6 +343,9 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ +#ifdef APIC_IO + mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); +#endif } /* @@ -604,6 +615,10 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLPG_OFFSET, Xinvlpg, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLRNG_OFFSET, Xinvlrng, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } +#if defined(APIC_IO) + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif + /* * Flush the TLB on all other CPU's - * - * XXX: Needs to handshake and wait for completion before proceding. 
*/ +static void +smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) +{ + u_int ncpu; + register_t eflags; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + ipi_all_but_self(vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) +{ + u_int m; + int i, ncpu, othercpus; + register_t eflags; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + /* XXX there should be a pcpu self mask */ + mask &= ~(1 << PCPU_GET(cpuid)); + if (mask == 0) + return; + /* Count the target cpus */ + ncpu = 0; + m = mask; + while ((i = ffs(m)) != 0) { + m >>= i; + ncpu++; + } + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} +#endif + void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) - ipi_all_but_self(IPI_INVLTLB); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } #endif /* APIC_IO */ } void -invlpg(u_int addr) +smp_invlpg(u_int addr) { - __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_invlpg_range(u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } void -invltlb(void) +smp_masked_invltlb(u_int mask) { - u_long temp; +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +#endif /* APIC_IO */ +} - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() is - * inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); +void +smp_masked_invlpg(u_int mask, u_int addr) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } @@ -2280,6 +2451,9 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + if (bootverbose) + apic_dump("ap_init()"); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), + TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index f12cb0b..ba3ee22 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -85,6 +85,9 @@ #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> +#if defined(SMP) +#include <sys/smp.h> +#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -101,7 +104,6 @@ #include <machine/md_var.h> #include <machine/specialreg.h> #if defined(SMP) || defined(APIC_IO) -#include <machine/smp.h> #include <machine/apic.h> #include <machine/segments.h> #include <machine/tss.h> @@ -259,10 +261,10 @@ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; + #ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) { + if (cpu_feature & CPUID_PSE) newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); - } #endif return newaddr; } @@ -367,10 +369,9 @@ pmap_bootstrap(firstaddr, loadaddr) PTD[i] = 0; pgeflag = 0; -#if !defined(SMP) /* XXX - see also mp_machdep.c */ - if (cpu_feature & CPUID_PGE) { +#if !defined(SMP) || defined(ENABLE_PG_G) + if (cpu_feature & CPUID_PGE) pgeflag = PG_G; - } #endif /* @@ -383,7 +384,7 @@ pmap_bootstrap(firstaddr, loadaddr) */ pdir4mb = 0; -#if !defined(DISABLE_PSE) +#ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) { pd_entry_t ptditmp; /* @@ -394,57 +395,64 @@ pmap_bootstrap(firstaddr, loadaddr) ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; - -#if !defined(SMP) - /* - * Enable the PSE mode. - */ - load_cr4(rcr4() | CR4_PSE); - - /* - * We can do the mapping here for the single processor - * case. We simply ignore the old page table page from - * now on. - */ - /* - * For SMP, we still need 4K pages to bootstrap APs, - * PSE will be enabled as soon as all APs are up. - */ - PTD[KPTDI] = (pd_entry_t) ptditmp; - kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; - invltlb(); -#endif } #endif - +#ifndef SMP + /* + * Turn on PGE/PSE. SMP does this later on since the + * 4K page tables are required for AP boot (for now). + * XXX fixme. + */ + pmap_set_opt(); +#endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); - /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif - - invltlb(); + cpu_invltlb(); } -#ifdef SMP /* - * Set 4mb pdir for mp startup + * Enable 4MB page mode for MP startup. Turn on PG_G support. + * BSP will run this after all the AP's have started up. */ void pmap_set_opt(void) { - if (pseflag && (cpu_feature & CPUID_PSE)) { + pt_entry_t *pte; + vm_offset_t va; + + if (pgeflag && (cpu_feature & CPUID_PGE)) + load_cr4(rcr4() | CR4_PGE); +#ifndef DISABLE_PSE + if (pseflag && (cpu_feature & CPUID_PSE)) load_cr4(rcr4() | CR4_PSE); - if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ +#endif + if (PCPU_GET(cpuid) == 0) { +#ifndef DISABLE_PSE + if (pdir4mb) kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; - cpu_invltlb(); +#endif + if (pgeflag) { + /* XXX see earlier comments about virtual_avail */ + for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE) + { + pte = vtopte(va); + if (*pte) + *pte |= pgeflag; + } } - } + /* + * for SMP, this will cause all cpus to reload again, which + * is actually what we want since they now have CR4_PGE on. + */ + invltlb(); + } else + cpu_invltlb(); } -#endif /* * Initialize the pmap module. @@ -552,27 +560,37 @@ pmap_track_modified(vm_offset_t va) return 0; } -static PMAP_INLINE void -invltlb_1pg(vm_offset_t va) -{ -#ifdef I386_CPU - invltlb(); -#else - invlpg(va); -#endif -} - static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { #if defined(SMP) - if (pmap->pm_active & PCPU_GET(cpumask)) - cpu_invlpg((void *)va); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); + u_int cpumask; + u_int other_cpus; + struct thread *td; + + td = curthread; + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. + * XXX we may need to hold schedlock to get a coherent pm_active + */ + if (td->td_critnest == 1) + cpu_critical_exit(td->td_savecrit); + if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { + invlpg(va); /* global */ + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + cpu_invlpg(va); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg(pmap->pm_active & other_cpus, va); + } + critical_exit(); #else if (pmap->pm_active) - invltlb_1pg(va); + cpu_invlpg(va); #endif } @@ -580,10 +598,30 @@ static __inline void pmap_invalidate_all(pmap_t pmap) { #if defined(SMP) - if (pmap->pm_active & PCPU_GET(cpumask)) - cpu_invltlb(); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); + u_int cpumask; + u_int other_cpus; + struct thread *td; + + td = curthread; + critical_enter(); + /* + * We need to disable interrupt preemption but MUST NOT have + * interrupts disabled here. 
+ * XXX we may need to hold schedlock to get a coherent pm_active + */ + if (td->td_critnest == 1) + cpu_critical_exit(td->td_savecrit); + if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { + invltlb(); /* global */ + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + cpu_invltlb(); + if (pmap->pm_active & other_cpus) + smp_masked_invltlb(pmap->pm_active & other_cpus); + } + critical_exit(); #else if (pmap->pm_active) invltlb(); @@ -609,12 +647,7 @@ get_ptbase(pmap) /* otherwise, we are alternate address space */ if (frame != (APTDpde & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); -#if defined(SMP) - /* The page directory is not shared between CPUs */ - cpu_invltlb(); -#else invltlb(); -#endif } return APTmap; } @@ -643,7 +676,7 @@ pmap_pte_quick(pmap, va) newpf = pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; - invltlb_1pg((vm_offset_t) PADDR1); + pmap_invalidate_page(pmap, (vm_offset_t) PADDR1); } return PADDR1 + (index & (NPTEPG - 1)); } @@ -689,20 +722,17 @@ pmap_extract(pmap, va) /* * add a wired page to the kva - * note that in order for the mapping to take effect -- you - * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_offset_t pa) { pt_entry_t *pte; - pt_entry_t npte, opte; + pt_entry_t npte; npte = pa | PG_RW | PG_V | pgeflag; pte = vtopte(va); - opte = *pte; *pte = npte; - invltlb_1pg(va); + invlpg(va); } /* @@ -715,7 +745,7 @@ pmap_kremove(vm_offset_t va) pte = vtopte(va); *pte = 0; - invltlb_1pg(va); + invlpg(va); } /* @@ -733,13 +763,17 @@ pmap_kremove(vm_offset_t va) vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { - vm_offset_t sva = *virt; - vm_offset_t va = sva; + vm_offset_t va, sva; + pt_entry_t *pte; + + va = sva = *virt; while (start < end) { - pmap_kenter(va, start); + pte = vtopte(va); + *pte = start | PG_RW | PG_V | pgeflag; va += PAGE_SIZE; start += PAGE_SIZE; } + invlpg_range(sva, end); *virt = va; return (sva); } @@ -754,28 +788,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) * over. The page *must* be wired. */ void -pmap_qenter(vm_offset_t va, vm_page_t *m, int count) +pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) { - vm_offset_t end_va; + vm_offset_t va, end_va; + pt_entry_t *pte; + va = sva; end_va = va + count * PAGE_SIZE; - - while (va < end_va) { - pt_entry_t *pte; + while (va < end_va) { pte = vtopte(va); *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; -#ifdef SMP - cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; m++; } -#ifdef SMP - smp_invltlb(); -#endif + invlpg_range(sva, end_va); } /* @@ -783,27 +810,20 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count) * kernel -- it is meant only for temporary mappings. 
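The reworked pmap_invalidate_page() and pmap_invalidate_all() above replace the old "flush locally, then broadcast to everyone" pattern with shootdowns targeted by pm_active. A worked example with made-up values:

	/*
	 * Hypothetical 4-CPU example, initiated on CPU 0 for a pmap that is
	 * active on CPUs 0 and 2:
	 *
	 *	all_cpus             = 0x0f
	 *	pmap->pm_active      = 0x05	(CPUs 0 and 2)
	 *	PCPU_GET(cpumask)    = 0x01	(we are CPU 0)
	 *	PCPU_GET(other_cpus) = 0x0e
	 *
	 *	pm_active & cpumask    = 0x01 != 0  -> cpu_invlpg(va) locally
	 *	pm_active & other_cpus = 0x04       -> smp_masked_invlpg(0x04, va)
	 *
	 * so only CPU 2 is interrupted; CPUs 1 and 3 never see an IPI.  A
	 * pm_active of -1 (or all_cpus) takes the plain invlpg() path, which
	 * flushes locally and broadcasts to everyone.
	 */

The critical_enter()/cpu_critical_exit(td->td_savecrit) pairing pins the thread to this CPU while the masks are sampled but deliberately leaves interrupts enabled, which the shootdown path above requires.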
*/ void -pmap_qremove(vm_offset_t va, int count) +pmap_qremove(vm_offset_t sva, int count) { - vm_offset_t end_va; + pt_entry_t *pte; + vm_offset_t va, end_va; - end_va = va + count*PAGE_SIZE; + va = sva; + end_va = va + count * PAGE_SIZE; while (va < end_va) { - pt_entry_t *pte; - pte = vtopte(va); *pte = 0; -#ifdef SMP - cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; } -#ifdef SMP - smp_invltlb(); -#endif + invlpg_range(sva, end_va); } static vm_page_t @@ -824,9 +844,6 @@ retry: void pmap_new_proc(struct proc *p) { -#ifdef I386_CPU - int updateneeded = 0; -#endif int i; vm_object_t upobj; vm_offset_t up; @@ -870,23 +887,14 @@ pmap_new_proc(struct proc *p) * Enter the page into the kernel address space. */ *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; - if (oldpte) { -#ifdef I386_CPU - updateneeded = 1; -#else + if (oldpte) invlpg(up + i * PAGE_SIZE); -#endif - } vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } -#ifdef I386_CPU - if (updateneeded) - invltlb(); -#endif } /* @@ -901,7 +909,7 @@ pmap_dispose_proc(p) vm_object_t upobj; vm_offset_t up; vm_page_t m; - pt_entry_t *ptek, oldpte; + pt_entry_t *ptek; upobj = p->p_upages_obj; up = (vm_offset_t)p->p_uarea; @@ -911,17 +919,11 @@ pmap_dispose_proc(p) if (m == NULL) panic("pmap_dispose_proc: upage already missing?"); vm_page_busy(m); - oldpte = *(ptek + i); *(ptek + i) = 0; -#ifndef I386_CPU invlpg(up + i * PAGE_SIZE); -#endif vm_page_unwire(m, 0); vm_page_free(m); } -#ifdef I386_CPU - invltlb(); -#endif } /* @@ -986,9 +988,6 @@ pmap_swapin_proc(p) void pmap_new_thread(struct thread *td) { -#ifdef I386_CPU - int updateneeded = 0; -#endif int i; vm_object_t ksobj; vm_page_t m; @@ -1019,13 +1018,8 @@ pmap_new_thread(struct thread *td) ptek = vtopte(ks - PAGE_SIZE); oldpte = *ptek; *ptek = 0; - if (oldpte) { -#ifdef I386_CPU - updateneeded = 1; -#else + if (oldpte) invlpg(ks - PAGE_SIZE); -#endif - } ptek++; #else /* get a kernel virtual address for the kstack for this thread */ @@ -1055,23 +1049,14 @@ pmap_new_thread(struct thread *td) * Enter the page into the kernel address space. 
*/ *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; - if (oldpte) { -#ifdef I386_CPU - updateneeded = 1; -#else + if (oldpte) invlpg(ks + i * PAGE_SIZE); -#endif - } vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } -#ifdef I386_CPU - if (updateneeded) - invltlb(); -#endif } /* @@ -1086,7 +1071,7 @@ pmap_dispose_thread(td) vm_object_t ksobj; vm_offset_t ks; vm_page_t m; - pt_entry_t *ptek, oldpte; + pt_entry_t *ptek; ksobj = td->td_kstack_obj; ks = td->td_kstack; @@ -1096,17 +1081,11 @@ pmap_dispose_thread(td) if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_busy(m); - oldpte = *(ptek + i); *(ptek + i) = 0; -#ifndef I386_CPU invlpg(ks + i * PAGE_SIZE); -#endif vm_page_unwire(m, 0); vm_page_free(m); } -#ifdef I386_CPU - invltlb(); -#endif } /* @@ -2207,13 +2186,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif + pmap_invalidate_page(pmap, va); } return; } @@ -2281,13 +2254,7 @@ validate: if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & PCPU_GET(other_cpus)) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif + pmap_invalidate_page(pmap, va); } } } @@ -2710,7 +2677,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t pdnxt; pd_entry_t src_frame, dst_frame; vm_page_t m; - pd_entry_t saved_pde; if (dst_addr != src_addr) return; @@ -2720,17 +2686,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, return; dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; - if (dst_frame != (APTDpde & PG_FRAME)) { - APTDpde = dst_frame | PG_RW | PG_V; -#if defined(SMP) - /* The page directory is not shared between CPUs */ - cpu_invltlb(); -#else - invltlb(); -#endif - } - saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V); - for(addr = src_addr; addr < end_addr; addr = pdnxt) { + for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; @@ -2771,6 +2727,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, if (pdnxt > end_addr) pdnxt = end_addr; + /* + * Have to recheck this before every avtopte() call below + * in case we have blocked and something else used APTDpde. + */ + if (dst_frame != (APTDpde & PG_FRAME)) { + APTDpde = dst_frame | PG_RW | PG_V; + invltlb(); + } src_pte = vtopte(addr); dst_pte = avtopte(addr); while (addr < pdnxt) { @@ -2786,16 +2750,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); - if ((APTDpde & PG_FRAME) != - (saved_pde & PG_FRAME)) { - APTDpde = saved_pde; -printf ("IT HAPPENNED!"); -#if defined(SMP) - cpu_invltlb(); -#else - invltlb(); -#endif - } if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and @@ -2839,12 +2793,15 @@ void pmap_zero_page(vm_offset_t phys) { +#ifdef SMP + /* XXX overkill, we only want to disable migration here */ + /* XXX or maybe not. 
down the track we have reentrancy issues */ + critical_enter(); +#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); - *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - invltlb_1pg((vm_offset_t)CADDR2); - + cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); @@ -2852,6 +2809,9 @@ pmap_zero_page(vm_offset_t phys) #endif bzero(CADDR2, PAGE_SIZE); *CMAP2 = 0; +#ifdef SMP + critical_exit(); +#endif } /* @@ -2864,12 +2824,15 @@ void pmap_zero_page_area(vm_offset_t phys, int off, int size) { +#ifdef SMP + /* XXX overkill, we only want to disable migration here */ + /* XXX or maybe not. down the track we have reentrancy issues */ + critical_enter(); +#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); - *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - invltlb_1pg((vm_offset_t)CADDR2); - + cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); @@ -2877,6 +2840,9 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size) #endif bzero((char *)CADDR2 + off, size); *CMAP2 = 0; +#ifdef SMP + critical_exit(); +#endif } /* @@ -2889,6 +2855,11 @@ void pmap_copy_page(vm_offset_t src, vm_offset_t dst) { +#ifdef SMP + /* XXX overkill, we only want to disable migration here */ + /* XXX or maybe not. down the track we have reentrancy issues */ + critical_enter(); +#endif if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) @@ -2896,17 +2867,14 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst) *CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; -#ifdef I386_CPU - invltlb(); -#else - invlpg((u_int)CADDR1); - invlpg((u_int)CADDR2); -#endif - + cpu_invlpg((u_int)CADDR1); /* SMP: local only */ + cpu_invlpg((u_int)CADDR2); /* SMP: local only */ bcopy(CADDR1, CADDR2, PAGE_SIZE); - *CMAP1 = 0; *CMAP2 = 0; +#ifdef SMP + critical_exit(); +#endif } @@ -3322,14 +3290,13 @@ pmap_mapdev(pa, size) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; - for (tmpva = va; size > 0;) { + for (tmpva = va; size > 0; ) { pte = vtopte(tmpva); *pte = pa | PG_RW | PG_V | pgeflag; size -= PAGE_SIZE; tmpva += PAGE_SIZE; - pa += PAGE_SIZE; } - invltlb(); + invlpg_range(va, tmpva); return ((void *)(va + offset)); } @@ -3339,11 +3306,20 @@ pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { - vm_offset_t base, offset; + vm_offset_t base, offset, tmpva; + pt_entry_t *pte; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); + + for (tmpva = base; size > 0; ) { + pte = vtopte(tmpva); + *pte = 0; + size -= PAGE_SIZE; + tmpva += PAGE_SIZE; + } + invlpg_range(va, tmpva); kmem_free(kernel_map, base, size); } diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index bc58672..0649009 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -1591,42 +1591,6 @@ ENTRY(ssdtosd) popl %ebx ret -/* load_cr0(cr0) */ -ENTRY(load_cr0) - movl 4(%esp),%eax - movl %eax,%cr0 - ret - -/* rcr0() */ -ENTRY(rcr0) - movl %cr0,%eax - ret - -/* rcr3() */ -ENTRY(rcr3) - movl %cr3,%eax - ret - -/* void load_cr3(caddr_t cr3) */ -ENTRY(load_cr3) -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl 4(%esp),%eax - movl %eax,%cr3 - ret - -/* rcr4() */ -ENTRY(rcr4) - movl %cr4,%eax - ret - -/* void load_cr4(caddr_t cr4) */ -ENTRY(load_cr4) - movl 4(%esp),%eax - movl %eax,%cr4 - ret - /* void 
reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 969541f..94d5c3a 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -227,62 +227,6 @@ invd(void) __asm __volatile("invd"); } -#if defined(SMP) && defined(_KERNEL) - -/* - * When using APIC IPI's, invlpg() is not simply the invlpg instruction - * (this is a bug) and the inlining cost is prohibitive since the call - * executes into the IPI transmission system. - */ -void invlpg __P((u_int addr)); -void invltlb __P((void)); - -static __inline void -cpu_invlpg(void *addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -} - -static __inline void -cpu_invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. - */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); -#if defined(SWTCH_OPTIM_STATS) - ++tlb_flush_count; -#endif -} - -#else /* !(SMP && _KERNEL) */ - -static __inline void -invlpg(u_int addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -} - -static __inline void -invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. - */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); -#ifdef SWTCH_OPTIM_STATS - ++tlb_flush_count; -#endif -} - -#endif /* SMP && _KERNEL */ - static __inline u_short inw(u_int port) { @@ -348,15 +292,6 @@ outw(u_int port, u_short data) } static __inline u_int -rcr2(void) -{ - u_int data; - - __asm __volatile("movl %%cr2,%0" : "=r" (data)); - return (data); -} - -static __inline u_int read_eflags(void) { u_int ef; @@ -420,6 +355,162 @@ wrmsr(u_int msr, u_int64_t newval) __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } +static __inline void +load_cr0(u_int data) +{ + + __asm __volatile("movl %0,%%cr0" : : "r" (data)); +} + +static __inline u_int +rcr0(void) +{ + u_int data; + + __asm __volatile("movl %%cr0,%0" : "=r" (data)); + return (data); +} + +static __inline u_int +rcr2(void) +{ + u_int data; + + __asm __volatile("movl %%cr2,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_cr3(u_int data) +{ + + __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); +#if defined(SWTCH_OPTIM_STATS) + ++tlb_flush_count; +#endif +} + +static __inline u_int +rcr3(void) +{ + u_int data; + + __asm __volatile("movl %%cr3,%0" : "=r" (data)); + return (data); +} + +static __inline void +load_cr4(u_int data) +{ + __asm __volatile("movl %0,%%cr4" : : "r" (data)); +} + +static __inline u_int +rcr4(void) +{ + u_int data; + + __asm __volatile("movl %%cr4,%0" : "=r" (data)); + return (data); +} + +/* + * Global TLB flush (except for thise for pages marked PG_G) + */ +static __inline void +cpu_invltlb(void) +{ + + load_cr3(rcr3()); +} + +/* + * TLB flush for an individual page (even if it has PG_G). + * Only works on 486+ CPUs (i386 does not have PG_G). + */ +static __inline void +cpu_invlpg(u_int addr) +{ + +#ifndef I386_CPU + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +#else + cpu_invltlb(); +#endif +} + +#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ +/* + * Same as above but for a range of pages. 
+ */ +static __inline void +cpu_invlpg_range(u_int startva, u_int endva) +{ +#ifndef I386_CPU + u_int addr; + + for (addr = startva; addr < endva; addr += PAGE_SIZE) + __asm __volatile("invlpg %0" : : "m" (*(char *)addr)); + __asm __volatile("" : : : "memory"); +#else + cpu_invltlb(); +#endif +} +#endif + +#ifdef SMP +extern void smp_invlpg(u_int addr); +extern void smp_masked_invlpg(u_int mask, u_int addr); +#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ +extern void smp_invlpg_range(u_int startva, u_int endva); +extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva); +#endif +extern void smp_invltlb(void); +extern void smp_masked_invltlb(u_int mask); +#endif + +/* + * Generic page TLB flush. Takes care of SMP. + */ +static __inline void +invlpg(u_int addr) +{ + + cpu_invlpg(addr); +#ifdef SMP + smp_invlpg(addr); +#endif +} + +#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ +/* + * Generic TLB flush for a range of pages. Takes care of SMP. + * Saves many IPIs for SMP mode. + */ +static __inline void +invlpg_range(u_int startva, u_int endva) +{ + + cpu_invlpg_range(startva, endva); +#ifdef SMP + smp_invlpg_range(startva, endva); +#endif +} +#endif + +/* + * Generic global TLB flush (except for thise for pages marked PG_G) + */ +static __inline void +invltlb(void) +{ + + cpu_invltlb(); +#ifdef SMP + smp_invltlb(); +#endif +} + static __inline u_int rfs(void) { @@ -581,6 +672,8 @@ cpu_critical_exit(critical_t eflags) int breakpoint __P((void)); u_int bsfl __P((u_int mask)); u_int bsrl __P((u_int mask)); +void cpu_invlpg __P((u_int addr)); +void cpu_invlpg_range __P((u_int start, u_int end)); void disable_intr __P((void)); void do_cpuid __P((u_int ax, u_int *p)); void enable_intr __P((void)); @@ -591,15 +684,26 @@ void insl __P((u_int port, void *addr, size_t cnt)); void insw __P((u_int port, void *addr, size_t cnt)); void invd __P((void)); void invlpg __P((u_int addr)); +void invlpg_range __P((u_int start, u_int end)); void invltlb __P((void)); u_short inw __P((u_int port)); +void load_cr0 __P((u_int cr0)); +void load_cr3 __P((u_int cr3)); +void load_cr4 __P((u_int cr4)); +void load_fs __P((u_int sel)); +void load_gs __P((u_int sel)); void outb __P((u_int port, u_char data)); void outl __P((u_int port, u_int data)); void outsb __P((u_int port, void *addr, size_t cnt)); void outsl __P((u_int port, void *addr, size_t cnt)); void outsw __P((u_int port, void *addr, size_t cnt)); void outw __P((u_int port, u_short data)); +u_int rcr0 __P((void)); u_int rcr2 __P((void)); +u_int rcr3 __P((void)); +u_int rcr4 __P((void)); +u_int rfs __P((void)); +u_int rgs __P((void)); u_int64_t rdmsr __P((u_int msr)); u_int64_t rdpmc __P((u_int pmc)); u_int64_t rdtsc __P((void)); @@ -607,22 +711,12 @@ u_int read_eflags __P((void)); void wbinvd __P((void)); void write_eflags __P((u_int ef)); void wrmsr __P((u_int msr, u_int64_t newval)); -u_int rfs __P((void)); -u_int rgs __P((void)); -void load_fs __P((u_int sel)); -void load_gs __P((u_int sel)); critical_t cpu_critical_enter __P((void)); void cpu_critical_exit __P((critical_t eflags)); #endif /* __GNUC__ */ -void load_cr0 __P((u_int cr0)); -void load_cr3 __P((u_int cr3)); -void load_cr4 __P((u_int cr4)); void ltr __P((u_short sel)); -u_int rcr0 __P((void)); -u_int rcr3 __P((void)); -u_int rcr4 __P((void)); void reset_dbregs __P((void)); __END_DECLS diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 27ee7ae..008dfc5 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h 
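To summarize the layering that the cpufunc.h hunk above establishes: cpu_invltlb(), cpu_invlpg() and cpu_invlpg_range() act on the local CPU only, the smp_*() and smp_masked_*() functions only send IPIs, and the bare invltlb()/invlpg()/invlpg_range() inlines do the local flush and then broadcast on SMP. A minimal sketch of the intended calling pattern, modelled on the pmap_map()/pmap_mapdev() changes elsewhere in this diff (the function name is invented for illustration):

	static void
	example_remap(vm_offset_t va, vm_offset_t pa, int npages)
	{
		pt_entry_t *pte;
		vm_offset_t v, eva;

		eva = va + npages * PAGE_SIZE;
		for (v = va; v < eva; v += PAGE_SIZE, pa += PAGE_SIZE) {
			pte = vtopte(v);
			*pte = pa | PG_RW | PG_V | pgeflag;
		}
		/* Local flush of the whole range, plus at most one IPI on SMP. */
		invlpg_range(va, eva);
	}

Batching the flush this way is what lets pmap_qenter() and pmap_mapdev() issue a single ranged shootdown instead of falling back to per-page or whole-TLB flushes.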
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; +#ifdef APIC_IO +/* Variables needed for SMP tlb shootdown. */ +u_int smp_tlb_addr1; +u_int smp_tlb_addr2; +volatile int smp_tlb_wait; +static struct mtx smp_tlb_mtx; +#endif + /* * Local data and functions. */ @@ -335,6 +343,9 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ +#ifdef APIC_IO + mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); +#endif } /* @@ -604,6 +615,10 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLPG_OFFSET, Xinvlpg, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(XINVLRNG_OFFSET, Xinvlrng, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } +#if defined(APIC_IO) + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif + /* * Flush the TLB on all other CPU's - * - * XXX: Needs to handshake and wait for completion before proceding. 
*/ +static void +smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) +{ + u_int ncpu; + register_t eflags; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + ipi_all_but_self(vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) +{ + u_int m; + int i, ncpu, othercpus; + register_t eflags; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + /* XXX there should be a pcpu self mask */ + mask &= ~(1 << PCPU_GET(cpuid)); + if (mask == 0) + return; + /* Count the target cpus */ + ncpu = 0; + m = mask; + while ((i = ffs(m)) != 0) { + m >>= i; + ncpu++; + } + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + eflags = read_eflags(); + if ((eflags & PSL_I) == 0) + panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); + mtx_lock_spin(&smp_tlb_mtx); + smp_tlb_addr1 = addr1; + smp_tlb_addr2 = addr2; + smp_tlb_wait = 0; + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) + /* XXX cpu_pause() */ ; + mtx_unlock_spin(&smp_tlb_mtx); +} +#endif + void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) - ipi_all_but_self(IPI_INVLTLB); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } #endif /* APIC_IO */ } void -invlpg(u_int addr) +smp_invlpg(u_int addr) { - __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_invlpg_range(u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } void -invltlb(void) +smp_masked_invltlb(u_int mask) { - u_long temp; +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +#endif /* APIC_IO */ +} - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() is - * inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); +void +smp_masked_invlpg(u_int mask, u_int addr) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +#endif /* APIC_IO */ +} - /* send a message to the other CPUs */ - smp_invltlb(); +void +smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) +{ +#if defined(APIC_IO) + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +#endif /* APIC_IO */ } @@ -2280,6 +2451,9 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + if (bootverbose) + apic_dump("ap_init()"); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), + TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index cb5a24d..618bb3f 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -267,9 +267,7 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t)); void pmap_unmapdev __P((vm_offset_t, vm_size_t)); pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2; vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); -#ifdef SMP void pmap_set_opt __P((void)); -#endif #endif /* _KERNEL */ diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 34228e2..4136c20 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */ * Interprocessor interrupts for SMP. */ #define IPI_INVLTLB XINVLTLB_OFFSET +#define IPI_INVLPG XINVLPG_OFFSET +#define IPI_INVLRNG XINVLRNG_OFFSET #define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET #define IPI_AST XCPUAST_OFFSET #define IPI_STOP XCPUSTOP_OFFSET @@ -107,7 +109,6 @@ void assign_apic_irq __P((int apic, int intpin, int irq)); void revoke_apic_irq __P((int irq)); void bsp_apic_configure __P((void)); void init_secondary __P((void)); -void smp_invltlb __P((void)); void forward_statclock __P((void)); void forwarded_statclock __P((struct trapframe frame)); void forward_hardclock __P((void)); diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index d666148..ea8e84b 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -90,13 +90,6 @@ */ #define APIC_INTR_REORDER -/* - * Redirect clock interrupts to a higher priority (fast intr) vector, - * while still using the slow interrupt handler. Only effective when - * APIC_INTR_REORDER is defined. - */ -#define APIC_INTR_HIGHPRI_CLOCK - #endif /* APIC_IO */ /* diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index 95c9133..e3a37e1 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -181,30 +181,108 @@ Xspuriousint: iret /* - * Handle TLB shootdowns. + * Global address space TLB shootdown. 
*/ .text SUPERALIGN_TEXT .globl Xinvltlb Xinvltlb: pushl %eax + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds #ifdef COUNT_XINVLTLB_HITS pushl %fs - movl $KPSEL, %eax + movl $KPSEL, %eax /* Private space selector */ mov %ax, %fs movl PCPU(CPUID), %eax popl %fs - ss - incl _xhits(,%eax,4) + incl xhits_gbl(,%eax,4) #endif /* COUNT_XINVLTLB_HITS */ movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 - ss /* stack segment, avoid %ds load */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + lock + incl smp_tlb_wait + + popl %ds + popl %eax + iret + +/* + * Single page TLB shootdown + */ + .text + SUPERALIGN_TEXT + .globl Xinvlpg +Xinvlpg: + pushl %eax + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds + +#ifdef COUNT_XINVLTLB_HITS + pushl %fs + movl $KPSEL, %eax /* Private space selector */ + mov %ax, %fs + movl PCPU(CPUID), %eax + popl %fs + ss + incl xhits_pg(,%eax,4) +#endif /* COUNT_XINVLTLB_HITS */ + + movl smp_tlb_addr1, %eax + invlpg (%eax) /* invalidate single page */ + + movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + + lock + incl smp_tlb_wait + + popl %ds + popl %eax + iret + +/* + * Page range TLB shootdown. + */ + .text + SUPERALIGN_TEXT + .globl Xinvlrng +Xinvlrng: + pushl %eax + pushl %edx + pushl %ds + movl $KDSEL, %eax /* Kernel data selector */ + mov %ax, %ds + +#ifdef COUNT_XINVLTLB_HITS + pushl %fs + movl $KPSEL, %eax /* Private space selector */ + mov %ax, %fs + movl PCPU(CPUID), %eax + popl %fs + incl xhits_rng(,%eax,4) +#endif /* COUNT_XINVLTLB_HITS */ + + movl smp_tlb_addr1, %edx + movl smp_tlb_addr2, %eax +1: invlpg (%edx) /* invalidate single page */ + addl $PAGE_SIZE, %edx + cmpl %edx, %eax + jb 1b + + movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ + + lock + incl smp_tlb_wait + + popl %ds + popl %edx popl %eax iret @@ -443,12 +521,6 @@ Xrendezvous: .data -#ifdef COUNT_XINVLTLB_HITS - .globl _xhits -_xhits: - .space (NCPU * 4), 0 -#endif /* COUNT_XINVLTLB_HITS */ - .globl apic_pin_trigger apic_pin_trigger: .long 0 diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index cfc162b..92bf581 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) } else { vector = TPR_SLOW_INTS + intr; -#ifdef APIC_INTR_REORDER -#ifdef APIC_INTR_HIGHPRI_CLOCK - /* XXX: Hack (kludge?) for more accurate clock. */ - if (intr == apic_8254_intr || intr == 8) { - vector = TPR_FAST_INTS + intr; - } -#endif -#endif setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h index 1726635..789b02b 100644 --- a/sys/i386/isa/intr_machdep.h +++ b/sys/i386/isa/intr_machdep.h @@ -88,6 +88,7 @@ /* IDT vector base for regular (aka. 
slow) and fast interrupts */ #define TPR_SLOW_INTS 0x20 #define TPR_FAST_INTS 0x60 +/* XXX note that the AST interrupt is at 0x50 */ /* blocking values for local APIC Task Priority Register */ #define TPR_BLOCK_HWI 0x4f /* hardware INTs */ @@ -104,20 +105,23 @@ #endif /** TEST_TEST1 */ /* TLB shootdowns */ -#define XINVLTLB_OFFSET (ICU_OFFSET + 112) +#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */ +#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */ +#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */ /* inter-cpu clock handling */ -#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113) -#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114) +#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */ +#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */ /* inter-CPU rendezvous */ -#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115) +#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */ /* IPI to generate an additional software trap at the target CPU */ -#define XCPUAST_OFFSET (ICU_OFFSET + 48) +/* XXX in the middle of the interrupt range, overlapping IRQ48 */ +#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */ /* IPI to signal CPUs to stop and wait for another CPU to restart them */ -#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) +#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */ /* * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: @@ -181,7 +185,9 @@ inthand_t IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31); inthand_t - Xinvltlb, /* TLB shootdowns */ + Xinvltlb, /* TLB shootdowns - global */ + Xinvlpg, /* TLB shootdowns - 1 page */ + Xinvlrng, /* TLB shootdowns - page range */ Xhardclock, /* Forward hardclock() */ Xstatclock, /* Forward statclock() */ Xcpuast, /* Additional software trap on other cpu */ diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c index cfc162b..92bf581 100644 --- a/sys/i386/isa/nmi.c +++ b/sys/i386/isa/nmi.c @@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) } else { vector = TPR_SLOW_INTS + intr; -#ifdef APIC_INTR_REORDER -#ifdef APIC_INTR_HIGHPRI_CLOCK - /* XXX: Hack (kludge?) for more accurate clock. */ - if (intr == apic_8254_intr || intr == 8) { - vector = TPR_FAST_INTS + intr; - } -#endif -#endif setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 39e3243..be50836 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -222,6 +222,9 @@ static struct witness_order_list_entry order_lists[] = { { "icu", &lock_class_mtx_spin }, #ifdef SMP { "smp rendezvous", &lock_class_mtx_spin }, +#ifdef __i386__ + { "tlb", &lock_class_mtx_spin }, +#endif #endif { "clk", &lock_class_mtx_spin }, { NULL, NULL },
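For reference, the IDT vector renumbering in the intr_machdep.h hunks works out as follows (assuming ICU_OFFSET is 0x20, which is what the new hex comments imply):

	/*
	 *	XINVLTLB_OFFSET    = 0x20 + 112 = 0x90	(unchanged)
	 *	XINVLPG_OFFSET     = 0x20 + 113 = 0x91	(new)
	 *	XINVLRNG_OFFSET    = 0x20 + 114 = 0x92	(new)
	 *	XHARDCLOCK_OFFSET  = 0x20 + 120 = 0x98	(was +113 = 0x91)
	 *	XSTATCLOCK_OFFSET  = 0x20 + 121 = 0x99	(was +114 = 0x92)
	 *	XRENDEZVOUS_OFFSET = 0x20 + 122 = 0x9A	(was +115 = 0x93)
	 *	XCPUAST_OFFSET     = 0x20 + 48  = 0x50	(unchanged, see XXX above)
	 *	XCPUSTOP_OFFSET    = 0x20 + 128 = 0xA0	(unchanged)
	 *
	 * i.e. the clock and rendezvous IPIs move up to 0x98-0x9A so that the
	 * two new shootdown vectors can sit next to XINVLTLB at 0x90-0x92.
	 */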