Diffstat (limited to 'sys')
41 files changed, 1037 insertions, 2193 deletions
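This commit backs out the fine-grained SMP TLB shootdown machinery: the per-page (Xinvlpg) and page-range (Xinvlrng) IPI vectors and their spin-locked smp_tlb_* handshake are removed, and every invalidation falls back to a single Xinvltlb broadcast that makes each remote CPU reload %cr3. The sketch below condenses the model the patch restores; it is distilled from the mp_machdep.c and cpufunc.h hunks that follow, assumes the ipi_all_but_self() and smp_started symbols declared there, and is not a standalone kernel source file.

        /* Declarations as in the patched machine/smp.h. */
        extern int smp_started;
        void    ipi_all_but_self(u_int ipi);

        /*
         * Broadcast a full TLB flush to all other CPUs with one IPI.
         * Note the patch's own XXX: there is no handshake, so the
         * caller does not wait for remote completion.
         */
        void
        smp_invltlb(void)
        {
        #if defined(APIC_IO)
                if (smp_started)
                        ipi_all_but_self(IPI_INVLTLB);
        #endif
        }

        /* Invalidate one page locally, then tell the other CPUs to flush. */
        void
        invlpg(u_int addr)
        {
                __asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
                smp_invltlb();
        }

        /* Full local flush: rewriting %cr3 drops all non-global TLB entries. */
        void
        invltlb(void)
        {
                u_long temp;

                __asm __volatile("movl %%cr3, %0; movl %0, %%cr3"
                    : "=r" (temp) : : "memory");
                smp_invltlb();
        }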
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index e3a37e1..95c9133 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -181,108 +181,30 @@ Xspuriousint: iret /* - * Global address space TLB shootdown. + * Handle TLB shootdowns. */ .text SUPERALIGN_TEXT .globl Xinvltlb Xinvltlb: pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds #ifdef COUNT_XINVLTLB_HITS pushl %fs - movl $KPSEL, %eax /* Private space selector */ + movl $KPSEL, %eax mov %ax, %fs movl PCPU(CPUID), %eax popl %fs - incl xhits_gbl(,%eax,4) + ss + incl _xhits(,%eax,4) #endif /* COUNT_XINVLTLB_HITS */ movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 + ss /* stack segment, avoid %ds load */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ - lock - incl smp_tlb_wait - - popl %ds - popl %eax - iret - -/* - * Single page TLB shootdown - */ - .text - SUPERALIGN_TEXT - .globl Xinvlpg -Xinvlpg: - pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - ss - incl xhits_pg(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ - - movl smp_tlb_addr1, %eax - invlpg (%eax) /* invalidate single page */ - - movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ - - lock - incl smp_tlb_wait - - popl %ds - popl %eax - iret - -/* - * Page range TLB shootdown. - */ - .text - SUPERALIGN_TEXT - .globl Xinvlrng -Xinvlrng: - pushl %eax - pushl %edx - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_rng(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ - - movl smp_tlb_addr1, %edx - movl smp_tlb_addr2, %eax -1: invlpg (%edx) /* invalidate single page */ - addl $PAGE_SIZE, %edx - cmpl %edx, %eax - jb 1b - - movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ - - lock - incl smp_tlb_wait - - popl %ds - popl %edx popl %eax iret @@ -521,6 +443,12 @@ Xrendezvous: .data +#ifdef COUNT_XINVLTLB_HITS + .globl _xhits +_xhits: + .space (NCPU * 4), 0 +#endif /* COUNT_XINVLTLB_HITS */ + .globl apic_pin_trigger apic_pin_trigger: .long 0 diff --git a/sys/amd64/amd64/bios.c b/sys/amd64/amd64/bios.c index c4ff2d5..3d8f357 100644 --- a/sys/amd64/amd64/bios.c +++ b/sys/amd64/amd64/bios.c @@ -320,8 +320,7 @@ bios16(struct bios_args *args, char *fmt, ...) va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; - pt_entry_t *pte; - pd_entry_t *ptd; + u_int *pte, *ptd; arg_start = 0xffffffff; arg_end = 0; @@ -380,19 +379,19 @@ bios16(struct bios_args *args, char *fmt, ...) args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; - ptd = (pd_entry_t *)rcr3(); + ptd = (u_int *)rcr3(); if (ptd == IdlePTD) { /* * no page table, so create one and install it. */ - pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + pte = (u_int *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); + ptd = (u_int *)((u_int)ptd + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* * this is a user-level page table */ - pte = PTmap; + pte = (u_int *)&PTmap; } /* * install pointer to page 0. we don't need to flush the tlb, @@ -449,7 +448,7 @@ bios16(struct bios_args *args, char *fmt, ...) 
i = bios16_call(&args->r, stack_top); - if (pte == PTmap) { + if (pte == (u_int *)&PTmap) { *pte = 0; /* remove entry */ } else { *ptd = 0; /* remove page table */ diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index 299bc3e..4fff220 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -381,6 +381,12 @@ begin: movl IdlePTD,%esi movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) + testl $CPUID_PGE, R(cpu_feature) + jz 1f + movl %cr4, %eax + orl $CR4_PGE, %eax + movl %eax, %cr4 +1: pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ @@ -803,7 +809,14 @@ no_kernend: jne map_read_write #endif xorl %edx,%edx - movl $R(etext),%ecx + +#if !defined(SMP) + testl $CPUID_PGE, R(cpu_feature) + jz 2f + orl $PG_G,%edx +#endif + +2: movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) @@ -814,7 +827,13 @@ no_kernend: andl $~PAGE_MASK, %eax map_read_write: movl $PG_RW,%edx - movl R(KERNend),%ecx +#if !defined(SMP) + testl $CPUID_PGE, R(cpu_feature) + jz 1f + orl $PG_G,%edx +#endif + +1: movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index 299bc3e..4fff220 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -381,6 +381,12 @@ begin: movl IdlePTD,%esi movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) + testl $CPUID_PGE, R(cpu_feature) + jz 1f + movl %cr4, %eax + orl $CR4_PGE, %eax + movl %eax, %cr4 +1: pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ @@ -803,7 +809,14 @@ no_kernend: jne map_read_write #endif xorl %edx,%edx - movl $R(etext),%ecx + +#if !defined(SMP) + testl $CPUID_PGE, R(cpu_feature) + jz 2f + orl $PG_G,%edx +#endif + +2: movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) @@ -814,7 +827,13 @@ no_kernend: andl $~PAGE_MASK, %eax map_read_write: movl $PG_RW,%edx - movl R(KERNend),%ecx +#if !defined(SMP) + testl $CPUID_PGE, R(cpu_feature) + jz 1f + orl $PG_G,%edx +#endif + +1: movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 008dfc5..27ee7ae 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -287,14 +287,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -#ifdef APIC_IO -/* Variables needed for SMP tlb shootdown. */ -u_int smp_tlb_addr1; -u_int smp_tlb_addr2; -volatile int smp_tlb_wait; -static struct mtx smp_tlb_mtx; -#endif - /* * Local data and functions. 
*/ @@ -343,9 +335,6 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ -#ifdef APIC_IO - mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); -#endif } /* @@ -615,10 +604,6 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLPG_OFFSET, Xinvlpg, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLRNG_OFFSET, Xinvlrng, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2201,198 +2186,42 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } -#if defined(APIC_IO) - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif - /* * Flush the TLB on all other CPU's + * + * XXX: Needs to handshake and wait for completion before proceding. 
*/ -static void -smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) -{ - u_int ncpu; - register_t eflags; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - ipi_all_but_self(vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) -{ - u_int m; - int i, ncpu, othercpus; - register_t eflags; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - /* XXX there should be a pcpu self mask */ - mask &= ~(1 << PCPU_GET(cpuid)); - if (mask == 0) - return; - /* Count the target cpus */ - ncpu = 0; - m = mask; - while ((i = ffs(m)) != 0) { - m >>= i; - ncpu++; - } - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} -#endif - void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } + if (smp_started) + ipi_all_but_self(IPI_INVLTLB); #endif /* APIC_IO */ } void -smp_invlpg(u_int addr) +invlpg(u_int addr) { -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -#endif /* APIC_IO */ -} + __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); -void -smp_invlpg_range(u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } void -smp_masked_invltlb(u_int mask) +invltlb(void) { -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -#endif /* APIC_IO */ -} + u_long temp; -void -smp_masked_invlpg(u_int mask, u_int addr) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -#endif /* APIC_IO */ -} + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() is + * inlined. 
+ */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); -void -smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } @@ -2451,9 +2280,6 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - if (bootverbose) - apic_dump("ap_init()"); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2486,8 +2312,7 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), - TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 008dfc5..27ee7ae 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -287,14 +287,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -#ifdef APIC_IO -/* Variables needed for SMP tlb shootdown. */ -u_int smp_tlb_addr1; -u_int smp_tlb_addr2; -volatile int smp_tlb_wait; -static struct mtx smp_tlb_mtx; -#endif - /* * Local data and functions. */ @@ -343,9 +335,6 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ -#ifdef APIC_IO - mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); -#endif } /* @@ -615,10 +604,6 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLPG_OFFSET, Xinvlpg, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLRNG_OFFSET, Xinvlrng, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2201,198 +2186,42 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } -#if defined(APIC_IO) - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, 
ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif - /* * Flush the TLB on all other CPU's + * + * XXX: Needs to handshake and wait for completion before proceding. */ -static void -smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) -{ - u_int ncpu; - register_t eflags; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - ipi_all_but_self(vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) -{ - u_int m; - int i, ncpu, othercpus; - register_t eflags; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - /* XXX there should be a pcpu self mask */ - mask &= ~(1 << PCPU_GET(cpuid)); - if (mask == 0) - return; - /* Count the target cpus */ - ncpu = 0; - m = mask; - while ((i = ffs(m)) != 0) { - m >>= i; - ncpu++; - } - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} -#endif - void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } + if (smp_started) + ipi_all_but_self(IPI_INVLTLB); #endif /* APIC_IO */ } void -smp_invlpg(u_int addr) +invlpg(u_int addr) { -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -#endif /* APIC_IO */ -} + __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); -void -smp_invlpg_range(u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } void -smp_masked_invltlb(u_int mask) +invltlb(void) { -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -#endif /* APIC_IO */ -} + u_long temp; -void -smp_masked_invlpg(u_int mask, u_int addr) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -#endif /* APIC_IO */ -} + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() is + * inlined. 
+ */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); -void -smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } @@ -2451,9 +2280,6 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - if (bootverbose) - apic_dump("ap_init()"); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2486,8 +2312,7 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), - TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index a18d0da..f12cb0b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -85,9 +85,6 @@ #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> -#if defined(SMP) -#include <sys/smp.h> -#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -104,6 +101,7 @@ #include <machine/md_var.h> #include <machine/specialreg.h> #if defined(SMP) || defined(APIC_IO) +#include <machine/smp.h> #include <machine/apic.h> #include <machine/segments.h> #include <machine/tss.h> @@ -261,10 +259,10 @@ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; - #ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) + if (cpu_feature & CPUID_PSE) { newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); + } #endif return newaddr; } @@ -369,9 +367,10 @@ pmap_bootstrap(firstaddr, loadaddr) PTD[i] = 0; pgeflag = 0; -#if /* !defined(SMP) || */ defined(ENABLE_PG_G) - if (cpu_feature & CPUID_PGE) +#if !defined(SMP) /* XXX - see also mp_machdep.c */ + if (cpu_feature & CPUID_PGE) { pgeflag = PG_G; + } #endif /* @@ -384,7 +383,7 @@ pmap_bootstrap(firstaddr, loadaddr) */ pdir4mb = 0; -#ifndef DISABLE_PSE +#if !defined(DISABLE_PSE) if (cpu_feature & CPUID_PSE) { pd_entry_t ptditmp; /* @@ -395,64 +394,57 @@ pmap_bootstrap(firstaddr, loadaddr) ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; - } + +#if !defined(SMP) + /* + * Enable the PSE mode. + */ + load_cr4(rcr4() | CR4_PSE); + + /* + * We can do the mapping here for the single processor + * case. We simply ignore the old page table page from + * now on. + */ + /* + * For SMP, we still need 4K pages to bootstrap APs, + * PSE will be enabled as soon as all APs are up. + */ + PTD[KPTDI] = (pd_entry_t) ptditmp; + kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; + invltlb(); #endif -#ifndef SMP - /* - * Turn on PGE/PSE. SMP does this later on since the - * 4K page tables are required for AP boot (for now). - * XXX fixme. - */ - pmap_set_opt(); + } #endif + #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); + /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif - cpu_invltlb(); + + invltlb(); } +#ifdef SMP /* - * Enable 4MB page mode for MP startup. Turn on PG_G support. - * BSP will run this after all the AP's have started up. 
+ * Set 4mb pdir for mp startup */ void pmap_set_opt(void) { - pt_entry_t *pte; - vm_offset_t va; - - if (pgeflag && (cpu_feature & CPUID_PGE)) - load_cr4(rcr4() | CR4_PGE); -#ifndef DISABLE_PSE - if (pseflag && (cpu_feature & CPUID_PSE)) + if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); -#endif - if (PCPU_GET(cpuid) == 0) { -#ifndef DISABLE_PSE - if (pdir4mb) + if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; -#endif - if (pgeflag) { - /* XXX see earlier comments about virtual_avail */ - for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE) - { - pte = vtopte(va); - if (*pte) - *pte |= pgeflag; - } + cpu_invltlb(); } - /* - * for SMP, this will cause all cpus to reload again, which - * is actually what we want since they now have CR4_PGE on. - */ - invltlb(); - } else - cpu_invltlb(); + } } +#endif /* * Initialize the pmap module. @@ -560,37 +552,27 @@ pmap_track_modified(vm_offset_t va) return 0; } +static PMAP_INLINE void +invltlb_1pg(vm_offset_t va) +{ +#ifdef I386_CPU + invltlb(); +#else + invlpg(va); +#endif +} + static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { #if defined(SMP) - u_int cpumask; - u_int other_cpus; - struct thread *td; - - td = curthread; - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. - * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (td->td_critnest == 1) - cpu_critical_exit(td->td_savecrit); - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - invlpg(va); /* global */ - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - cpu_invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); - } - critical_exit(); + if (pmap->pm_active & PCPU_GET(cpumask)) + cpu_invlpg((void *)va); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); #else if (pmap->pm_active) - cpu_invlpg(va); + invltlb_1pg(va); #endif } @@ -598,30 +580,10 @@ static __inline void pmap_invalidate_all(pmap_t pmap) { #if defined(SMP) - u_int cpumask; - u_int other_cpus; - struct thread *td; - - td = curthread; - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. 
- * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (td->td_critnest == 1) - cpu_critical_exit(td->td_savecrit); - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - invltlb(); /* global */ - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - cpu_invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); - } - critical_exit(); + if (pmap->pm_active & PCPU_GET(cpumask)) + cpu_invltlb(); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); #else if (pmap->pm_active) invltlb(); @@ -647,7 +609,12 @@ get_ptbase(pmap) /* otherwise, we are alternate address space */ if (frame != (APTDpde & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); +#if defined(SMP) + /* The page directory is not shared between CPUs */ + cpu_invltlb(); +#else invltlb(); +#endif } return APTmap; } @@ -676,7 +643,7 @@ pmap_pte_quick(pmap, va) newpf = pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; - pmap_invalidate_page(pmap, (vm_offset_t) PADDR1); + invltlb_1pg((vm_offset_t) PADDR1); } return PADDR1 + (index & (NPTEPG - 1)); } @@ -722,15 +689,20 @@ pmap_extract(pmap, va) /* * add a wired page to the kva + * note that in order for the mapping to take effect -- you + * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_offset_t pa) { pt_entry_t *pte; + pt_entry_t npte, opte; + npte = pa | PG_RW | PG_V | pgeflag; pte = vtopte(va); - *pte = pa | PG_RW | PG_V | pgeflag; - invlpg(va); + opte = *pte; + *pte = npte; + invltlb_1pg(va); } /* @@ -739,11 +711,11 @@ pmap_kenter(vm_offset_t va, vm_offset_t pa) PMAP_INLINE void pmap_kremove(vm_offset_t va) { - pt_entry_t *pte; + register pt_entry_t *pte; pte = vtopte(va); *pte = 0; - invlpg(va); + invltlb_1pg(va); } /* @@ -761,15 +733,13 @@ pmap_kremove(vm_offset_t va) vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { - vm_offset_t va, sva; - - va = sva = *virt; + vm_offset_t sva = *virt; + vm_offset_t va = sva; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } - invlpg_range(sva, end); *virt = va; return (sva); } @@ -784,19 +754,28 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) * over. The page *must* be wired. */ void -pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) +pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { - vm_offset_t va, end_va; + vm_offset_t end_va; - va = sva; end_va = va + count * PAGE_SIZE; - + while (va < end_va) { - pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); + pt_entry_t *pte; + + pte = vtopte(va); + *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; +#ifdef SMP + cpu_invlpg((void *)va); +#else + invltlb_1pg(va); +#endif va += PAGE_SIZE; m++; } - invlpg_range(sva, end_va); +#ifdef SMP + smp_invltlb(); +#endif } /* @@ -804,18 +783,27 @@ pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) * kernel -- it is meant only for temporary mappings. 
*/ void -pmap_qremove(vm_offset_t sva, int count) +pmap_qremove(vm_offset_t va, int count) { - vm_offset_t va, end_va; + vm_offset_t end_va; - va = sva; - end_va = va + count * PAGE_SIZE; + end_va = va + count*PAGE_SIZE; while (va < end_va) { - pmap_kremove(va); + pt_entry_t *pte; + + pte = vtopte(va); + *pte = 0; +#ifdef SMP + cpu_invlpg((void *)va); +#else + invltlb_1pg(va); +#endif va += PAGE_SIZE; } - invlpg_range(sva, end_va); +#ifdef SMP + smp_invltlb(); +#endif } static vm_page_t @@ -836,11 +824,14 @@ retry: void pmap_new_proc(struct proc *p) { +#ifdef I386_CPU + int updateneeded = 0; +#endif int i; - vm_page_t ma[UAREA_PAGES]; vm_object_t upobj; vm_offset_t up; vm_page_t m; + pt_entry_t *ptek, oldpte; /* * allocate object for the upages @@ -860,12 +851,13 @@ pmap_new_proc(struct proc *p) p->p_uarea = (struct user *)up; } + ptek = vtopte(up); + for (i = 0; i < UAREA_PAGES; i++) { /* * Get a kernel stack page */ m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); - ma[i] = m; /* * Wire the page @@ -873,12 +865,28 @@ pmap_new_proc(struct proc *p) m->wire_count++; cnt.v_wire_count++; + oldpte = *(ptek + i); + /* + * Enter the page into the kernel address space. + */ + *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; + if (oldpte) { +#ifdef I386_CPU + updateneeded = 1; +#else + invlpg(up + i * PAGE_SIZE); +#endif + } + vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } - pmap_qenter(up, ma, UAREA_PAGES); +#ifdef I386_CPU + if (updateneeded) + invltlb(); +#endif } /* @@ -893,18 +901,27 @@ pmap_dispose_proc(p) vm_object_t upobj; vm_offset_t up; vm_page_t m; + pt_entry_t *ptek, oldpte; upobj = p->p_upages_obj; up = (vm_offset_t)p->p_uarea; - pmap_qremove(up, UAREA_PAGES); + ptek = vtopte(up); for (i = 0; i < UAREA_PAGES; i++) { m = vm_page_lookup(upobj, i); if (m == NULL) panic("pmap_dispose_proc: upage already missing?"); vm_page_busy(m); + oldpte = *(ptek + i); + *(ptek + i) = 0; +#ifndef I386_CPU + invlpg(up + i * PAGE_SIZE); +#endif vm_page_unwire(m, 0); vm_page_free(m); } +#ifdef I386_CPU + invltlb(); +#endif } /* @@ -921,13 +938,13 @@ pmap_swapout_proc(p) upobj = p->p_upages_obj; up = (vm_offset_t)p->p_uarea; - pmap_qremove(up, UAREA_PAGES); for (i = 0; i < UAREA_PAGES; i++) { m = vm_page_lookup(upobj, i); if (m == NULL) panic("pmap_swapout_proc: upage already missing?"); vm_page_dirty(m); vm_page_unwire(m, 0); + pmap_kremove(up + i * PAGE_SIZE); } } @@ -939,7 +956,6 @@ pmap_swapin_proc(p) struct proc *p; { int i, rv; - vm_page_t ma[UAREA_PAGES]; vm_object_t upobj; vm_offset_t up; vm_page_t m; @@ -948,6 +964,7 @@ pmap_swapin_proc(p) up = (vm_offset_t)p->p_uarea; for (i = 0; i < UAREA_PAGES; i++) { m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); + pmap_kenter(up + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) @@ -955,12 +972,10 @@ pmap_swapin_proc(p) m = vm_page_lookup(upobj, i); m->valid = VM_PAGE_BITS_ALL; } - ma[i] = m; vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } - pmap_qenter(up, ma, UAREA_PAGES); } /* @@ -971,11 +986,14 @@ pmap_swapin_proc(p) void pmap_new_thread(struct thread *td) { +#ifdef I386_CPU + int updateneeded = 0; +#endif int i; - vm_page_t ma[KSTACK_PAGES]; vm_object_t ksobj; vm_page_t m; vm_offset_t ks; + pt_entry_t *ptek, oldpte; /* * allocate object for the kstack @@ -986,33 +1004,45 @@ pmap_new_thread(struct thread *td) 
td->td_kstack_obj = ksobj; } +#ifdef KSTACK_GUARD /* get a kernel virtual address for the kstack for this thread */ ks = td->td_kstack; -#ifdef KSTACK_GUARD if (ks == 0) { ks = kmem_alloc_nofault(kernel_map, (KSTACK_PAGES + 1) * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); - if (*vtopte(ks) != 0) - pmap_qremove(ks, 1); ks += PAGE_SIZE; td->td_kstack = ks; } + + ptek = vtopte(ks - PAGE_SIZE); + oldpte = *ptek; + *ptek = 0; + if (oldpte) { +#ifdef I386_CPU + updateneeded = 1; #else + invlpg(ks - PAGE_SIZE); +#endif + } + ptek++; +#else + /* get a kernel virtual address for the kstack for this thread */ + ks = td->td_kstack; if (ks == 0) { ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); td->td_kstack = ks; } + ptek = vtopte(ks); #endif for (i = 0; i < KSTACK_PAGES; i++) { /* * Get a kernel stack page */ m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); - ma[i] = m; /* * Wire the page @@ -1020,12 +1050,28 @@ pmap_new_thread(struct thread *td) m->wire_count++; cnt.v_wire_count++; + oldpte = *(ptek + i); + /* + * Enter the page into the kernel address space. + */ + *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; + if (oldpte) { +#ifdef I386_CPU + updateneeded = 1; +#else + invlpg(ks + i * PAGE_SIZE); +#endif + } + vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } - pmap_qenter(ks, ma, KSTACK_PAGES); +#ifdef I386_CPU + if (updateneeded) + invltlb(); +#endif } /* @@ -1040,18 +1086,27 @@ pmap_dispose_thread(td) vm_object_t ksobj; vm_offset_t ks; vm_page_t m; + pt_entry_t *ptek, oldpte; ksobj = td->td_kstack_obj; ks = td->td_kstack; - pmap_qremove(ks, KSTACK_PAGES); + ptek = vtopte(ks); for (i = 0; i < KSTACK_PAGES; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_busy(m); + oldpte = *(ptek + i); + *(ptek + i) = 0; +#ifndef I386_CPU + invlpg(ks + i * PAGE_SIZE); +#endif vm_page_unwire(m, 0); vm_page_free(m); } +#ifdef I386_CPU + invltlb(); +#endif } /* @@ -1068,13 +1123,13 @@ pmap_swapout_thread(td) ksobj = td->td_kstack_obj; ks = td->td_kstack; - pmap_qremove(ks, KSTACK_PAGES); for (i = 0; i < KSTACK_PAGES; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_swapout_thread: kstack already missing?"); vm_page_dirty(m); vm_page_unwire(m, 0); + pmap_kremove(ks + i * PAGE_SIZE); } } @@ -1086,7 +1141,6 @@ pmap_swapin_thread(td) struct thread *td; { int i, rv; - vm_page_t ma[KSTACK_PAGES]; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; @@ -1095,6 +1149,7 @@ pmap_swapin_thread(td) ks = td->td_kstack; for (i = 0; i < KSTACK_PAGES; i++) { m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); + pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(ksobj, &m, 1, 0); if (rv != VM_PAGER_OK) @@ -1102,12 +1157,10 @@ pmap_swapin_thread(td) m = vm_page_lookup(ksobj, i); m->valid = VM_PAGE_BITS_ALL; } - ma[i] = m; vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } - pmap_qenter(ks, ma, KSTACK_PAGES); } /*************************************************** @@ -1202,8 +1255,7 @@ pmap_pinit0(pmap) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); - pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t)IdlePTD); - invlpg((vm_offset_t)pmap->pm_pdir); + pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) 
IdlePTD); pmap->pm_count = 1; pmap->pm_ptphint = NULL; pmap->pm_active = 0; @@ -1249,7 +1301,7 @@ pmap_pinit(pmap) vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/ ptdpg->valid = VM_PAGE_BITS_ALL; - pmap_qenter((vm_offset_t) pmap->pm_pdir, &ptdpg, 1); + pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); @@ -2155,7 +2207,13 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; - pmap_invalidate_page(pmap, va); +#ifdef SMP + cpu_invlpg((void *)va); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); +#else + invltlb_1pg(va); +#endif } return; } @@ -2223,7 +2281,13 @@ validate: if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { - pmap_invalidate_page(pmap, va); +#ifdef SMP + cpu_invlpg((void *)va); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); +#else + invltlb_1pg(va); +#endif } } } @@ -2338,7 +2402,6 @@ void * pmap_kenter_temporary(vm_offset_t pa, int i) { pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); - invlpg((vm_offset_t)crashdumpmap + (i * PAGE_SIZE)); return ((void *)crashdumpmap); } @@ -2647,6 +2710,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t pdnxt; pd_entry_t src_frame, dst_frame; vm_page_t m; + pd_entry_t saved_pde; if (dst_addr != src_addr) return; @@ -2656,7 +2720,17 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, return; dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; - for (addr = src_addr; addr < end_addr; addr = pdnxt) { + if (dst_frame != (APTDpde & PG_FRAME)) { + APTDpde = dst_frame | PG_RW | PG_V; +#if defined(SMP) + /* The page directory is not shared between CPUs */ + cpu_invltlb(); +#else + invltlb(); +#endif + } + saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V); + for(addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; @@ -2697,14 +2771,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, if (pdnxt > end_addr) pdnxt = end_addr; - /* - * Have to recheck this before every avtopte() call below - * in case we have blocked and something else used APTDpde. - */ - if (dst_frame != (APTDpde & PG_FRAME)) { - APTDpde = dst_frame | PG_RW | PG_V; - invltlb(); - } src_pte = vtopte(addr); dst_pte = avtopte(addr); while (addr < pdnxt) { @@ -2720,6 +2786,16 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); + if ((APTDpde & PG_FRAME) != + (saved_pde & PG_FRAME)) { + APTDpde = saved_pde; +printf ("IT HAPPENNED!"); +#if defined(SMP) + cpu_invltlb(); +#else + invltlb(); +#endif + } if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and @@ -2763,15 +2839,12 @@ void pmap_zero_page(vm_offset_t phys) { -#ifdef SMP - /* XXX overkill, we only want to disable migration here */ - /* XXX or maybe not. 
down the track we have reentrancy issues */ - critical_enter(); -#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); + *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ + invltlb_1pg((vm_offset_t)CADDR2); + #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); @@ -2779,9 +2852,6 @@ pmap_zero_page(vm_offset_t phys) #endif bzero(CADDR2, PAGE_SIZE); *CMAP2 = 0; -#ifdef SMP - critical_exit(); -#endif } /* @@ -2794,15 +2864,12 @@ void pmap_zero_page_area(vm_offset_t phys, int off, int size) { -#ifdef SMP - /* XXX overkill, we only want to disable migration here */ - /* XXX or maybe not. down the track we have reentrancy issues */ - critical_enter(); -#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); + *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ + invltlb_1pg((vm_offset_t)CADDR2); + #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); @@ -2810,9 +2877,6 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size) #endif bzero((char *)CADDR2 + off, size); *CMAP2 = 0; -#ifdef SMP - critical_exit(); -#endif } /* @@ -2825,11 +2889,6 @@ void pmap_copy_page(vm_offset_t src, vm_offset_t dst) { -#ifdef SMP - /* XXX overkill, we only want to disable migration here */ - /* XXX or maybe not. down the track we have reentrancy issues */ - critical_enter(); -#endif if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) @@ -2837,14 +2896,17 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst) *CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; - cpu_invlpg((u_int)CADDR1); /* SMP: local only */ - cpu_invlpg((u_int)CADDR2); /* SMP: local only */ +#ifdef I386_CPU + invltlb(); +#else + invlpg((u_int)CADDR1); + invlpg((u_int)CADDR2); +#endif + bcopy(CADDR1, CADDR2, PAGE_SIZE); + *CMAP1 = 0; *CMAP2 = 0; -#ifdef SMP - critical_exit(); -#endif } @@ -2944,7 +3006,7 @@ pmap_remove_pages(pmap, sva, eva) if (tpte == 0) { printf("TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); - panic("bad peter"); + panic("bad pte"); } /* @@ -3260,13 +3322,14 @@ pmap_mapdev(pa, size) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; - for (tmpva = va; size > 0; ) { + for (tmpva = va; size > 0;) { pte = vtopte(tmpva); *pte = pa | PG_RW | PG_V | pgeflag; size -= PAGE_SIZE; tmpva += PAGE_SIZE; + pa += PAGE_SIZE; } - invlpg_range(va, tmpva); + invltlb(); return ((void *)(va + offset)); } @@ -3276,20 +3339,11 @@ pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { - vm_offset_t base, offset, tmpva; - pt_entry_t *pte; + vm_offset_t base, offset; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); - - for (tmpva = base; size > 0; ) { - pte = vtopte(tmpva); - *pte = 0; - size -= PAGE_SIZE; - tmpva += PAGE_SIZE; - } - invlpg_range(va, tmpva); kmem_free(kernel_map, base, size); } diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 0649009..bc58672 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -1591,6 +1591,42 @@ ENTRY(ssdtosd) popl %ebx ret +/* load_cr0(cr0) */ +ENTRY(load_cr0) + movl 4(%esp),%eax + movl %eax,%cr0 + ret + +/* rcr0() */ +ENTRY(rcr0) + movl %cr0,%eax + ret + +/* rcr3() */ +ENTRY(rcr3) + movl %cr3,%eax + ret + +/* void load_cr3(caddr_t cr3) */ +ENTRY(load_cr3) +#ifdef SWTCH_OPTIM_STATS + incl tlb_flush_count +#endif + movl 4(%esp),%eax + 
movl %eax,%cr3 + ret + +/* rcr4() */ +ENTRY(rcr4) + movl %cr4,%eax + ret + +/* void load_cr4(caddr_t cr4) */ +ENTRY(load_cr4) + movl 4(%esp),%eax + movl %eax,%cr4 + ret + /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index 0649009..bc58672 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -1591,6 +1591,42 @@ ENTRY(ssdtosd) popl %ebx ret +/* load_cr0(cr0) */ +ENTRY(load_cr0) + movl 4(%esp),%eax + movl %eax,%cr0 + ret + +/* rcr0() */ +ENTRY(rcr0) + movl %cr0,%eax + ret + +/* rcr3() */ +ENTRY(rcr3) + movl %cr3,%eax + ret + +/* void load_cr3(caddr_t cr3) */ +ENTRY(load_cr3) +#ifdef SWTCH_OPTIM_STATS + incl tlb_flush_count +#endif + movl 4(%esp),%eax + movl %eax,%cr3 + ret + +/* rcr4() */ +ENTRY(rcr4) + movl %cr4,%eax + ret + +/* void load_cr4(caddr_t cr4) */ +ENTRY(load_cr4) + movl 4(%esp),%eax + movl %eax,%cr4 + ret + /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 94d5c3a..969541f 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -227,6 +227,62 @@ invd(void) __asm __volatile("invd"); } +#if defined(SMP) && defined(_KERNEL) + +/* + * When using APIC IPI's, invlpg() is not simply the invlpg instruction + * (this is a bug) and the inlining cost is prohibitive since the call + * executes into the IPI transmission system. + */ +void invlpg __P((u_int addr)); +void invltlb __P((void)); + +static __inline void +cpu_invlpg(void *addr) +{ + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +} + +static __inline void +cpu_invltlb(void) +{ + u_int temp; + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() + * is inlined. + */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) + : : "memory"); +#if defined(SWTCH_OPTIM_STATS) + ++tlb_flush_count; +#endif +} + +#else /* !(SMP && _KERNEL) */ + +static __inline void +invlpg(u_int addr) +{ + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +} + +static __inline void +invltlb(void) +{ + u_int temp; + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() + * is inlined. 
+ */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) + : : "memory"); +#ifdef SWTCH_OPTIM_STATS + ++tlb_flush_count; +#endif +} + +#endif /* SMP && _KERNEL */ + static __inline u_short inw(u_int port) { @@ -292,6 +348,15 @@ outw(u_int port, u_short data) } static __inline u_int +rcr2(void) +{ + u_int data; + + __asm __volatile("movl %%cr2,%0" : "=r" (data)); + return (data); +} + +static __inline u_int read_eflags(void) { u_int ef; @@ -355,162 +420,6 @@ wrmsr(u_int msr, u_int64_t newval) __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } -static __inline void -load_cr0(u_int data) -{ - - __asm __volatile("movl %0,%%cr0" : : "r" (data)); -} - -static __inline u_int -rcr0(void) -{ - u_int data; - - __asm __volatile("movl %%cr0,%0" : "=r" (data)); - return (data); -} - -static __inline u_int -rcr2(void) -{ - u_int data; - - __asm __volatile("movl %%cr2,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_cr3(u_int data) -{ - - __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); -#if defined(SWTCH_OPTIM_STATS) - ++tlb_flush_count; -#endif -} - -static __inline u_int -rcr3(void) -{ - u_int data; - - __asm __volatile("movl %%cr3,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_cr4(u_int data) -{ - __asm __volatile("movl %0,%%cr4" : : "r" (data)); -} - -static __inline u_int -rcr4(void) -{ - u_int data; - - __asm __volatile("movl %%cr4,%0" : "=r" (data)); - return (data); -} - -/* - * Global TLB flush (except for thise for pages marked PG_G) - */ -static __inline void -cpu_invltlb(void) -{ - - load_cr3(rcr3()); -} - -/* - * TLB flush for an individual page (even if it has PG_G). - * Only works on 486+ CPUs (i386 does not have PG_G). - */ -static __inline void -cpu_invlpg(u_int addr) -{ - -#ifndef I386_CPU - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -#else - cpu_invltlb(); -#endif -} - -#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ -/* - * Same as above but for a range of pages. - */ -static __inline void -cpu_invlpg_range(u_int startva, u_int endva) -{ -#ifndef I386_CPU - u_int addr; - - for (addr = startva; addr < endva; addr += PAGE_SIZE) - __asm __volatile("invlpg %0" : : "m" (*(char *)addr)); - __asm __volatile("" : : : "memory"); -#else - cpu_invltlb(); -#endif -} -#endif - -#ifdef SMP -extern void smp_invlpg(u_int addr); -extern void smp_masked_invlpg(u_int mask, u_int addr); -#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ -extern void smp_invlpg_range(u_int startva, u_int endva); -extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva); -#endif -extern void smp_invltlb(void); -extern void smp_masked_invltlb(u_int mask); -#endif - -/* - * Generic page TLB flush. Takes care of SMP. - */ -static __inline void -invlpg(u_int addr) -{ - - cpu_invlpg(addr); -#ifdef SMP - smp_invlpg(addr); -#endif -} - -#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ -/* - * Generic TLB flush for a range of pages. Takes care of SMP. - * Saves many IPIs for SMP mode. 
- */ -static __inline void -invlpg_range(u_int startva, u_int endva) -{ - - cpu_invlpg_range(startva, endva); -#ifdef SMP - smp_invlpg_range(startva, endva); -#endif -} -#endif - -/* - * Generic global TLB flush (except for thise for pages marked PG_G) - */ -static __inline void -invltlb(void) -{ - - cpu_invltlb(); -#ifdef SMP - smp_invltlb(); -#endif -} - static __inline u_int rfs(void) { @@ -672,8 +581,6 @@ cpu_critical_exit(critical_t eflags) int breakpoint __P((void)); u_int bsfl __P((u_int mask)); u_int bsrl __P((u_int mask)); -void cpu_invlpg __P((u_int addr)); -void cpu_invlpg_range __P((u_int start, u_int end)); void disable_intr __P((void)); void do_cpuid __P((u_int ax, u_int *p)); void enable_intr __P((void)); @@ -684,26 +591,15 @@ void insl __P((u_int port, void *addr, size_t cnt)); void insw __P((u_int port, void *addr, size_t cnt)); void invd __P((void)); void invlpg __P((u_int addr)); -void invlpg_range __P((u_int start, u_int end)); void invltlb __P((void)); u_short inw __P((u_int port)); -void load_cr0 __P((u_int cr0)); -void load_cr3 __P((u_int cr3)); -void load_cr4 __P((u_int cr4)); -void load_fs __P((u_int sel)); -void load_gs __P((u_int sel)); void outb __P((u_int port, u_char data)); void outl __P((u_int port, u_int data)); void outsb __P((u_int port, void *addr, size_t cnt)); void outsl __P((u_int port, void *addr, size_t cnt)); void outsw __P((u_int port, void *addr, size_t cnt)); void outw __P((u_int port, u_short data)); -u_int rcr0 __P((void)); u_int rcr2 __P((void)); -u_int rcr3 __P((void)); -u_int rcr4 __P((void)); -u_int rfs __P((void)); -u_int rgs __P((void)); u_int64_t rdmsr __P((u_int msr)); u_int64_t rdpmc __P((u_int pmc)); u_int64_t rdtsc __P((void)); @@ -711,12 +607,22 @@ u_int read_eflags __P((void)); void wbinvd __P((void)); void write_eflags __P((u_int ef)); void wrmsr __P((u_int msr, u_int64_t newval)); +u_int rfs __P((void)); +u_int rgs __P((void)); +void load_fs __P((u_int sel)); +void load_gs __P((u_int sel)); critical_t cpu_critical_enter __P((void)); void cpu_critical_exit __P((critical_t eflags)); #endif /* __GNUC__ */ +void load_cr0 __P((u_int cr0)); +void load_cr3 __P((u_int cr3)); +void load_cr4 __P((u_int cr4)); void ltr __P((u_short sel)); +u_int rcr0 __P((void)); +u_int rcr3 __P((void)); +u_int rcr4 __P((void)); void reset_dbregs __P((void)); __END_DECLS diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index 008dfc5..27ee7ae 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -287,14 +287,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -#ifdef APIC_IO -/* Variables needed for SMP tlb shootdown. */ -u_int smp_tlb_addr1; -u_int smp_tlb_addr2; -volatile int smp_tlb_wait; -static struct mtx smp_tlb_mtx; -#endif - /* * Local data and functions. 
*/ @@ -343,9 +335,6 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ -#ifdef APIC_IO - mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); -#endif } /* @@ -615,10 +604,6 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLPG_OFFSET, Xinvlpg, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLRNG_OFFSET, Xinvlrng, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2201,198 +2186,42 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } -#if defined(APIC_IO) - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif - /* * Flush the TLB on all other CPU's + * + * XXX: Needs to handshake and wait for completion before proceding. 
*/ -static void -smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) -{ - u_int ncpu; - register_t eflags; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - ipi_all_but_self(vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) -{ - u_int m; - int i, ncpu, othercpus; - register_t eflags; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - /* XXX there should be a pcpu self mask */ - mask &= ~(1 << PCPU_GET(cpuid)); - if (mask == 0) - return; - /* Count the target cpus */ - ncpu = 0; - m = mask; - while ((i = ffs(m)) != 0) { - m >>= i; - ncpu++; - } - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} -#endif - void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } + if (smp_started) + ipi_all_but_self(IPI_INVLTLB); #endif /* APIC_IO */ } void -smp_invlpg(u_int addr) +invlpg(u_int addr) { -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -#endif /* APIC_IO */ -} + __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); -void -smp_invlpg_range(u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } void -smp_masked_invltlb(u_int mask) +invltlb(void) { -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -#endif /* APIC_IO */ -} + u_long temp; -void -smp_masked_invlpg(u_int mask, u_int addr) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -#endif /* APIC_IO */ -} + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() is + * inlined. 
+ */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); -void -smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } @@ -2451,9 +2280,6 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - if (bootverbose) - apic_dump("ap_init()"); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2486,8 +2312,7 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), - TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 618bb3f..7358a9e 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -151,7 +151,7 @@ extern pt_entry_t PTmap[], APTmap[]; extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde, APTDpde; -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ +extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */ #endif #ifdef _KERNEL @@ -267,7 +267,9 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t)); void pmap_unmapdev __P((vm_offset_t, vm_size_t)); pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2; vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); +#ifdef SMP void pmap_set_opt __P((void)); +#endif #endif /* _KERNEL */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 4136c20..34228e2 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -51,8 +51,6 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */ * Interprocessor interrupts for SMP. */ #define IPI_INVLTLB XINVLTLB_OFFSET -#define IPI_INVLPG XINVLPG_OFFSET -#define IPI_INVLRNG XINVLRNG_OFFSET #define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET #define IPI_AST XCPUAST_OFFSET #define IPI_STOP XCPUSTOP_OFFSET @@ -109,6 +107,7 @@ void assign_apic_irq __P((int apic, int intpin, int irq)); void revoke_apic_irq __P((int irq)); void bsp_apic_configure __P((void)); void init_secondary __P((void)); +void smp_invltlb __P((void)); void forward_statclock __P((void)); void forwarded_statclock __P((struct trapframe frame)); void forward_hardclock __P((void)); diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c index 92bf581..cfc162b 100644 --- a/sys/amd64/isa/intr_machdep.c +++ b/sys/amd64/isa/intr_machdep.c @@ -499,6 +499,14 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) } else { vector = TPR_SLOW_INTS + intr; +#ifdef APIC_INTR_REORDER +#ifdef APIC_INTR_HIGHPRI_CLOCK + /* XXX: Hack (kludge?) for more accurate clock. */ + if (intr == apic_8254_intr || intr == 8) { + vector = TPR_FAST_INTS + intr; + } +#endif +#endif setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h index 789b02b..1726635 100644 --- a/sys/amd64/isa/intr_machdep.h +++ b/sys/amd64/isa/intr_machdep.h @@ -88,7 +88,6 @@ /* IDT vector base for regular (aka. 
slow) and fast interrupts */ #define TPR_SLOW_INTS 0x20 #define TPR_FAST_INTS 0x60 -/* XXX note that the AST interrupt is at 0x50 */ /* blocking values for local APIC Task Priority Register */ #define TPR_BLOCK_HWI 0x4f /* hardware INTs */ @@ -105,23 +104,20 @@ #endif /** TEST_TEST1 */ /* TLB shootdowns */ -#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */ -#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */ -#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */ +#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* inter-cpu clock handling */ -#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */ -#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */ +#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113) +#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114) /* inter-CPU rendezvous */ -#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */ +#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115) /* IPI to generate an additional software trap at the target CPU */ -/* XXX in the middle of the interrupt range, overlapping IRQ48 */ -#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */ +#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* IPI to signal CPUs to stop and wait for another CPU to restart them */ -#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */ +#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: @@ -185,9 +181,7 @@ inthand_t IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31); inthand_t - Xinvltlb, /* TLB shootdowns - global */ - Xinvlpg, /* TLB shootdowns - 1 page */ - Xinvlrng, /* TLB shootdowns - page range */ + Xinvltlb, /* TLB shootdowns */ Xhardclock, /* Forward hardclock() */ Xstatclock, /* Forward statclock() */ Xcpuast, /* Additional software trap on other cpu */ diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c index 92bf581..cfc162b 100644 --- a/sys/amd64/isa/nmi.c +++ b/sys/amd64/isa/nmi.c @@ -499,6 +499,14 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) } else { vector = TPR_SLOW_INTS + intr; +#ifdef APIC_INTR_REORDER +#ifdef APIC_INTR_HIGHPRI_CLOCK + /* XXX: Hack (kludge?) for more accurate clock. */ + if (intr == apic_8254_intr || intr == 8) { + vector = TPR_FAST_INTS + intr; + } +#endif +#endif setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } diff --git a/sys/conf/options.i386 b/sys/conf/options.i386 index 3848b0a..b505dbd 100644 --- a/sys/conf/options.i386 +++ b/sys/conf/options.i386 @@ -5,7 +5,6 @@ DISABLE_PSE MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h PMAP_SHPGPERPROC opt_pmap.h -ENABLE_PG_G opt_pmap.h PPC_PROBE_CHIPSET opt_ppc.h PPC_DEBUG opt_ppc.h SHOW_BUSYBUFS diff --git a/sys/conf/options.pc98 b/sys/conf/options.pc98 index 9b71078..6cf1db2 100644 --- a/sys/conf/options.pc98 +++ b/sys/conf/options.pc98 @@ -5,7 +5,6 @@ DISABLE_PSE MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h PMAP_SHPGPERPROC opt_pmap.h -ENABLE_PG_G opt_pmap.h PPC_PROBE_CHIPSET opt_ppc.h PPC_DEBUG opt_ppc.h SHOW_BUSYBUFS diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index e3a37e1..95c9133 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -181,108 +181,30 @@ Xspuriousint: iret /* - * Global address space TLB shootdown. + * Handle TLB shootdowns. 
*/ .text SUPERALIGN_TEXT .globl Xinvltlb Xinvltlb: pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds #ifdef COUNT_XINVLTLB_HITS pushl %fs - movl $KPSEL, %eax /* Private space selector */ + movl $KPSEL, %eax mov %ax, %fs movl PCPU(CPUID), %eax popl %fs - incl xhits_gbl(,%eax,4) + ss + incl _xhits(,%eax,4) #endif /* COUNT_XINVLTLB_HITS */ movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 + ss /* stack segment, avoid %ds load */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ - lock - incl smp_tlb_wait - - popl %ds - popl %eax - iret - -/* - * Single page TLB shootdown - */ - .text - SUPERALIGN_TEXT - .globl Xinvlpg -Xinvlpg: - pushl %eax - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - ss - incl xhits_pg(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ - - movl smp_tlb_addr1, %eax - invlpg (%eax) /* invalidate single page */ - - movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ - - lock - incl smp_tlb_wait - - popl %ds - popl %eax - iret - -/* - * Page range TLB shootdown. - */ - .text - SUPERALIGN_TEXT - .globl Xinvlrng -Xinvlrng: - pushl %eax - pushl %edx - pushl %ds - movl $KDSEL, %eax /* Kernel data selector */ - mov %ax, %ds - -#ifdef COUNT_XINVLTLB_HITS - pushl %fs - movl $KPSEL, %eax /* Private space selector */ - mov %ax, %fs - movl PCPU(CPUID), %eax - popl %fs - incl xhits_rng(,%eax,4) -#endif /* COUNT_XINVLTLB_HITS */ - - movl smp_tlb_addr1, %edx - movl smp_tlb_addr2, %eax -1: invlpg (%edx) /* invalidate single page */ - addl $PAGE_SIZE, %edx - cmpl %edx, %eax - jb 1b - - movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ - - lock - incl smp_tlb_wait - - popl %ds - popl %edx popl %eax iret @@ -521,6 +443,12 @@ Xrendezvous: .data +#ifdef COUNT_XINVLTLB_HITS + .globl _xhits +_xhits: + .space (NCPU * 4), 0 +#endif /* COUNT_XINVLTLB_HITS */ + .globl apic_pin_trigger apic_pin_trigger: .long 0 diff --git a/sys/i386/i386/bios.c b/sys/i386/i386/bios.c index c4ff2d5..3d8f357 100644 --- a/sys/i386/i386/bios.c +++ b/sys/i386/i386/bios.c @@ -320,8 +320,7 @@ bios16(struct bios_args *args, char *fmt, ...) va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; - pt_entry_t *pte; - pd_entry_t *ptd; + u_int *pte, *ptd; arg_start = 0xffffffff; arg_end = 0; @@ -380,19 +379,19 @@ bios16(struct bios_args *args, char *fmt, ...) args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; - ptd = (pd_entry_t *)rcr3(); + ptd = (u_int *)rcr3(); if (ptd == IdlePTD) { /* * no page table, so create one and install it. */ - pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + pte = (u_int *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); + ptd = (u_int *)((u_int)ptd + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* * this is a user-level page table */ - pte = PTmap; + pte = (u_int *)&PTmap; } /* * install pointer to page 0. we don't need to flush the tlb, @@ -449,7 +448,7 @@ bios16(struct bios_args *args, char *fmt, ...) 
i = bios16_call(&args->r, stack_top); - if (pte == PTmap) { + if (pte == (u_int *)&PTmap) { *pte = 0; /* remove entry */ } else { *ptd = 0; /* remove page table */ diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index 299bc3e..4fff220 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -381,6 +381,12 @@ begin: movl IdlePTD,%esi movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) + testl $CPUID_PGE, R(cpu_feature) + jz 1f + movl %cr4, %eax + orl $CR4_PGE, %eax + movl %eax, %cr4 +1: pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ @@ -803,7 +809,14 @@ no_kernend: jne map_read_write #endif xorl %edx,%edx - movl $R(etext),%ecx + +#if !defined(SMP) + testl $CPUID_PGE, R(cpu_feature) + jz 2f + orl $PG_G,%edx +#endif + +2: movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) @@ -814,7 +827,13 @@ no_kernend: andl $~PAGE_MASK, %eax map_read_write: movl $PG_RW,%edx - movl R(KERNend),%ecx +#if !defined(SMP) + testl $CPUID_PGE, R(cpu_feature) + jz 1f + orl $PG_G,%edx +#endif + +1: movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 008dfc5..27ee7ae 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -287,14 +287,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -#ifdef APIC_IO -/* Variables needed for SMP tlb shootdown. */ -u_int smp_tlb_addr1; -u_int smp_tlb_addr2; -volatile int smp_tlb_wait; -static struct mtx smp_tlb_mtx; -#endif - /* * Local data and functions. */ @@ -343,9 +335,6 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ -#ifdef APIC_IO - mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); -#endif } /* @@ -615,10 +604,6 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLPG_OFFSET, Xinvlpg, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLRNG_OFFSET, Xinvlrng, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2201,198 +2186,42 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } -#if defined(APIC_IO) - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - 
&ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif - /* * Flush the TLB on all other CPU's + * + * XXX: Needs to handshake and wait for completion before proceding. */ -static void -smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) -{ - u_int ncpu; - register_t eflags; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - ipi_all_but_self(vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) -{ - u_int m; - int i, ncpu, othercpus; - register_t eflags; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - /* XXX there should be a pcpu self mask */ - mask &= ~(1 << PCPU_GET(cpuid)); - if (mask == 0) - return; - /* Count the target cpus */ - ncpu = 0; - m = mask; - while ((i = ffs(m)) != 0) { - m >>= i; - ncpu++; - } - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} -#endif - void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } + if (smp_started) + ipi_all_but_self(IPI_INVLTLB); #endif /* APIC_IO */ } void -smp_invlpg(u_int addr) +invlpg(u_int addr) { -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -#endif /* APIC_IO */ -} + __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); -void -smp_invlpg_range(u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } void -smp_masked_invltlb(u_int mask) +invltlb(void) { -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -#endif /* APIC_IO */ -} + u_long temp; -void -smp_masked_invlpg(u_int mask, u_int addr) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -#endif /* APIC_IO */ -} + /* + * This should be implemented as load_cr3(rcr3()) when 
load_cr3() is + * inlined. + */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); -void -smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } @@ -2451,9 +2280,6 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - if (bootverbose) - apic_dump("ap_init()"); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2486,8 +2312,7 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), - TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c index d3f4d3d..dc7861f 100644 --- a/sys/i386/i386/mpapic.c +++ b/sys/i386/i386/mpapic.c @@ -101,6 +101,9 @@ apic_initialize(void) #endif /** TEST_TEST1 */ lapic.svr = temp; + + if (bootverbose) + apic_dump("apic_initialize()"); } diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index 008dfc5..27ee7ae 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -287,14 +287,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -#ifdef APIC_IO -/* Variables needed for SMP tlb shootdown. */ -u_int smp_tlb_addr1; -u_int smp_tlb_addr2; -volatile int smp_tlb_wait; -static struct mtx smp_tlb_mtx; -#endif - /* * Local data and functions. */ @@ -343,9 +335,6 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ -#ifdef APIC_IO - mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); -#endif } /* @@ -615,10 +604,6 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLPG_OFFSET, Xinvlpg, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLRNG_OFFSET, Xinvlrng, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2201,198 +2186,42 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } -#if defined(APIC_IO) - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, 
OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif - /* * Flush the TLB on all other CPU's + * + * XXX: Needs to handshake and wait for completion before proceding. */ -static void -smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) -{ - u_int ncpu; - register_t eflags; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - ipi_all_but_self(vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) -{ - u_int m; - int i, ncpu, othercpus; - register_t eflags; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - /* XXX there should be a pcpu self mask */ - mask &= ~(1 << PCPU_GET(cpuid)); - if (mask == 0) - return; - /* Count the target cpus */ - ncpu = 0; - m = mask; - while ((i = ffs(m)) != 0) { - m >>= i; - ncpu++; - } - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} -#endif - void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } + if (smp_started) + ipi_all_but_self(IPI_INVLTLB); #endif /* APIC_IO */ } void -smp_invlpg(u_int addr) +invlpg(u_int addr) { -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -#endif /* APIC_IO */ -} + __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); -void -smp_invlpg_range(u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } void -smp_masked_invltlb(u_int mask) +invltlb(void) { -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -#endif /* APIC_IO */ -} + u_long temp; -void -smp_masked_invlpg(u_int mask, u_int addr) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef 
COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -#endif /* APIC_IO */ -} + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() is + * inlined. + */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); -void -smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } @@ -2451,9 +2280,6 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - if (bootverbose) - apic_dump("ap_init()"); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2486,8 +2312,7 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), - TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index a18d0da..f12cb0b 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -85,9 +85,6 @@ #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> -#if defined(SMP) -#include <sys/smp.h> -#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -104,6 +101,7 @@ #include <machine/md_var.h> #include <machine/specialreg.h> #if defined(SMP) || defined(APIC_IO) +#include <machine/smp.h> #include <machine/apic.h> #include <machine/segments.h> #include <machine/tss.h> @@ -261,10 +259,10 @@ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; - #ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) + if (cpu_feature & CPUID_PSE) { newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); + } #endif return newaddr; } @@ -369,9 +367,10 @@ pmap_bootstrap(firstaddr, loadaddr) PTD[i] = 0; pgeflag = 0; -#if /* !defined(SMP) || */ defined(ENABLE_PG_G) - if (cpu_feature & CPUID_PGE) +#if !defined(SMP) /* XXX - see also mp_machdep.c */ + if (cpu_feature & CPUID_PGE) { pgeflag = PG_G; + } #endif /* @@ -384,7 +383,7 @@ pmap_bootstrap(firstaddr, loadaddr) */ pdir4mb = 0; -#ifndef DISABLE_PSE +#if !defined(DISABLE_PSE) if (cpu_feature & CPUID_PSE) { pd_entry_t ptditmp; /* @@ -395,64 +394,57 @@ pmap_bootstrap(firstaddr, loadaddr) ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; - } + +#if !defined(SMP) + /* + * Enable the PSE mode. + */ + load_cr4(rcr4() | CR4_PSE); + + /* + * We can do the mapping here for the single processor + * case. We simply ignore the old page table page from + * now on. + */ + /* + * For SMP, we still need 4K pages to bootstrap APs, + * PSE will be enabled as soon as all APs are up. + */ + PTD[KPTDI] = (pd_entry_t) ptditmp; + kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; + invltlb(); #endif -#ifndef SMP - /* - * Turn on PGE/PSE. SMP does this later on since the - * 4K page tables are required for AP boot (for now). - * XXX fixme. - */ - pmap_set_opt(); + } #endif + #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); + /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif - cpu_invltlb(); + + invltlb(); } +#ifdef SMP /* - * Enable 4MB page mode for MP startup. Turn on PG_G support. - * BSP will run this after all the AP's have started up. + * Set 4mb pdir for mp startup */ void pmap_set_opt(void) { - pt_entry_t *pte; - vm_offset_t va; - - if (pgeflag && (cpu_feature & CPUID_PGE)) - load_cr4(rcr4() | CR4_PGE); -#ifndef DISABLE_PSE - if (pseflag && (cpu_feature & CPUID_PSE)) + if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); -#endif - if (PCPU_GET(cpuid) == 0) { -#ifndef DISABLE_PSE - if (pdir4mb) + if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; -#endif - if (pgeflag) { - /* XXX see earlier comments about virtual_avail */ - for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE) - { - pte = vtopte(va); - if (*pte) - *pte |= pgeflag; - } + cpu_invltlb(); } - /* - * for SMP, this will cause all cpus to reload again, which - * is actually what we want since they now have CR4_PGE on. - */ - invltlb(); - } else - cpu_invltlb(); + } } +#endif /* * Initialize the pmap module. @@ -560,37 +552,27 @@ pmap_track_modified(vm_offset_t va) return 0; } +static PMAP_INLINE void +invltlb_1pg(vm_offset_t va) +{ +#ifdef I386_CPU + invltlb(); +#else + invlpg(va); +#endif +} + static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { #if defined(SMP) - u_int cpumask; - u_int other_cpus; - struct thread *td; - - td = curthread; - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. - * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (td->td_critnest == 1) - cpu_critical_exit(td->td_savecrit); - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - invlpg(va); /* global */ - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - cpu_invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); - } - critical_exit(); + if (pmap->pm_active & PCPU_GET(cpumask)) + cpu_invlpg((void *)va); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); #else if (pmap->pm_active) - cpu_invlpg(va); + invltlb_1pg(va); #endif } @@ -598,30 +580,10 @@ static __inline void pmap_invalidate_all(pmap_t pmap) { #if defined(SMP) - u_int cpumask; - u_int other_cpus; - struct thread *td; - - td = curthread; - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. 
- * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (td->td_critnest == 1) - cpu_critical_exit(td->td_savecrit); - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - invltlb(); /* global */ - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - cpu_invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); - } - critical_exit(); + if (pmap->pm_active & PCPU_GET(cpumask)) + cpu_invltlb(); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); #else if (pmap->pm_active) invltlb(); @@ -647,7 +609,12 @@ get_ptbase(pmap) /* otherwise, we are alternate address space */ if (frame != (APTDpde & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); +#if defined(SMP) + /* The page directory is not shared between CPUs */ + cpu_invltlb(); +#else invltlb(); +#endif } return APTmap; } @@ -676,7 +643,7 @@ pmap_pte_quick(pmap, va) newpf = pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; - pmap_invalidate_page(pmap, (vm_offset_t) PADDR1); + invltlb_1pg((vm_offset_t) PADDR1); } return PADDR1 + (index & (NPTEPG - 1)); } @@ -722,15 +689,20 @@ pmap_extract(pmap, va) /* * add a wired page to the kva + * note that in order for the mapping to take effect -- you + * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_offset_t pa) { pt_entry_t *pte; + pt_entry_t npte, opte; + npte = pa | PG_RW | PG_V | pgeflag; pte = vtopte(va); - *pte = pa | PG_RW | PG_V | pgeflag; - invlpg(va); + opte = *pte; + *pte = npte; + invltlb_1pg(va); } /* @@ -739,11 +711,11 @@ pmap_kenter(vm_offset_t va, vm_offset_t pa) PMAP_INLINE void pmap_kremove(vm_offset_t va) { - pt_entry_t *pte; + register pt_entry_t *pte; pte = vtopte(va); *pte = 0; - invlpg(va); + invltlb_1pg(va); } /* @@ -761,15 +733,13 @@ pmap_kremove(vm_offset_t va) vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { - vm_offset_t va, sva; - - va = sva = *virt; + vm_offset_t sva = *virt; + vm_offset_t va = sva; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } - invlpg_range(sva, end); *virt = va; return (sva); } @@ -784,19 +754,28 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) * over. The page *must* be wired. */ void -pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) +pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { - vm_offset_t va, end_va; + vm_offset_t end_va; - va = sva; end_va = va + count * PAGE_SIZE; - + while (va < end_va) { - pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); + pt_entry_t *pte; + + pte = vtopte(va); + *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; +#ifdef SMP + cpu_invlpg((void *)va); +#else + invltlb_1pg(va); +#endif va += PAGE_SIZE; m++; } - invlpg_range(sva, end_va); +#ifdef SMP + smp_invltlb(); +#endif } /* @@ -804,18 +783,27 @@ pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) * kernel -- it is meant only for temporary mappings. 
*/ void -pmap_qremove(vm_offset_t sva, int count) +pmap_qremove(vm_offset_t va, int count) { - vm_offset_t va, end_va; + vm_offset_t end_va; - va = sva; - end_va = va + count * PAGE_SIZE; + end_va = va + count*PAGE_SIZE; while (va < end_va) { - pmap_kremove(va); + pt_entry_t *pte; + + pte = vtopte(va); + *pte = 0; +#ifdef SMP + cpu_invlpg((void *)va); +#else + invltlb_1pg(va); +#endif va += PAGE_SIZE; } - invlpg_range(sva, end_va); +#ifdef SMP + smp_invltlb(); +#endif } static vm_page_t @@ -836,11 +824,14 @@ retry: void pmap_new_proc(struct proc *p) { +#ifdef I386_CPU + int updateneeded = 0; +#endif int i; - vm_page_t ma[UAREA_PAGES]; vm_object_t upobj; vm_offset_t up; vm_page_t m; + pt_entry_t *ptek, oldpte; /* * allocate object for the upages @@ -860,12 +851,13 @@ pmap_new_proc(struct proc *p) p->p_uarea = (struct user *)up; } + ptek = vtopte(up); + for (i = 0; i < UAREA_PAGES; i++) { /* * Get a kernel stack page */ m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); - ma[i] = m; /* * Wire the page @@ -873,12 +865,28 @@ pmap_new_proc(struct proc *p) m->wire_count++; cnt.v_wire_count++; + oldpte = *(ptek + i); + /* + * Enter the page into the kernel address space. + */ + *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; + if (oldpte) { +#ifdef I386_CPU + updateneeded = 1; +#else + invlpg(up + i * PAGE_SIZE); +#endif + } + vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } - pmap_qenter(up, ma, UAREA_PAGES); +#ifdef I386_CPU + if (updateneeded) + invltlb(); +#endif } /* @@ -893,18 +901,27 @@ pmap_dispose_proc(p) vm_object_t upobj; vm_offset_t up; vm_page_t m; + pt_entry_t *ptek, oldpte; upobj = p->p_upages_obj; up = (vm_offset_t)p->p_uarea; - pmap_qremove(up, UAREA_PAGES); + ptek = vtopte(up); for (i = 0; i < UAREA_PAGES; i++) { m = vm_page_lookup(upobj, i); if (m == NULL) panic("pmap_dispose_proc: upage already missing?"); vm_page_busy(m); + oldpte = *(ptek + i); + *(ptek + i) = 0; +#ifndef I386_CPU + invlpg(up + i * PAGE_SIZE); +#endif vm_page_unwire(m, 0); vm_page_free(m); } +#ifdef I386_CPU + invltlb(); +#endif } /* @@ -921,13 +938,13 @@ pmap_swapout_proc(p) upobj = p->p_upages_obj; up = (vm_offset_t)p->p_uarea; - pmap_qremove(up, UAREA_PAGES); for (i = 0; i < UAREA_PAGES; i++) { m = vm_page_lookup(upobj, i); if (m == NULL) panic("pmap_swapout_proc: upage already missing?"); vm_page_dirty(m); vm_page_unwire(m, 0); + pmap_kremove(up + i * PAGE_SIZE); } } @@ -939,7 +956,6 @@ pmap_swapin_proc(p) struct proc *p; { int i, rv; - vm_page_t ma[UAREA_PAGES]; vm_object_t upobj; vm_offset_t up; vm_page_t m; @@ -948,6 +964,7 @@ pmap_swapin_proc(p) up = (vm_offset_t)p->p_uarea; for (i = 0; i < UAREA_PAGES; i++) { m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); + pmap_kenter(up + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) @@ -955,12 +972,10 @@ pmap_swapin_proc(p) m = vm_page_lookup(upobj, i); m->valid = VM_PAGE_BITS_ALL; } - ma[i] = m; vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } - pmap_qenter(up, ma, UAREA_PAGES); } /* @@ -971,11 +986,14 @@ pmap_swapin_proc(p) void pmap_new_thread(struct thread *td) { +#ifdef I386_CPU + int updateneeded = 0; +#endif int i; - vm_page_t ma[KSTACK_PAGES]; vm_object_t ksobj; vm_page_t m; vm_offset_t ks; + pt_entry_t *ptek, oldpte; /* * allocate object for the kstack @@ -986,33 +1004,45 @@ pmap_new_thread(struct thread *td) 
td->td_kstack_obj = ksobj; } +#ifdef KSTACK_GUARD /* get a kernel virtual address for the kstack for this thread */ ks = td->td_kstack; -#ifdef KSTACK_GUARD if (ks == 0) { ks = kmem_alloc_nofault(kernel_map, (KSTACK_PAGES + 1) * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); - if (*vtopte(ks) != 0) - pmap_qremove(ks, 1); ks += PAGE_SIZE; td->td_kstack = ks; } + + ptek = vtopte(ks - PAGE_SIZE); + oldpte = *ptek; + *ptek = 0; + if (oldpte) { +#ifdef I386_CPU + updateneeded = 1; #else + invlpg(ks - PAGE_SIZE); +#endif + } + ptek++; +#else + /* get a kernel virtual address for the kstack for this thread */ + ks = td->td_kstack; if (ks == 0) { ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); td->td_kstack = ks; } + ptek = vtopte(ks); #endif for (i = 0; i < KSTACK_PAGES; i++) { /* * Get a kernel stack page */ m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); - ma[i] = m; /* * Wire the page @@ -1020,12 +1050,28 @@ pmap_new_thread(struct thread *td) m->wire_count++; cnt.v_wire_count++; + oldpte = *(ptek + i); + /* + * Enter the page into the kernel address space. + */ + *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; + if (oldpte) { +#ifdef I386_CPU + updateneeded = 1; +#else + invlpg(ks + i * PAGE_SIZE); +#endif + } + vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } - pmap_qenter(ks, ma, KSTACK_PAGES); +#ifdef I386_CPU + if (updateneeded) + invltlb(); +#endif } /* @@ -1040,18 +1086,27 @@ pmap_dispose_thread(td) vm_object_t ksobj; vm_offset_t ks; vm_page_t m; + pt_entry_t *ptek, oldpte; ksobj = td->td_kstack_obj; ks = td->td_kstack; - pmap_qremove(ks, KSTACK_PAGES); + ptek = vtopte(ks); for (i = 0; i < KSTACK_PAGES; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_busy(m); + oldpte = *(ptek + i); + *(ptek + i) = 0; +#ifndef I386_CPU + invlpg(ks + i * PAGE_SIZE); +#endif vm_page_unwire(m, 0); vm_page_free(m); } +#ifdef I386_CPU + invltlb(); +#endif } /* @@ -1068,13 +1123,13 @@ pmap_swapout_thread(td) ksobj = td->td_kstack_obj; ks = td->td_kstack; - pmap_qremove(ks, KSTACK_PAGES); for (i = 0; i < KSTACK_PAGES; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_swapout_thread: kstack already missing?"); vm_page_dirty(m); vm_page_unwire(m, 0); + pmap_kremove(ks + i * PAGE_SIZE); } } @@ -1086,7 +1141,6 @@ pmap_swapin_thread(td) struct thread *td; { int i, rv; - vm_page_t ma[KSTACK_PAGES]; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; @@ -1095,6 +1149,7 @@ pmap_swapin_thread(td) ks = td->td_kstack; for (i = 0; i < KSTACK_PAGES; i++) { m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); + pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(ksobj, &m, 1, 0); if (rv != VM_PAGER_OK) @@ -1102,12 +1157,10 @@ pmap_swapin_thread(td) m = vm_page_lookup(ksobj, i); m->valid = VM_PAGE_BITS_ALL; } - ma[i] = m; vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } - pmap_qenter(ks, ma, KSTACK_PAGES); } /*************************************************** @@ -1202,8 +1255,7 @@ pmap_pinit0(pmap) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); - pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t)IdlePTD); - invlpg((vm_offset_t)pmap->pm_pdir); + pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) 
IdlePTD); pmap->pm_count = 1; pmap->pm_ptphint = NULL; pmap->pm_active = 0; @@ -1249,7 +1301,7 @@ pmap_pinit(pmap) vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/ ptdpg->valid = VM_PAGE_BITS_ALL; - pmap_qenter((vm_offset_t) pmap->pm_pdir, &ptdpg, 1); + pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); @@ -2155,7 +2207,13 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; - pmap_invalidate_page(pmap, va); +#ifdef SMP + cpu_invlpg((void *)va); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); +#else + invltlb_1pg(va); +#endif } return; } @@ -2223,7 +2281,13 @@ validate: if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { - pmap_invalidate_page(pmap, va); +#ifdef SMP + cpu_invlpg((void *)va); + if (pmap->pm_active & PCPU_GET(other_cpus)) + smp_invltlb(); +#else + invltlb_1pg(va); +#endif } } } @@ -2338,7 +2402,6 @@ void * pmap_kenter_temporary(vm_offset_t pa, int i) { pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); - invlpg((vm_offset_t)crashdumpmap + (i * PAGE_SIZE)); return ((void *)crashdumpmap); } @@ -2647,6 +2710,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t pdnxt; pd_entry_t src_frame, dst_frame; vm_page_t m; + pd_entry_t saved_pde; if (dst_addr != src_addr) return; @@ -2656,7 +2720,17 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, return; dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; - for (addr = src_addr; addr < end_addr; addr = pdnxt) { + if (dst_frame != (APTDpde & PG_FRAME)) { + APTDpde = dst_frame | PG_RW | PG_V; +#if defined(SMP) + /* The page directory is not shared between CPUs */ + cpu_invltlb(); +#else + invltlb(); +#endif + } + saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V); + for(addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; @@ -2697,14 +2771,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, if (pdnxt > end_addr) pdnxt = end_addr; - /* - * Have to recheck this before every avtopte() call below - * in case we have blocked and something else used APTDpde. - */ - if (dst_frame != (APTDpde & PG_FRAME)) { - APTDpde = dst_frame | PG_RW | PG_V; - invltlb(); - } src_pte = vtopte(addr); dst_pte = avtopte(addr); while (addr < pdnxt) { @@ -2720,6 +2786,16 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); + if ((APTDpde & PG_FRAME) != + (saved_pde & PG_FRAME)) { + APTDpde = saved_pde; +printf ("IT HAPPENNED!"); +#if defined(SMP) + cpu_invltlb(); +#else + invltlb(); +#endif + } if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and @@ -2763,15 +2839,12 @@ void pmap_zero_page(vm_offset_t phys) { -#ifdef SMP - /* XXX overkill, we only want to disable migration here */ - /* XXX or maybe not. 
down the track we have reentrancy issues */ - critical_enter(); -#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); + *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ + invltlb_1pg((vm_offset_t)CADDR2); + #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); @@ -2779,9 +2852,6 @@ pmap_zero_page(vm_offset_t phys) #endif bzero(CADDR2, PAGE_SIZE); *CMAP2 = 0; -#ifdef SMP - critical_exit(); -#endif } /* @@ -2794,15 +2864,12 @@ void pmap_zero_page_area(vm_offset_t phys, int off, int size) { -#ifdef SMP - /* XXX overkill, we only want to disable migration here */ - /* XXX or maybe not. down the track we have reentrancy issues */ - critical_enter(); -#endif if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); + *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; - cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */ + invltlb_1pg((vm_offset_t)CADDR2); + #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); @@ -2810,9 +2877,6 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size) #endif bzero((char *)CADDR2 + off, size); *CMAP2 = 0; -#ifdef SMP - critical_exit(); -#endif } /* @@ -2825,11 +2889,6 @@ void pmap_copy_page(vm_offset_t src, vm_offset_t dst) { -#ifdef SMP - /* XXX overkill, we only want to disable migration here */ - /* XXX or maybe not. down the track we have reentrancy issues */ - critical_enter(); -#endif if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) @@ -2837,14 +2896,17 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst) *CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; - cpu_invlpg((u_int)CADDR1); /* SMP: local only */ - cpu_invlpg((u_int)CADDR2); /* SMP: local only */ +#ifdef I386_CPU + invltlb(); +#else + invlpg((u_int)CADDR1); + invlpg((u_int)CADDR2); +#endif + bcopy(CADDR1, CADDR2, PAGE_SIZE); + *CMAP1 = 0; *CMAP2 = 0; -#ifdef SMP - critical_exit(); -#endif } @@ -2944,7 +3006,7 @@ pmap_remove_pages(pmap, sva, eva) if (tpte == 0) { printf("TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); - panic("bad peter"); + panic("bad pte"); } /* @@ -3260,13 +3322,14 @@ pmap_mapdev(pa, size) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; - for (tmpva = va; size > 0; ) { + for (tmpva = va; size > 0;) { pte = vtopte(tmpva); *pte = pa | PG_RW | PG_V | pgeflag; size -= PAGE_SIZE; tmpva += PAGE_SIZE; + pa += PAGE_SIZE; } - invlpg_range(va, tmpva); + invltlb(); return ((void *)(va + offset)); } @@ -3276,20 +3339,11 @@ pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { - vm_offset_t base, offset, tmpva; - pt_entry_t *pte; + vm_offset_t base, offset; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); - - for (tmpva = base; size > 0; ) { - pte = vtopte(tmpva); - *pte = 0; - size -= PAGE_SIZE; - tmpva += PAGE_SIZE; - } - invlpg_range(va, tmpva); kmem_free(kernel_map, base, size); } diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index 0649009..bc58672 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -1591,6 +1591,42 @@ ENTRY(ssdtosd) popl %ebx ret +/* load_cr0(cr0) */ +ENTRY(load_cr0) + movl 4(%esp),%eax + movl %eax,%cr0 + ret + +/* rcr0() */ +ENTRY(rcr0) + movl %cr0,%eax + ret + +/* rcr3() */ +ENTRY(rcr3) + movl %cr3,%eax + ret + +/* void load_cr3(caddr_t cr3) */ +ENTRY(load_cr3) +#ifdef SWTCH_OPTIM_STATS + incl tlb_flush_count +#endif + movl 4(%esp),%eax + movl 
%eax,%cr3 + ret + +/* rcr4() */ +ENTRY(rcr4) + movl %cr4,%eax + ret + +/* void load_cr4(caddr_t cr4) */ +ENTRY(load_cr4) + movl 4(%esp),%eax + movl %eax,%cr4 + ret + /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 94d5c3a..969541f 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -227,6 +227,62 @@ invd(void) __asm __volatile("invd"); } +#if defined(SMP) && defined(_KERNEL) + +/* + * When using APIC IPI's, invlpg() is not simply the invlpg instruction + * (this is a bug) and the inlining cost is prohibitive since the call + * executes into the IPI transmission system. + */ +void invlpg __P((u_int addr)); +void invltlb __P((void)); + +static __inline void +cpu_invlpg(void *addr) +{ + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +} + +static __inline void +cpu_invltlb(void) +{ + u_int temp; + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() + * is inlined. + */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) + : : "memory"); +#if defined(SWTCH_OPTIM_STATS) + ++tlb_flush_count; +#endif +} + +#else /* !(SMP && _KERNEL) */ + +static __inline void +invlpg(u_int addr) +{ + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +} + +static __inline void +invltlb(void) +{ + u_int temp; + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() + * is inlined. + */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) + : : "memory"); +#ifdef SWTCH_OPTIM_STATS + ++tlb_flush_count; +#endif +} + +#endif /* SMP && _KERNEL */ + static __inline u_short inw(u_int port) { @@ -292,6 +348,15 @@ outw(u_int port, u_short data) } static __inline u_int +rcr2(void) +{ + u_int data; + + __asm __volatile("movl %%cr2,%0" : "=r" (data)); + return (data); +} + +static __inline u_int read_eflags(void) { u_int ef; @@ -355,162 +420,6 @@ wrmsr(u_int msr, u_int64_t newval) __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } -static __inline void -load_cr0(u_int data) -{ - - __asm __volatile("movl %0,%%cr0" : : "r" (data)); -} - -static __inline u_int -rcr0(void) -{ - u_int data; - - __asm __volatile("movl %%cr0,%0" : "=r" (data)); - return (data); -} - -static __inline u_int -rcr2(void) -{ - u_int data; - - __asm __volatile("movl %%cr2,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_cr3(u_int data) -{ - - __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); -#if defined(SWTCH_OPTIM_STATS) - ++tlb_flush_count; -#endif -} - -static __inline u_int -rcr3(void) -{ - u_int data; - - __asm __volatile("movl %%cr3,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_cr4(u_int data) -{ - __asm __volatile("movl %0,%%cr4" : : "r" (data)); -} - -static __inline u_int -rcr4(void) -{ - u_int data; - - __asm __volatile("movl %%cr4,%0" : "=r" (data)); - return (data); -} - -/* - * Global TLB flush (except for thise for pages marked PG_G) - */ -static __inline void -cpu_invltlb(void) -{ - - load_cr3(rcr3()); -} - -/* - * TLB flush for an individual page (even if it has PG_G). - * Only works on 486+ CPUs (i386 does not have PG_G). - */ -static __inline void -cpu_invlpg(u_int addr) -{ - -#ifndef I386_CPU - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -#else - cpu_invltlb(); -#endif -} - -#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ -/* - * Same as above but for a range of pages. 
- */ -static __inline void -cpu_invlpg_range(u_int startva, u_int endva) -{ -#ifndef I386_CPU - u_int addr; - - for (addr = startva; addr < endva; addr += PAGE_SIZE) - __asm __volatile("invlpg %0" : : "m" (*(char *)addr)); - __asm __volatile("" : : : "memory"); -#else - cpu_invltlb(); -#endif -} -#endif - -#ifdef SMP -extern void smp_invlpg(u_int addr); -extern void smp_masked_invlpg(u_int mask, u_int addr); -#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ -extern void smp_invlpg_range(u_int startva, u_int endva); -extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva); -#endif -extern void smp_invltlb(void); -extern void smp_masked_invltlb(u_int mask); -#endif - -/* - * Generic page TLB flush. Takes care of SMP. - */ -static __inline void -invlpg(u_int addr) -{ - - cpu_invlpg(addr); -#ifdef SMP - smp_invlpg(addr); -#endif -} - -#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */ -/* - * Generic TLB flush for a range of pages. Takes care of SMP. - * Saves many IPIs for SMP mode. - */ -static __inline void -invlpg_range(u_int startva, u_int endva) -{ - - cpu_invlpg_range(startva, endva); -#ifdef SMP - smp_invlpg_range(startva, endva); -#endif -} -#endif - -/* - * Generic global TLB flush (except for thise for pages marked PG_G) - */ -static __inline void -invltlb(void) -{ - - cpu_invltlb(); -#ifdef SMP - smp_invltlb(); -#endif -} - static __inline u_int rfs(void) { @@ -672,8 +581,6 @@ cpu_critical_exit(critical_t eflags) int breakpoint __P((void)); u_int bsfl __P((u_int mask)); u_int bsrl __P((u_int mask)); -void cpu_invlpg __P((u_int addr)); -void cpu_invlpg_range __P((u_int start, u_int end)); void disable_intr __P((void)); void do_cpuid __P((u_int ax, u_int *p)); void enable_intr __P((void)); @@ -684,26 +591,15 @@ void insl __P((u_int port, void *addr, size_t cnt)); void insw __P((u_int port, void *addr, size_t cnt)); void invd __P((void)); void invlpg __P((u_int addr)); -void invlpg_range __P((u_int start, u_int end)); void invltlb __P((void)); u_short inw __P((u_int port)); -void load_cr0 __P((u_int cr0)); -void load_cr3 __P((u_int cr3)); -void load_cr4 __P((u_int cr4)); -void load_fs __P((u_int sel)); -void load_gs __P((u_int sel)); void outb __P((u_int port, u_char data)); void outl __P((u_int port, u_int data)); void outsb __P((u_int port, void *addr, size_t cnt)); void outsl __P((u_int port, void *addr, size_t cnt)); void outsw __P((u_int port, void *addr, size_t cnt)); void outw __P((u_int port, u_short data)); -u_int rcr0 __P((void)); u_int rcr2 __P((void)); -u_int rcr3 __P((void)); -u_int rcr4 __P((void)); -u_int rfs __P((void)); -u_int rgs __P((void)); u_int64_t rdmsr __P((u_int msr)); u_int64_t rdpmc __P((u_int pmc)); u_int64_t rdtsc __P((void)); @@ -711,12 +607,22 @@ u_int read_eflags __P((void)); void wbinvd __P((void)); void write_eflags __P((u_int ef)); void wrmsr __P((u_int msr, u_int64_t newval)); +u_int rfs __P((void)); +u_int rgs __P((void)); +void load_fs __P((u_int sel)); +void load_gs __P((u_int sel)); critical_t cpu_critical_enter __P((void)); void cpu_critical_exit __P((critical_t eflags)); #endif /* __GNUC__ */ +void load_cr0 __P((u_int cr0)); +void load_cr3 __P((u_int cr3)); +void load_cr4 __P((u_int cr4)); void ltr __P((u_short sel)); +u_int rcr0 __P((void)); +u_int rcr3 __P((void)); +u_int rcr4 __P((void)); void reset_dbregs __P((void)); __END_DECLS diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 008dfc5..27ee7ae 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h 
@@ -287,14 +287,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -#ifdef APIC_IO -/* Variables needed for SMP tlb shootdown. */ -u_int smp_tlb_addr1; -u_int smp_tlb_addr2; -volatile int smp_tlb_wait; -static struct mtx smp_tlb_mtx; -#endif - /* * Local data and functions. */ @@ -343,9 +335,6 @@ init_locks(void) #ifdef USE_COMLOCK mtx_init(&com_mtx, "com", MTX_SPIN); #endif /* USE_COMLOCK */ -#ifdef APIC_IO - mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN); -#endif } /* @@ -615,10 +604,6 @@ mp_enable(u_int boot_addr) /* install an inter-CPU IPI for TLB invalidation */ setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLPG_OFFSET, Xinvlpg, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(XINVLRNG_OFFSET, Xinvlrng, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* install an inter-CPU IPI for forwarding hardclock() */ setidt(XHARDCLOCK_OFFSET, Xhardclock, @@ -2201,198 +2186,42 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } -#if defined(APIC_IO) - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif - /* * Flush the TLB on all other CPU's + * + * XXX: Needs to handshake and wait for completion before proceding. 
*/ -static void -smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2) -{ - u_int ncpu; - register_t eflags; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - ipi_all_but_self(vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2) -{ - u_int m; - int i, ncpu, othercpus; - register_t eflags; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - /* XXX there should be a pcpu self mask */ - mask &= ~(1 << PCPU_GET(cpuid)); - if (mask == 0) - return; - /* Count the target cpus */ - ncpu = 0; - m = mask; - while ((i = ffs(m)) != 0) { - m >>= i; - ncpu++; - } - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - eflags = read_eflags(); - if ((eflags & PSL_I) == 0) - panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); - mtx_lock_spin(&smp_tlb_mtx); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - smp_tlb_wait = 0; - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (atomic_load_acq_int(&smp_tlb_wait) < ncpu) - /* XXX cpu_pause() */ ; - mtx_unlock_spin(&smp_tlb_mtx); -} -#endif - void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } + if (smp_started) + ipi_all_but_self(IPI_INVLTLB); #endif /* APIC_IO */ } void -smp_invlpg(u_int addr) +invlpg(u_int addr) { -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -#endif /* APIC_IO */ -} + __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); -void -smp_invlpg_range(u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } void -smp_masked_invltlb(u_int mask) +invltlb(void) { -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -#endif /* APIC_IO */ -} + u_long temp; -void -smp_masked_invlpg(u_int mask, u_int addr) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -#endif /* APIC_IO */ -} + /* + * This should be implemented as load_cr3(rcr3()) when load_cr3() is + * inlined. 
+ */ + __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); -void -smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2) -{ -#if defined(APIC_IO) - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -#endif /* APIC_IO */ + /* send a message to the other CPUs */ + smp_invltlb(); } @@ -2451,9 +2280,6 @@ ap_init(void) /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - if (bootverbose) - apic_dump("ap_init()"); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); if (smp_cpus == mp_ncpus) { @@ -2486,8 +2312,7 @@ forwarded_statclock(struct trapframe frame) { mtx_lock_spin(&sched_lock); - statclock_process(curthread->td_kse, TRAPF_PC(&frame), - TRAPF_USERMODE(&frame)); + statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); mtx_unlock_spin(&sched_lock); } diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index 618bb3f..7358a9e 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -151,7 +151,7 @@ extern pt_entry_t PTmap[], APTmap[]; extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde, APTDpde; -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ +extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */ #endif #ifdef _KERNEL @@ -267,7 +267,9 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t)); void pmap_unmapdev __P((vm_offset_t, vm_size_t)); pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2; vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); +#ifdef SMP void pmap_set_opt __P((void)); +#endif #endif /* _KERNEL */ diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 4136c20..34228e2 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -51,8 +51,6 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */ * Interprocessor interrupts for SMP. */ #define IPI_INVLTLB XINVLTLB_OFFSET -#define IPI_INVLPG XINVLPG_OFFSET -#define IPI_INVLRNG XINVLRNG_OFFSET #define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET #define IPI_AST XCPUAST_OFFSET #define IPI_STOP XCPUSTOP_OFFSET @@ -109,6 +107,7 @@ void assign_apic_irq __P((int apic, int intpin, int irq)); void revoke_apic_irq __P((int irq)); void bsp_apic_configure __P((void)); void init_secondary __P((void)); +void smp_invltlb __P((void)); void forward_statclock __P((void)); void forwarded_statclock __P((struct trapframe frame)); void forward_hardclock __P((void)); diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index ea8e84b..d666148 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -90,6 +90,13 @@ */ #define APIC_INTR_REORDER +/* + * Redirect clock interrupts to a higher priority (fast intr) vector, + * while still using the slow interrupt handler. Only effective when + * APIC_INTR_REORDER is defined. + */ +#define APIC_INTR_HIGHPRI_CLOCK + #endif /* APIC_IO */ /* diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index e3a37e1..95c9133 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -181,108 +181,30 @@ Xspuriousint: iret /* - * Global address space TLB shootdown. + * Handle TLB shootdowns. 
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 4136c20..34228e2 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -51,8 +51,6 @@ extern int current_postcode;  /** XXX currently in mp_machdep.c */
  * Interprocessor interrupts for SMP.
  */
 #define	IPI_INVLTLB		XINVLTLB_OFFSET
-#define	IPI_INVLPG		XINVLPG_OFFSET
-#define	IPI_INVLRNG		XINVLRNG_OFFSET
 #define	IPI_RENDEZVOUS		XRENDEZVOUS_OFFSET
 #define	IPI_AST			XCPUAST_OFFSET
 #define	IPI_STOP		XCPUSTOP_OFFSET
@@ -109,6 +107,7 @@ void	assign_apic_irq __P((int apic, int intpin, int irq));
 void	revoke_apic_irq __P((int irq));
 void	bsp_apic_configure __P((void));
 void	init_secondary __P((void));
+void	smp_invltlb __P((void));
 void	forward_statclock __P((void));
 void	forwarded_statclock __P((struct trapframe frame));
 void	forward_hardclock __P((void));
diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h
index ea8e84b..d666148 100644
--- a/sys/i386/include/smptests.h
+++ b/sys/i386/include/smptests.h
@@ -90,6 +90,13 @@
  */
 #define APIC_INTR_REORDER
 
+/*
+ * Redirect clock interrupts to a higher priority (fast intr) vector,
+ * while still using the slow interrupt handler. Only effective when
+ * APIC_INTR_REORDER is defined.
+ */
+#define APIC_INTR_HIGHPRI_CLOCK
+
 #endif /* APIC_IO */
 
 /*
diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index e3a37e1..95c9133 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -181,108 +181,30 @@ Xspuriousint:
 	iret
 
 /*
- * Global address space TLB shootdown.
+ * Handle TLB shootdowns.
 */
 	.text
 	SUPERALIGN_TEXT
 	.globl	Xinvltlb
Xinvltlb:
 	pushl	%eax
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	mov	%ax, %ds
 
#ifdef COUNT_XINVLTLB_HITS
 	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
+	movl	$KPSEL, %eax
 	mov	%ax, %fs
 	movl	PCPU(CPUID), %eax
 	popl	%fs
-	incl	xhits_gbl(,%eax,4)
+	ss
+	incl	_xhits(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
 
 	movl	%cr3, %eax		/* invalidate the TLB */
 	movl	%eax, %cr3
 
+	ss				/* stack segment, avoid %ds load */
 	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */
 
-	lock
-	incl	smp_tlb_wait
-
-	popl	%ds
-	popl	%eax
-	iret
-
-/*
- * Single page TLB shootdown
- */
-	.text
-	SUPERALIGN_TEXT
-	.globl	Xinvlpg
-Xinvlpg:
-	pushl	%eax
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	mov	%ax, %ds
-
-#ifdef COUNT_XINVLTLB_HITS
-	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
-	mov	%ax, %fs
-	movl	PCPU(CPUID), %eax
-	popl	%fs
-	ss
-	incl	xhits_pg(,%eax,4)
-#endif /* COUNT_XINVLTLB_HITS */
-
-	movl	smp_tlb_addr1, %eax
-	invlpg	(%eax)			/* invalidate single page */
-
-	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popl	%ds
-	popl	%eax
-	iret
-
-/*
- * Page range TLB shootdown.
- */
-	.text
-	SUPERALIGN_TEXT
-	.globl	Xinvlrng
-Xinvlrng:
-	pushl	%eax
-	pushl	%edx
-	pushl	%ds
-	movl	$KDSEL, %eax		/* Kernel data selector */
-	mov	%ax, %ds
-
-#ifdef COUNT_XINVLTLB_HITS
-	pushl	%fs
-	movl	$KPSEL, %eax		/* Private space selector */
-	mov	%ax, %fs
-	movl	PCPU(CPUID), %eax
-	popl	%fs
-	incl	xhits_rng(,%eax,4)
-#endif /* COUNT_XINVLTLB_HITS */
-
-	movl	smp_tlb_addr1, %edx
-	movl	smp_tlb_addr2, %eax
-1:	invlpg	(%edx)			/* invalidate single page */
-	addl	$PAGE_SIZE, %edx
-	cmpl	%edx, %eax
-	jb	1b
-
-	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */
-
-	lock
-	incl	smp_tlb_wait
-
-	popl	%ds
-	popl	%edx
 	popl	%eax
 	iret
 
@@ -521,6 +443,12 @@ Xrendezvous:
 
 	.data
 
+#ifdef COUNT_XINVLTLB_HITS
+	.globl	_xhits
+_xhits:
+	.space	(NCPU * 4), 0
+#endif /* COUNT_XINVLTLB_HITS */
+
 	.globl	apic_pin_trigger
apic_pin_trigger:
 	.long	0
diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c
index 92bf581..cfc162b 100644
--- a/sys/i386/isa/intr_machdep.c
+++ b/sys/i386/isa/intr_machdep.c
@@ -499,6 +499,14 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
 	} else {
 		vector = TPR_SLOW_INTS + intr;
+#ifdef APIC_INTR_REORDER
+#ifdef APIC_INTR_HIGHPRI_CLOCK
+		/* XXX: Hack (kludge?) for more accurate clock. */
+		if (intr == apic_8254_intr || intr == 8) {
+			vector = TPR_FAST_INTS + intr;
+		}
+#endif
+#endif
 		setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	}
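The APIC_INTR_HIGHPRI_CLOCK kludge above changes only which IDT vector the slow handler is installed at: on the local APIC, an interrupt's priority class is the high nibble of its vector, so moving the clock from the TPR_SLOW_INTS range to the TPR_FAST_INTS range raises its priority without touching the handler itself. A standalone userland illustration of the arithmetic, assuming the 8254 timer sits on IRQ 0 and the RTC on IRQ 8, with the TPR_* values from intr_machdep.h below:

#include <stdio.h>

#define	TPR_SLOW_INTS	0x20	/* from intr_machdep.h */
#define	TPR_FAST_INTS	0x60

int
main(void)
{
	int irqs[2] = { 0, 8 };	/* assumed: 8254 timer, RTC */
	int i, intr;

	for (i = 0; i < 2; i++) {
		intr = irqs[i];
		/* Priority class on the local APIC is vector >> 4. */
		printf("IRQ %2d: slow vector 0x%02x (class %d), "
		    "fast vector 0x%02x (class %d)\n", intr,
		    TPR_SLOW_INTS + intr, (TPR_SLOW_INTS + intr) >> 4,
		    TPR_FAST_INTS + intr, (TPR_FAST_INTS + intr) >> 4);
	}
	return (0);
}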
diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h
index 789b02b..1726635 100644
--- a/sys/i386/isa/intr_machdep.h
+++ b/sys/i386/isa/intr_machdep.h
@@ -88,7 +88,6 @@
 /* IDT vector base for regular (aka. slow) and fast interrupts */
 #define TPR_SLOW_INTS		0x20
 #define TPR_FAST_INTS		0x60
-/* XXX note that the AST interrupt is at 0x50 */
 
 /* blocking values for local APIC Task Priority Register */
 #define TPR_BLOCK_HWI		0x4f		/* hardware INTs */
@@ -105,23 +104,20 @@
 #endif /** TEST_TEST1 */
 
 /* TLB shootdowns */
-#define XINVLTLB_OFFSET		(ICU_OFFSET + 112)	/* 0x90 */
-#define XINVLPG_OFFSET		(ICU_OFFSET + 113)	/* 0x91 */
-#define XINVLRNG_OFFSET		(ICU_OFFSET + 114)	/* 0x92 */
+#define XINVLTLB_OFFSET		(ICU_OFFSET + 112)
 
 /* inter-cpu clock handling */
-#define XHARDCLOCK_OFFSET	(ICU_OFFSET + 120)	/* 0x98 */
-#define XSTATCLOCK_OFFSET	(ICU_OFFSET + 121)	/* 0x99 */
+#define XHARDCLOCK_OFFSET	(ICU_OFFSET + 113)
+#define XSTATCLOCK_OFFSET	(ICU_OFFSET + 114)
 
 /* inter-CPU rendezvous */
-#define XRENDEZVOUS_OFFSET	(ICU_OFFSET + 122)	/* 0x9A */
+#define XRENDEZVOUS_OFFSET	(ICU_OFFSET + 115)
 
 /* IPI to generate an additional software trap at the target CPU */
-/* XXX in the middle of the interrupt range, overlapping IRQ48 */
-#define XCPUAST_OFFSET		(ICU_OFFSET + 48)	/* 0x50 */
+#define XCPUAST_OFFSET		(ICU_OFFSET + 48)
 
 /* IPI to signal CPUs to stop and wait for another CPU to restart them */
-#define XCPUSTOP_OFFSET		(ICU_OFFSET + 128)	/* 0xA0 */
+#define XCPUSTOP_OFFSET		(ICU_OFFSET + 128)
 
 /*
  * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@@ -185,9 +181,7 @@ inthand_t
 	IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
 
 inthand_t
-	Xinvltlb,	/* TLB shootdowns - global */
-	Xinvlpg,	/* TLB shootdowns - 1 page */
-	Xinvlrng,	/* TLB shootdowns - page range */
+	Xinvltlb,	/* TLB shootdowns */
 	Xhardclock,	/* Forward hardclock() */
 	Xstatclock,	/* Forward statclock() */
 	Xcpuast,	/* Additional software trap on other cpu */
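With the per-page and range shootdown vectors gone, the renumbering packs the surviving IPIs into one contiguous block right after XINVLTLB. A throwaway consistency check; the ICU_OFFSET value of 0x20 is an assumption inferred from the deleted hex comments, which annotated ICU_OFFSET + 112 as 0x90:

#include <assert.h>

#define	ICU_OFFSET		0x20	/* assumed; see deleted hex comments */
#define	XINVLTLB_OFFSET		(ICU_OFFSET + 112)
#define	XHARDCLOCK_OFFSET	(ICU_OFFSET + 113)
#define	XSTATCLOCK_OFFSET	(ICU_OFFSET + 114)
#define	XRENDEZVOUS_OFFSET	(ICU_OFFSET + 115)

int
main(void)
{
	/* Formerly 0x90, 0x98, 0x99, 0x9A; now one contiguous run. */
	assert(XINVLTLB_OFFSET == 0x90);
	assert(XHARDCLOCK_OFFSET == 0x91);
	assert(XSTATCLOCK_OFFSET == 0x92);
	assert(XRENDEZVOUS_OFFSET == 0x93);
	return (0);
}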
diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c
index 92bf581..cfc162b 100644
--- a/sys/i386/isa/nmi.c
+++ b/sys/i386/isa/nmi.c
@@ -499,6 +499,14 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
 	} else {
 		vector = TPR_SLOW_INTS + intr;
+#ifdef APIC_INTR_REORDER
+#ifdef APIC_INTR_HIGHPRI_CLOCK
+		/* XXX: Hack (kludge?) for more accurate clock. */
+		if (intr == apic_8254_intr || intr == 8) {
+			vector = TPR_FAST_INTS + intr;
+		}
+#endif
+#endif
 		setidt(vector, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 3bad53c..3a1b56c 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -551,7 +551,7 @@ exec_map_first_page(imgp)
 	vm_page_wire(ma[0]);
 	vm_page_wakeup(ma[0]);
 
-	pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
+	pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0]));
 	imgp->firstpage = ma[0];
 
 	return 0;
@@ -564,7 +564,7 @@ exec_unmap_first_page(imgp)
 	GIANT_REQUIRED;
 
 	if (imgp->firstpage) {
-		pmap_qremove((vm_offset_t)imgp->image_header, 1);
+		pmap_kremove((vm_offset_t) imgp->image_header);
 		vm_page_unwire(imgp->firstpage, 1);
 		imgp->firstpage = NULL;
 	}
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index be50836..39e3243 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -222,9 +222,6 @@ static struct witness_order_list_entry order_lists[] = {
 	{ "icu", &lock_class_mtx_spin },
 #ifdef SMP
 	{ "smp rendezvous", &lock_class_mtx_spin },
-#ifdef __i386__
-	{ "tlb", &lock_class_mtx_spin },
-#endif
 #endif
 	{ "clk", &lock_class_mtx_spin },
 	{ NULL, NULL },
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 02b75b9..e15f191 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -278,14 +278,14 @@ proc_rwmem(struct proc *p, struct uio *uio)
 		vm_object_reference(object);
 		vm_map_lookup_done(tmap, out_entry);
 
-		pmap_qenter(kva, &m, 1);
+		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
 
 		/*
 		 * Now do the i/o move.
 		 */
 		error = uiomove((caddr_t)(kva + page_offset), len, uio);
 
-		pmap_qremove(kva, 1);
+		pmap_kremove(kva);
 
 		/*
 		 * release the page and the object
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index d3a849c..32e91ca 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -3244,7 +3244,7 @@ tryagain:
 		vm_page_wire(p);
 		p->valid = VM_PAGE_BITS_ALL;
 		vm_page_flag_clear(p, PG_ZERO);
-		pmap_qenter(pg, &p, 1);
+		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
 		bp->b_pages[index] = p;
 		vm_page_wakeup(p);
 	}
@@ -3272,7 +3272,7 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 			    bp->b_blkno, bp->b_lblkno);
 		}
 		bp->b_pages[index] = NULL;
-		pmap_qremove(pg, 1);
+		pmap_kremove(pg);
 		vm_page_busy(p);
 		vm_page_unwire(p, 0);
 		vm_page_free(p);
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 3a529b6..6cb678a 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -318,7 +318,7 @@ vm_pager_map_page(m)
 	vm_offset_t kva;
 
 	kva = kmem_alloc_wait(pager_map, PAGE_SIZE);
-	pmap_qenter(kva, &m, 1);
+	pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
 	return (kva);
 }
 
@@ -326,7 +326,7 @@ void
 vm_pager_unmap_page(kva)
 	vm_offset_t kva;
 {
-	pmap_qremove(kva, 1);
+	pmap_kremove(kva);
 	kmem_free_wakeup(pager_map, kva, PAGE_SIZE);
 }
diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c
index 5057b6e..a1b1d3c 100644
--- a/sys/vm/vm_zone.c
+++ b/sys/vm/vm_zone.c
@@ -386,7 +386,7 @@ _zget(vm_zone_t z)
 			break;
 		zkva = z->zkva + z->zpagecount * PAGE_SIZE;
-		pmap_qenter(zkva, &m, 1);
+		pmap_kenter(zkva, VM_PAGE_TO_PHYS(m));
 		bzero((caddr_t) zkva, PAGE_SIZE);
 		z->zpagecount++;
 		atomic_add_int(&zone_kmem_pages, 1);
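Every pmap_qenter()/pmap_qremove() conversion in this commit maps or unmaps exactly one page, which is why the array-based calls can be swapped for the single-page pmap_kenter()/pmap_kremove(). A sketch of the two idioms under kernel context, assuming this era's <vm/pmap.h> prototypes; the wrapper names are illustrative, not part of the commit:

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

/* Old idiom: array interface with a count of one. */
static void
map_one_page_old(vm_offset_t kva, vm_page_t m)
{
	pmap_qenter(kva, &m, 1);
}

/* New idiom: single-page interface taking the physical address. */
static void
map_one_page_new(vm_offset_t kva, vm_page_t m)
{
	pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
}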