Diffstat (limited to 'sys/i386')
-rw-r--r-- | sys/i386/i386/apic_vector.s   |  16
-rw-r--r-- | sys/i386/i386/db_trace.c      |   3
-rw-r--r-- | sys/i386/i386/intr_machdep.c  |  10
-rw-r--r-- | sys/i386/i386/mp_machdep.c    | 219
-rw-r--r-- | sys/i386/i386/pmap.c          | 184
-rw-r--r-- | sys/i386/i386/swtch.s         |   6
-rw-r--r-- | sys/i386/i386/vm_machdep.c    |  36
-rw-r--r-- | sys/i386/include/_types.h     |   1
-rw-r--r-- | sys/i386/include/pmap.h       |   3
-rw-r--r-- | sys/i386/include/sf_buf.h     |   3
-rw-r--r-- | sys/i386/include/smp.h        |  12
-rw-r--r-- | sys/i386/include/xen/xenvar.h |   1
-rw-r--r-- | sys/i386/xen/mp_machdep.c     | 198
-rw-r--r-- | sys/i386/xen/pmap.c           | 147
14 files changed, 333 insertions, 506 deletions
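
The bulk of the diff below is a mechanical conversion from the old single-word cpumask_t (manipulated with shifts, ffs() and &/| arithmetic) to the opaque cpuset_t type and its CPU_*() macros from <sys/cpuset.h>. A minimal sketch of the new idiom, using only macros that appear in the hunks below; the function and variable names (example_walk_cpus, mask, self) are hypothetical:

#include <sys/param.h>
#include <sys/cpuset.h>

static void
example_walk_cpus(cpuset_t mask, cpuset_t self)
{
	int cpu;

	CPU_SET(2, &mask);		/* was: mask |= (1 << 2)   */
	if (CPU_ISSET(2, &mask))	/* was: mask & (1 << 2)    */
		CPU_OR(&mask, &self);	/* was: mask |= self       */
	CPU_NAND(&mask, &self);		/* was: mask &= ~self      */
	if (CPU_EMPTY(&mask))		/* was: mask == 0          */
		return;

	/* was: while ((cpu = ffs(mask)) != 0) { cpu--; mask &= ~(1 << cpu); } */
	while ((cpu = cpusetobj_ffs(&mask)) != 0) {
		cpu--;
		CPU_CLR(cpu, &mask);
		/* ... act on 'cpu', e.g. send it an IPI ... */
	}
}
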
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 86e78c4..a78b601 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -357,20 +357,4 @@ IDTVEC(rendezvous) POP_FRAME iret -/* - * Clean up when we lose out on the lazy context switch optimization. - * ie: when we are about to release a PTD but a cpu is still borrowing it. - */ - SUPERALIGN_TEXT -IDTVEC(lazypmap) - PUSH_FRAME - SET_KERNEL_SREGS - cld - - call pmap_lazyfix_action - - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - POP_FRAME - iret #endif /* SMP */ diff --git a/sys/i386/i386/db_trace.c b/sys/i386/i386/db_trace.c index 445d9c5..79da4dc 100644 --- a/sys/i386/i386/db_trace.c +++ b/sys/i386/i386/db_trace.c @@ -312,8 +312,7 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td) frame_type = TRAP_TIMERINT; else if (strcmp(name, "Xcpustop") == 0 || strcmp(name, "Xrendezvous") == 0 || - strcmp(name, "Xipi_intr_bitmap_handler") == 0 || - strcmp(name, "Xlazypmap") == 0) + strcmp(name, "Xipi_intr_bitmap_handler") == 0) frame_type = TRAP_INTERRUPT; } diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c index 77b8004..56529f7 100644 --- a/sys/i386/i386/intr_machdep.c +++ b/sys/i386/i386/intr_machdep.c @@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -432,7 +431,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused) return; #endif + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index a4db401..966e46e 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/cons.h> /* cngetc() */ +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -165,7 +166,6 @@ u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; -u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif @@ -173,7 +173,7 @@ static u_long *ipi_hardclock_counts[MAXCPU]; * Local data and functions. 
*/ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -210,7 +210,7 @@ static void release_aps(void *dummy); static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; @@ -382,7 +382,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -524,7 +524,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -575,10 +575,6 @@ cpu_mp_start(void) setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - /* Install an inter-CPU IPI for lazy pmap release */ - setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -659,6 +655,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; vm_offset_t addr; int gsel_tss; @@ -783,19 +780,22 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -932,6 +932,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus; #ifndef PC98 u_char mpbiosreason; #endif @@ -991,11 +992,13 @@ start_all_aps(void) } CHECK_PRINT("trace"); /* show checkpoints */ - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1192,6 +1195,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, #endif /* COUNT_XINVLTLB_HITS */ /* + * Send an IPI to specified CPU handling the bitmap logic. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + +/* * Flush the TLB on all other CPU's */ static void @@ -1215,28 +1242,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1245,39 +1263,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1324,7 +1328,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1336,7 +1340,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1348,7 +1352,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1401,7 +1405,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. 
*/ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1411,12 +1415,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1434,7 +1438,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1447,8 +1451,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1458,7 +1464,9 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } @@ -1466,7 +1474,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1474,11 +1482,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1490,23 +1500,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1533,30 +1545,30 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); static int sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) { - cpumask_t mask; + cpuset_t mask; int error; mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); + error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req); if (error || !req->newptr) return (error); - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if (! 
hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; + CPU_OR(&mask, &hyperthreading_cpus_mask); - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); + if (CPU_SUBSET(&mask, &all_cpus)) + CPU_CLR(0, &mask); hlt_cpus_mask = mask; return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", +SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S", "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); static int @@ -1570,15 +1582,15 @@ sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) return (error); if (disable) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); else - hlt_cpus_mask &= ~logical_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask); if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hlt_logical_cpus = disable; return (error); @@ -1605,18 +1617,18 @@ sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) #endif if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask); else - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hyperthreading_allowed = allowed; return (error); @@ -1626,7 +1638,7 @@ static void cpu_hlt_setup(void *dummy __unused) { - if (logical_cpus_mask != 0) { + if (!CPU_EMPTY(&logical_cpus_mask)) { TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", &hlt_logical_cpus); sysctl_ctx_init(&logical_cpu_clist); @@ -1640,20 +1652,21 @@ cpu_hlt_setup(void *dummy __unused) &logical_cpus_mask, 0, ""); if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); /* * If necessary for security purposes, force * hyperthreading off, regardless of the value * of hlt_logical_cpus. */ - if (hyperthreading_cpus_mask) { + if (!CPU_EMPTY(&hyperthreading_cpus_mask)) { SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hyperthreading_allowed, "IU", ""); if (! 
hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, + &hyperthreading_cpus_mask); } } } @@ -1662,7 +1675,7 @@ SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); int mp_grab_cpu_hlt(void) { - cpumask_t mask; + cpuset_t mask; #ifdef MP_WATCHDOG u_int cpuid; #endif @@ -1675,7 +1688,7 @@ mp_grab_cpu_hlt(void) #endif retval = 0; - while (mask & hlt_cpus_mask) { + while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) { retval = 1; __asm __volatile("sti; hlt" : : : "memory"); } @@ -1705,8 +1718,6 @@ mp_ipi_intrcnt(void *dummy) intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i); - intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index d10bbe5..c85f138 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> +#else +#include <sys/cpuset.h> #endif #include <vm/vm.h> @@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); @@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || 
!CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg) pd_entry_t *pde; pmap_t pmap; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); + /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being @@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg) pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } + } else + sched_unpin(); } static void @@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1058,21 +1075,23 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap == kernel_pmap ? 
pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); @@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1095,7 +1114,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1113,7 +1132,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap) #endif } - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1881,98 +1900,6 @@ retry: * Pmap allocation/deallocation routines. ***************************************************/ -#ifdef SMP -/* - * Deal with a SMP shootdown of other users of the pmap that we are - * trying to dispose of. This can be a bit hairy. 
- */ -static cpumask_t *lazymask; -static u_int lazyptd; -static volatile u_int lazywait; - -void pmap_lazyfix_action(void); - -void -pmap_lazyfix_action(void) -{ - cpumask_t mymask = PCPU_GET(cpumask); - -#ifdef COUNT_IPIS - (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; -#endif - if (rcr3() == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); - atomic_store_rel_int(&lazywait, 1); -} - -static void -pmap_lazyfix_self(cpumask_t mymask) -{ - - if (rcr3() == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); -} - - -static void -pmap_lazyfix(pmap_t pmap) -{ - cpumask_t mymask, mask; - u_int spins; - - while ((mask = pmap->pm_active) != 0) { - spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ - mtx_lock_spin(&smp_ipi_mtx); -#ifdef PAE - lazyptd = vtophys(pmap->pm_pdpt); -#else - lazyptd = vtophys(pmap->pm_pdir); -#endif - mymask = PCPU_GET(cpumask); - if (mask == mymask) { - lazymask = &pmap->pm_active; - pmap_lazyfix_self(mymask); - } else { - atomic_store_rel_int((u_int *)&lazymask, - (u_int)&pmap->pm_active); - atomic_store_rel_int(&lazywait, 0); - ipi_selected(mask, IPI_LAZYPMAP); - while (lazywait == 0) { - ia32_pause(); - if (--spins == 0) - break; - } - } - mtx_unlock_spin(&smp_ipi_mtx); - if (spins == 0) - printf("pmap_lazyfix: spun for 50000000\n"); - } -} - -#else /* SMP */ - -/* - * Cleaning up on uniprocessor is easy. For various reasons, we're - * unlikely to have to even execute this code, including the fact - * that the cleanup is deferred until the parent does a wait(2), which - * means that another userland process has run. - */ -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int cr3; - - cr3 = vtophys(pmap->pm_pdir); - if (cr3 == rcr3()) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); - } -} -#endif /* SMP */ - /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. @@ -1990,7 +1917,6 @@ pmap_release(pmap_t pmap) KASSERT(pmap->pm_root == NULL, ("pmap_release: pmap has reserved page table page(s)")); - pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); @@ -5078,11 +5004,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 680b032..6547569 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -174,12 +174,6 @@ ENTRY(cpu_switch) /* switch address space */ movl PCB_CR3(%edx),%eax -#ifdef PAE - cmpl %eax,IdlePDPT /* Kernel address space? */ -#else - cmpl %eax,IdlePTD /* Kernel address space? */ -#endif - je sw0 READ_CR3(%ebx) /* The same address space? 
*/ cmpl %ebx,%eax je sw0 diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 232e1a1..a084e09 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -573,11 +573,13 @@ kvtop(void *addr) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<<cpu_reset_proxyid)); + CPU_SETOF(cpu_reset_proxyid, &tcrp); + stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); @@ -596,25 +598,29 @@ cpu_reset() #endif #ifdef SMP - cpumask_t map; + cpuset_t map; u_int cnt; if (smp_active) { - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map != 0) { + sched_pin(); + map = PCPU_GET(other_cpus); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); + sched_unpin(); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ /* XXX: restart_cpus(1 << 0); */ - atomic_store_rel_int(&started_cpus, (1 << 0)); + CPU_SETOF(0, &started_cpus); + wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) @@ -626,7 +632,8 @@ cpu_reset() while (1); /* NOTREACHED */ - } + } else + sched_unpin(); DELAY(1000000); } @@ -795,7 +802,7 @@ sf_buf_alloc(struct vm_page *m, int flags) struct sf_head *hash_list; struct sf_buf *sf; #ifdef SMP - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; #endif int error; @@ -867,22 +874,23 @@ sf_buf_alloc(struct vm_page *m, int flags) */ #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - sf->cpumask = 0; + CPU_ZERO(&sf->cpumask); shootdown: sched_pin(); cpumask = PCPU_GET(cpumask); - if ((sf->cpumask & cpumask) == 0) { - sf->cpumask |= cpumask; + if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) { + CPU_OR(&sf->cpumask, &cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { - other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask; - if (other_cpus != 0) { - sf->cpumask |= other_cpus; + other_cpus = PCPU_GET(other_cpus); + CPU_NAND(&other_cpus, &sf->cpumask); + if (!CPU_EMPTY(&other_cpus)) { + CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } - sched_unpin(); + sched_unpin(); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index 7a969fe..3194fd6 100644 --- a/sys/i386/include/_types.h +++ b/sys/i386/include/_types.h @@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... 
*/ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef long double __double_t; typedef long double __float_t; diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index eeada2e..3012a00 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -155,6 +155,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -433,7 +434,7 @@ struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h index 7bc1095..415dcbb 100644 --- a/sys/i386/include/sf_buf.h +++ b/sys/i386/include/sf_buf.h @@ -29,6 +29,7 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ +#include <sys/_cpuset.h> #include <sys/queue.h> struct vm_page; @@ -40,7 +41,7 @@ struct sf_buf { vm_offset_t kva; /* va of mapping */ int ref_count; /* usage of this mapping */ #ifdef SMP - cpumask_t cpumask; /* cpus on which mapping is valid */ + cpuset_t cpumask; /* cpus on which mapping is valid */ #endif }; diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index d364cd9..cdec9c8 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -42,7 +42,6 @@ extern u_long *ipi_invlrng_counts[MAXCPU]; extern u_long *ipi_invlpg_counts[MAXCPU]; extern u_long *ipi_invlcache_counts[MAXCPU]; extern u_long *ipi_rendezvous_counts[MAXCPU]; -extern u_long *ipi_lazypmap_counts[MAXCPU]; #endif /* IPI handlers */ @@ -53,8 +52,7 @@ inthand_t IDTVEC(invlcache), /* Write back and invalidate cache */ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ - IDTVEC(rendezvous), /* handle CPU rendezvous */ - IDTVEC(lazypmap); /* handle lazy pmap release */ + IDTVEC(rendezvous); /* handle CPU rendezvous */ /* functions in mp_machdep.c */ void cpu_add(u_int apic_id, char boot_cpu); @@ -66,17 +64,17 @@ void ipi_bitmap_handler(struct trapframe frame); #endif void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #ifdef XEN void ipi_to_irq_init(void); diff --git a/sys/i386/include/xen/xenvar.h b/sys/i386/include/xen/xenvar.h index 4f8c857..365930a 100644 --- a/sys/i386/include/xen/xenvar.h +++ b/sys/i386/include/xen/xenvar.h @@ -99,7 +99,6 @@ void xpq_init(void); (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] -typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } xen_cpumask_t; int xen_create_contiguous_region(vm_page_t pages, int npages); diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 1bfc0e9..1565859 
100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/cons.h> /* cngetc() */ +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -116,7 +117,7 @@ volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -149,11 +150,10 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); -extern void pmap_lazyfix_action(void); struct cpu_group * cpu_topo(void) @@ -239,7 +239,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -293,7 +293,8 @@ cpu_mp_start(void) start_all_aps(); /* Setup the initial logical CPUs info. */ - logical_cpus = logical_cpus_mask = 0; + logical_cpus = 0; + CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; @@ -340,24 +341,16 @@ iv_invlcache(uintptr_t a, uintptr_t b) atomic_add_int(&smp_tlb_wait, 1); } -static void -iv_lazypmap(uintptr_t a, uintptr_t b) -{ - pmap_lazyfix_action(); - atomic_add_int(&smp_tlb_wait, 1); -} - /* * These start from "IPI offset" APIC_IPI_INTS */ -static call_data_func_t *ipi_vectors[6] = +static call_data_func_t *ipi_vectors[5] = { iv_rendezvous, iv_invltlb, iv_invlpg, iv_invlrng, iv_invlcache, - iv_lazypmap, }; /* @@ -521,6 +514,7 @@ xen_smp_intr_init_cpus(void *unused) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; vm_offset_t addr; int gsel_tss; @@ -600,18 +594,21 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); #if 0 if (bootverbose) lapic_dump("AP"); @@ -725,6 +722,7 @@ assign_cpu_ids(void) int start_all_aps(void) { + cpuset_t tallcpus; int x,apic_id, cpu; struct pcpu *pc; @@ -778,12 +776,14 @@ start_all_aps(void) panic("bye-bye"); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); @@ -960,6 +960,30 @@ start_ap(int apic_id) } /* + * send an IPI to a specific CPU. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (!old_pending) + ipi_pcpu(cpu, RESCHEDULE_VECTOR); + } else { + KASSERT(call_data != NULL, ("call_data not set")); + ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); + } +} + +/* * Flush the TLB on all other CPU's */ static void @@ -988,29 +1012,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + critical_enter(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + critical_exit(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1022,10 +1037,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; @@ -1068,7 +1093,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1077,7 +1102,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1086,7 +1111,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1098,17 +1123,9 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; - u_int bitmap = 0; - u_int old_pending; - u_int new_pending; - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit @@ -1116,25 +1133,13 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. 
*/ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); - - if (bitmap) { - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (!old_pending) - ipi_pcpu(cpu, RESCHEDULE_VECTOR); - } else { - KASSERT(call_data != NULL, ("call_data not set")); - ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); - } + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); + ipi_send_cpu(cpu, ipi); } } @@ -1144,14 +1149,6 @@ ipi_selected(cpumask_t cpus, u_int ipi) void ipi_cpu(int cpu, u_int ipi) { - u_int bitmap = 0; - u_int old_pending; - u_int new_pending; - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit @@ -1159,22 +1156,10 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); - - if (bitmap) { - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (!old_pending) - ipi_pcpu(cpu, RESCHEDULE_VECTOR); - } else { - KASSERT(call_data != NULL, ("call_data not set")); - ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); - } + ipi_send_cpu(cpu, ipi); } /* @@ -1183,23 +1168,27 @@ ipi_cpu(int cpu, u_int ipi) void ipi_all_but_self(u_int ipi) { + cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ + sched_pin(); + other_cpus = PCPU_GET(other_cpus); + sched_unpin(); if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(PCPU_GET(other_cpus), ipi); + ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1207,11 +1196,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1223,20 +1214,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - int cpu = PCPU_GET(cpuid); - int cpumask = PCPU_GET(cpumask); + cpuset_t cpumask; + int cpu; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + cpu = PCPU_GET(cpuid); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index eb3c803..74ace61 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); @@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", pmap, sva, eva); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void 
pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); PT_UPDATES_FLUSH(); } @@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); PT_UPDATES_FLUSH(); @@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap) CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap) #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap) } xen_flush_queue(); vm_page_unlock_queues(); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1681,98 +1683,6 @@ retry: * Pmap allocation/deallocation routines. ***************************************************/ -#ifdef SMP -/* - * Deal with a SMP shootdown of other users of the pmap that we are - * trying to dispose of. This can be a bit hairy. 
- */ -static cpumask_t *lazymask; -static u_int lazyptd; -static volatile u_int lazywait; - -void pmap_lazyfix_action(void); - -void -pmap_lazyfix_action(void) -{ - cpumask_t mymask = PCPU_GET(cpumask); - -#ifdef COUNT_IPIS - (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; -#endif - if (rcr3() == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); - atomic_store_rel_int(&lazywait, 1); -} - -static void -pmap_lazyfix_self(cpumask_t mymask) -{ - - if (rcr3() == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); -} - - -static void -pmap_lazyfix(pmap_t pmap) -{ - cpumask_t mymask, mask; - u_int spins; - - while ((mask = pmap->pm_active) != 0) { - spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ - mtx_lock_spin(&smp_ipi_mtx); -#ifdef PAE - lazyptd = vtophys(pmap->pm_pdpt); -#else - lazyptd = vtophys(pmap->pm_pdir); -#endif - mymask = PCPU_GET(cpumask); - if (mask == mymask) { - lazymask = &pmap->pm_active; - pmap_lazyfix_self(mymask); - } else { - atomic_store_rel_int((u_int *)&lazymask, - (u_int)&pmap->pm_active); - atomic_store_rel_int(&lazywait, 0); - ipi_selected(mask, IPI_LAZYPMAP); - while (lazywait == 0) { - ia32_pause(); - if (--spins == 0) - break; - } - } - mtx_unlock_spin(&smp_ipi_mtx); - if (spins == 0) - printf("pmap_lazyfix: spun for 50000000\n"); - } -} - -#else /* SMP */ - -/* - * Cleaning up on uniprocessor is easy. For various reasons, we're - * unlikely to have to even execute this code, including the fact - * that the cleanup is deferred until the parent does a wait(2), which - * means that another userland process has run. - */ -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int cr3; - - cr3 = vtophys(pmap->pm_pdir); - if (cr3 == rcr3()) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); - } -} -#endif /* SMP */ - /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. @@ -1798,7 +1708,6 @@ pmap_release(pmap_t pmap) mtx_lock(&createdelete_lock); #endif - pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); @@ -4123,11 +4032,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); |
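
Both mp_machdep.c diffs above route per-CPU IPI delivery through an ipi_send_cpu() helper (moved earlier in the native file, newly added in the Xen one) so that ipi_selected(), ipi_cpu() and the targeted TLB-shootdown loop share one path. A commented sketch of the native version; the code mirrors the hunk above, and the added comments describe the bitmap-IPI coalescing it performs:

static void
ipi_send_cpu(int cpu, u_int ipi)
{
	u_int bitmap, old_pending, new_pending;

	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));

	if (IPI_IS_BITMAPED(ipi)) {
		/*
		 * Bitmap IPIs (e.g. AST, hardclock) share a single hardware
		 * vector; the specific request is recorded as a bit in the
		 * target CPU's cpu_ipi_pending[] word.
		 */
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
		/* Lock-free read-modify-write of the pending word. */
		do {
			old_pending = cpu_ipi_pending[cpu];
			new_pending = old_pending | bitmap;
		} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
		    old_pending, new_pending));
		/*
		 * If other bits were already pending, the vector has already
		 * been raised, so the new request is coalesced with it.
		 */
		if (old_pending)
			return;
	}
	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
}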