diff options
author | attilio <attilio@FreeBSD.org> | 2011-06-07 08:46:13 +0000 |
---|---|---|
committer | attilio <attilio@FreeBSD.org> | 2011-06-07 08:46:13 +0000 |
commit | 4930b11d919b23e6c50c7a49ce4fe60212713d99 (patch) | |
tree | 843804a2d1047c81b8b937609df7c87c55600134 | |
parent | 6ed4191fed49dda0af9b5b47c3fdd5814082ab72 (diff) | |
parent | 6ed3ca2c5bfedf9a07bb0032119b1099bfdc3e85 (diff) | |
download | FreeBSD-src-4930b11d919b23e6c50c7a49ce4fe60212713d99.zip FreeBSD-src-4930b11d919b23e6c50c7a49ce4fe60212713d99.tar.gz |
Retire the cpumask_t type and replace it with cpuset_t usage.
This is intended to fix the bug where cpu mask objects are
capped to 32. MAXCPU, then, can now be arbitrarily bumped to whatever
value. Anyway, as long as several structures in the kernel are
statically allocated and sized as MAXCPU, it is suggested to keep it
as low as possible for the time being.
Technical notes on this commit itself:
- More functions to handle cpuset_t objects are introduced.
The most notable are cpusetobj_ffs() (which calculates a ffs(3)
for a cpuset_t object), cpusetobj_strprint() (which prepares a string
representing a cpuset_t object) and cpusetobj_strscan() (which
creates a valid cpuset_t starting from a string representation).
- pc_cpumask and pc_other_cpus are targeted to be removed soon.
With the move from cpumask_t to cpuset_t they are now inefficient
and not really useful. Anyway, for the time being, please note that
access to pcpu data is protected by sched_pin() in order to avoid
migrating to another CPU while reading more than one (possible) word.
- Please note that size of cpuset_t objects may differ between kernel
and userland. While this is not directly related to the patch itself,
it is good to understand that concept and possibly use the patch
as a reference on how to deal with cpuset_t objects in userland, when
accessing kernel-land members.
- KTR_CPUMASK is changed and now is represented through a string, to be
set as the example reported in NOTES.
Please additionally note that no MAXCPU is bumped in this patch, but
private testing has been done up to MAXCPU=128 on a real 8x8x2(htt)
machine (amd64).
Please note that the FreeBSD version is not yet bumped because of
the upcoming pcpu changes. However, note that this patch is not
targeted for MFC.
People to thank for the time spent on this patch:
- sbruno, pluknet and Nicholas Esborn (nick AT desert DOT net) tested
several revisions of the patches and really helped in improving
stability of this work.
- marius fixed several bugs in the sparc64 implementation and reviewed
patches related to ktr.
- jeff and jhb discussed the basic approach followed.
- kib and marcel made targeted reviews of some specific parts of the
patch.
- marius, art, nwhitehorn and andreast reviewed MD-specific parts of
the patch.
- marius, andreast, gonzo, nwhitehorn and jceel tested MD specific
implementations of the patch.
- Other people have made contributions on other patches that have been
already committed and have been listed separately.
Companies that should be mentioned for having participated to several
degrees:
- Yahoo! for having offered the machines used for testing on a large
number of CPUs.
- The FreeBSD Foundation for having sponsored my devsummit attendance,
which has been instrumental.
- Sandvine for having offered offices and infrastructure during
development.
(I really hope I didn't forget anyone, if it happened I apologize in
advance).
88 files changed, 1374 insertions, 782 deletions
@@ -22,6 +22,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW: machines to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20110607: + cpumask_t type is retired and cpuset_t is used in order to describe + a mask of CPUs. + 20110513: Support for sun4v architecture is officially dropped diff --git a/gnu/usr.bin/gdb/kgdb/kthr.c b/gnu/usr.bin/gdb/kgdb/kthr.c index 5036c9c..461f408 100644 --- a/gnu/usr.bin/gdb/kgdb/kthr.c +++ b/gnu/usr.bin/gdb/kgdb/kthr.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> +#include <sys/cpuset.h> #include <sys/proc.h> #include <sys/types.h> #include <sys/signal.h> @@ -37,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include <defs.h> #include <frame-unwind.h> @@ -48,7 +50,7 @@ static CORE_ADDR dumppcb; static int dumptid; static CORE_ADDR stoppcbs; -static __cpumask_t stopped_cpus; +static cpuset_t stopped_cpus; static struct kthr *first; struct kthr *curkthr; @@ -76,6 +78,7 @@ kgdb_thr_init(void) { struct proc p; struct thread td; + long cpusetsize; struct kthr *kt; CORE_ADDR addr; uintptr_t paddr; @@ -102,10 +105,11 @@ kgdb_thr_init(void) dumptid = -1; addr = kgdb_lookup("stopped_cpus"); - if (addr != 0) - kvm_read(kvm, addr, &stopped_cpus, sizeof(stopped_cpus)); - else - stopped_cpus = 0; + CPU_ZERO(&stopped_cpus); + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize != -1 && (u_long)cpusetsize <= sizeof(cpuset_t) && + addr != 0) + kvm_read(kvm, addr, &stopped_cpus, cpusetsize); stoppcbs = kgdb_lookup("stoppcbs"); @@ -126,8 +130,8 @@ kgdb_thr_init(void) kt->kaddr = addr; if (td.td_tid == dumptid) kt->pcb = dumppcb; - else if (td.td_state == TDS_RUNNING && ((1 << td.td_oncpu) & stopped_cpus) - && stoppcbs != 0) + else if (td.td_state == TDS_RUNNING && stoppcbs != 0 && + CPU_ISSET(td.td_oncpu, &stopped_cpus)) kt->pcb = (uintptr_t) stoppcbs + sizeof(struct pcb) * td.td_oncpu; else kt->pcb = 
(uintptr_t)td.td_pcb; diff --git a/lib/libkvm/kvm_pcpu.c b/lib/libkvm/kvm_pcpu.c index fd09fc8..bc73baf 100644 --- a/lib/libkvm/kvm_pcpu.c +++ b/lib/libkvm/kvm_pcpu.c @@ -39,11 +39,13 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> +#include <sys/cpuset.h> #include <sys/pcpu.h> #include <sys/sysctl.h> #include <kvm.h> #include <limits.h> #include <stdlib.h> +#include <unistd.h> #include "kvm_private.h" @@ -118,6 +120,9 @@ _kvm_pcpu_clear(void) void * kvm_getpcpu(kvm_t *kd, int cpu) { + long kcpusetsize; + ssize_t nbytes; + uintptr_t readptr; char *buf; if (kd == NULL) { @@ -125,6 +130,10 @@ kvm_getpcpu(kvm_t *kd, int cpu) return (NULL); } + kcpusetsize = sysconf(_SC_CPUSET_SIZE); + if (kcpusetsize == -1 || (u_long)kcpusetsize > sizeof(cpuset_t)) + return ((void *)-1); + if (maxcpu == 0) if (_kvm_pcpu_init(kd) < 0) return ((void *)-1); @@ -137,8 +146,26 @@ kvm_getpcpu(kvm_t *kd, int cpu) _kvm_err(kd, kd->program, "out of memory"); return ((void *)-1); } - if (kvm_read(kd, (uintptr_t)pcpu_data[cpu], buf, sizeof(struct pcpu)) != - sizeof(struct pcpu)) { + nbytes = sizeof(struct pcpu) - 2 * kcpusetsize; + readptr = (uintptr_t)pcpu_data[cpu]; + if (kvm_read(kd, readptr, buf, nbytes) != nbytes) { + _kvm_err(kd, kd->program, "unable to read per-CPU data"); + free(buf); + return ((void *)-1); + } + + /* Fetch the valid cpuset_t objects. 
*/ + CPU_ZERO((cpuset_t *)(buf + nbytes)); + CPU_ZERO((cpuset_t *)(buf + nbytes + sizeof(cpuset_t))); + readptr += nbytes; + if (kvm_read(kd, readptr, buf + nbytes, kcpusetsize) != kcpusetsize) { + _kvm_err(kd, kd->program, "unable to read per-CPU data"); + free(buf); + return ((void *)-1); + } + readptr += kcpusetsize; + if (kvm_read(kd, readptr, buf + nbytes + sizeof(cpuset_t), + kcpusetsize) != kcpusetsize) { _kvm_err(kd, kd->program, "unable to read per-CPU data"); free(buf); return ((void *)-1); diff --git a/lib/libmemstat/memstat_uma.c b/lib/libmemstat/memstat_uma.c index 4aae61a..485a4f2 100644 --- a/lib/libmemstat/memstat_uma.c +++ b/lib/libmemstat/memstat_uma.c @@ -27,6 +27,7 @@ */ #include <sys/param.h> +#include <sys/cpuset.h> #include <sys/sysctl.h> #define LIBMEMSTAT /* Cause vm_page.h not to include opt_vmpage.h */ @@ -44,6 +45,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include "memstat.h" #include "memstat_internal.h" @@ -313,7 +315,8 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) struct uma_keg *kzp, kz; int hint_dontsearch, i, mp_maxid, ret; char name[MEMTYPE_MAXNAME]; - __cpumask_t all_cpus; + cpuset_t all_cpus; + long cpusetsize; kvm_t *kvm; kvm = (kvm_t *)kvm_handle; @@ -337,7 +340,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) list->mtl_error = ret; return (-1); } - ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, sizeof(all_cpus), 0); + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { + list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; + return (-1); + } + CPU_ZERO(&all_cpus); + ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); if (ret != 0) { list->mtl_error = ret; return (-1); @@ -407,7 +416,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) if (kz.uk_flags & UMA_ZFLAG_INTERNAL) goto skip_percpu; for (i = 0; i < mp_maxid + 1; i++) { - if ((all_cpus & (1 << i)) == 0) + if 
(!CPU_ISSET(i, &all_cpus)) continue; ucp = &ucp_array[i]; mtp->mt_numallocs += ucp->uc_allocs; diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 57341c9..29e66c5 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -78,7 +78,7 @@ static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); -static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); +static void acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *); #endif #define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE)) @@ -173,7 +173,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu) #define BIOS_WARM (0x0a) static void -acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) +acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus) { uint32_t mpbioswarmvec; int cpu; @@ -192,7 +192,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) /* Wake up each AP. */ for (cpu = 1; cpu < mp_ncpus; cpu++) { - if ((wakeup_cpus & (1 << cpu)) == 0) + if (!CPU_ISSET(cpu, wakeup_cpus)) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ @@ -214,7 +214,7 @@ int acpi_sleep_machdep(struct acpi_softc *sc, int state) { #ifdef SMP - cpumask_t wakeup_cpus; + cpuset_t wakeup_cpus; #endif register_t cr3, rf; ACPI_STATUS status; @@ -244,10 +244,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) if (savectx(susppcbs[0])) { #ifdef SMP - if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { - device_printf(sc->acpi_dev, - "Failed to suspend APs: CPU mask = 0x%jx\n", - (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + if (!CPU_EMPTY(&wakeup_cpus) && + suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, "Failed to suspend APs\n"); goto out; } #endif @@ -282,8 +281,8 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef SMP - if (wakeup_cpus != 0) - acpi_wakeup_cpus(sc, 
wakeup_cpus); + if (!CPU_EMPTY(&wakeup_cpus)) + acpi_wakeup_cpus(sc, &wakeup_cpus); #endif acpi_resync_clock(sc); ret = 0; @@ -291,7 +290,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) out: #ifdef SMP - if (wakeup_cpus != 0) + if (!CPU_EMPTY(&wakeup_cpus)) restart_cpus(wakeup_cpus); #endif diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c index 4edef81..3a89531 100644 --- a/sys/amd64/amd64/intr_machdep.c +++ b/sys/amd64/amd64/intr_machdep.c @@ -443,8 +443,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -466,7 +465,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -497,7 +496,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -510,6 +509,9 @@ intr_shuffle_irqs(void *arg __unused) struct intsrc *isrc; int i; + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index c53d10a..d72afd6 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -125,7 +126,7 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); * Local data and functions. 
*/ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -161,7 +162,7 @@ static void release_aps(void *dummy); static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; static u_int bootMP_size; @@ -337,7 +338,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -481,7 +482,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -608,6 +609,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; @@ -739,19 +741,22 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - + CPU_OR(&logical_cpus_mask, &tcpuset); + /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. 
*/ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -894,6 +899,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus, tcpuset; vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; @@ -958,11 +964,14 @@ start_all_aps(void) panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + tcpuset = PCPU_GET(cpumask); + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1091,6 +1100,30 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, #endif /* COUNT_XINVLTLB_HITS */ /* + * Send an IPI to specified CPU handling the bitmap logic. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + +/* * Flush the TLB on all other CPU's */ static void @@ -1114,28 +1147,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (cpumask_t)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_rflags() & PSL_I)) @@ -1144,39 +1168,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (cpumask_t)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: 
%d ipi: %x", __func__, + cpu, vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1223,7 +1233,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1235,7 +1245,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1247,7 +1257,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1300,7 +1310,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1310,12 +1320,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. 
*/ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1333,7 +1343,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1346,8 +1356,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1357,7 +1369,8 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); @@ -1366,7 +1379,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1374,11 +1387,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1390,23 +1405,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1421,7 +1438,7 @@ cpustop_handler(void) void cpususpend_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; register_t cr3, rf; u_int cpu; @@ -1433,7 +1450,7 @@ cpususpend_handler(void) if (savectx(susppcbs[cpu])) { wbinvd(); - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); } else { pmap_init_pat(); PCPU_SET(switchtime, 0); @@ -1441,11 +1458,11 @@ cpususpend_handler(void) } /* Wait for resume */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); /* Restore CR3 and enable interrupts */ load_cr3(cr3); @@ -1473,30 +1490,30 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); static int sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) { - cpumask_t mask; + cpuset_t mask; int error; mask = hlt_cpus_mask; - error = 
sysctl_handle_int(oidp, &mask, 0, req); + error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req); if (error || !req->newptr) return (error); - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if (! hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; + CPU_OR(&mask, &hyperthreading_cpus_mask); - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); + if (CPU_SUBSET(&mask, &all_cpus)) + CPU_CLR(0, &mask); hlt_cpus_mask = mask; return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", +SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S", "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); static int @@ -1510,15 +1527,15 @@ sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) return (error); if (disable) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); else - hlt_cpus_mask &= ~logical_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask); if (! 
hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hlt_logical_cpus = disable; return (error); @@ -1545,18 +1562,18 @@ sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) #endif if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask); else - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hyperthreading_allowed = allowed; return (error); @@ -1566,7 +1583,7 @@ static void cpu_hlt_setup(void *dummy __unused) { - if (logical_cpus_mask != 0) { + if (!CPU_EMPTY(&logical_cpus_mask)) { TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", &hlt_logical_cpus); sysctl_ctx_init(&logical_cpu_clist); @@ -1580,20 +1597,21 @@ cpu_hlt_setup(void *dummy __unused) &logical_cpus_mask, 0, ""); if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); /* * If necessary for security purposes, force * hyperthreading off, regardless of the value * of hlt_logical_cpus. */ - if (hyperthreading_cpus_mask) { + if (!CPU_EMPTY(&hyperthreading_cpus_mask)) { SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hyperthreading_allowed, "IU", ""); if (! 
hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, + &hyperthreading_cpus_mask); } } } @@ -1602,7 +1620,7 @@ SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); int mp_grab_cpu_hlt(void) { - cpumask_t mask; + cpuset_t mask; #ifdef MP_WATCHDOG u_int cpuid; #endif @@ -1615,7 +1633,7 @@ mp_grab_cpu_hlt(void) #endif retval = 0; - while (mask & hlt_cpus_mask) { + while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) { retval = 1; __asm __volatile("sti; hlt" : : : "memory"); } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index c9ff9bc..025ca5f 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> +#else +#include <sys/cpuset.h> #endif #include <vm/vm.h> @@ -581,7 +583,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* @@ -923,19 +925,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -943,23 +946,23 @@ pmap_invalidate_page(pmap_t pmap, 
vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -967,19 +970,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -995,8 +999,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1007,8 +1011,12 
@@ pmap_update_pde_action(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1016,8 +1024,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1032,26 +1044,28 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1065,7 +1079,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1074,7 +1088,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1083,7 +1097,7 @@ PMAP_INLINE void 
pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1099,7 +1113,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1607,7 +1621,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1649,7 +1663,7 @@ pmap_pinit(pmap_t pmap) pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -5087,11 +5101,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #ifdef SMP - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~PCPU_GET(cpumask); - pmap->pm_active |= PCPU_GET(cpumask); + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); td->td_pcb->pcb_cr3 = cr3; diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 972484a..13f5cd0 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/pioctl.h> #include <sys/proc.h> +#include <sys/sched.h> #include 
<sys/sf_buf.h> #include <sys/smp.h> #include <sys/sysctl.h> @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> #include <machine/md_var.h> #include <machine/pcb.h> +#include <machine/smp.h> #include <machine/specialreg.h> #include <machine/tss.h> @@ -512,11 +514,13 @@ cpu_set_user_tls(struct thread *td, void *tls_base) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<<cpu_reset_proxyid)); + CPU_SETOF(cpu_reset_proxyid, &tcrp); + stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); @@ -527,24 +531,28 @@ void cpu_reset() { #ifdef SMP - cpumask_t map; + cpuset_t map; u_int cnt; if (smp_active) { - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map != 0) { + sched_pin(); + map = PCPU_GET(other_cpus); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); + sched_unpin(); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ - atomic_store_rel_int(&started_cpus, 1 << 0); + CPU_SETOF(0, &started_cpus); + wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) @@ -556,7 +564,8 @@ cpu_reset() while (1); /* NOTREACHED */ - } + } else + sched_unpin(); DELAY(1000000); } diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h index 89d2e86..13dc3ea 100644 --- a/sys/amd64/include/_types.h +++ b/sys/amd64/include/_types.h @@ -61,7 +61,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... 
*/ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 7a62851..1b8108a 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -152,6 +152,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -251,7 +252,7 @@ struct pmap { struct mtx pm_mtx; pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index ec107f9..a009b35 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -63,17 +63,17 @@ void ipi_all_but_self(u_int ipi); void ipi_bitmap_handler(struct trapframe frame); void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #endif /* !LOCORE */ #endif /* SMP */ diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 087a744..cecf363 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -2395,7 +2395,7 @@ pmap_bootstrap(vm_offset_t 
firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt cpu_cpwait(); PMAP_LOCK_INIT(kernel_pmap); - kernel_pmap->pm_active = -1; + CPU_FILL(&kernel_pmap->pm_active); kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; TAILQ_INIT(&kernel_pmap->pm_pvlist); @@ -3826,7 +3826,7 @@ pmap_pinit(pmap_t pmap) pmap_alloc_l1(pmap); bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h index 48dd2a7..d8386f3 100644 --- a/sys/arm/include/_types.h +++ b/sys/arm/include/_types.h @@ -67,7 +67,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef double __double_t; typedef double __float_t; diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 701390a..3d63432 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -62,6 +62,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -134,7 +135,7 @@ struct pmap { struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; pd_entry_t *pm_pdir; /* KVA of page directory */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ }; diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c index 6f93663..9ba2fd3 100644 --- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c +++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c @@ -123,7 +123,9 @@ reprogram(cyb_arg_t arg __unused, hrtime_t exp) static void xcall(cyb_arg_t arg __unused, cpu_t *c, cyc_func_t func, void *param) { + cpuset_t cpus; - smp_rendezvous_cpus((cpumask_t)1 << c->cpuid, + CPU_SETOF(c->cpuid, &cpus); 
+ smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, param); } diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c index a081f67..0b86eac 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c @@ -113,12 +113,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -374,7 +374,7 @@ dtrace_gethrtime_init(void *arg) { struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; + cpuset_t map; int i; /* @@ -412,7 +412,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c index 2753ffc..412fc38 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -30,6 +30,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/types.h> +#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/kmem.h> @@ -113,12 +114,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -372,9 +373,9 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + cpuset_t map; struct pcpu *pc; 
uint64_t tsc_f; - cpumask_t map; int i; /* @@ -412,7 +413,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 94311c6..b84d0c5 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -432,7 +432,10 @@ options KTRACE_REQUEST_POOL=101 # defined by the KTR_* constants in <sys/ktr.h>. KTR_MASK defines the # initial value of the ktr_mask variable which determines at runtime # what events to trace. KTR_CPUMASK determines which CPU's log -# events, with bit X corresponding to CPU X. KTR_VERBOSE enables +# events, with bit X corresponding to CPU X. The layout of the string +# passed as KTR_CPUMASK must match a serie of bitmasks each of them +# separated by the ", " characters (ie: +# KTR_CPUMASK=("0xAF, 0xFFFFFFFFFFFFFFFF")). KTR_VERBOSE enables # dumping of KTR events to the console by default. This functionality # can be toggled via the debug.ktr_verbose sysctl and defaults to off # if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details. @@ -441,7 +444,7 @@ options KTR options KTR_ENTRIES=1024 options KTR_COMPILE=(KTR_INTR|KTR_PROC) options KTR_MASK=KTR_INTR -options KTR_CPUMASK=0x3 +options KTR_CPUMASK=("0x3") options KTR_VERBOSE # diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index d6225d8..4cfcea8 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1991,7 +1991,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg) * had already processed the interrupt). We don't * lose the interrupt sample. 
*/ - atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid))); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid)); break; @@ -4083,7 +4083,7 @@ pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf, done: /* mark CPU as needing processing */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } @@ -4193,7 +4193,7 @@ pmc_process_samples(int cpu) break; if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } @@ -4782,7 +4782,7 @@ pmc_cleanup(void) PMCDBG(MOD,INI,0, "%s", "cleanup"); /* switch off sampling */ - pmc_cpumask = 0; + CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index c03d536..0f44181 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -203,24 +203,29 @@ xctrl_suspend() unsigned long max_pfn, start_info_mfn; #ifdef SMP - cpumask_t map; + struct thread *td; + cpuset_t map; /* * Bind us to CPU 0 and stop any other VCPUs. 
*/ - thread_lock(curthread); - sched_bind(curthread, 0); - thread_unlock(curthread); + td = curthread; + thread_lock(td); + sched_bind(td, 0); + thread_unlock(td); KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0")); - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map) + sched_pin(); + map = PCPU_GET(other_cpus); + sched_unpin(); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) stop_cpus(map); #endif if (DEVICE_SUSPEND(root_bus) != 0) { printf("xen_suspend: device_suspend failed\n"); #ifdef SMP - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; @@ -289,7 +294,7 @@ xctrl_suspend() thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif } diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 74c70ff..30497a4 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -672,7 +672,7 @@ static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP - return ((hlt_cpus_mask & (1 << cpu)) != 0); + return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c index 77b8004..56529f7 100644 --- a/sys/i386/i386/intr_machdep.c +++ b/sys/i386/i386/intr_machdep.c @@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. 
*/ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -432,7 +431,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused) return; #endif + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 904af80..be603eb 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/cons.h> /* cngetc() */ +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -173,7 +174,7 @@ static u_long *ipi_hardclock_counts[MAXCPU]; * Local data and functions. 
*/ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -210,7 +211,7 @@ static void release_aps(void *dummy); static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; @@ -385,7 +386,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -527,7 +528,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -662,6 +663,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; vm_offset_t addr; int gsel_tss; @@ -786,19 +788,22 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. 
*/ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -935,6 +940,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus; #ifndef PC98 u_char mpbiosreason; #endif @@ -994,11 +1000,13 @@ start_all_aps(void) } CHECK_PRINT("trace"); /* show checkpoints */ - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1195,6 +1203,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, #endif /* COUNT_XINVLTLB_HITS */ /* + * Send an IPI to specified CPU handling the bitmap logic. + */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + +/* * Flush the TLB on all other CPU's */ static void @@ -1218,28 +1250,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if 
(CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1248,39 +1271,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. 
- */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1327,7 +1336,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1339,7 +1348,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1351,7 +1360,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1404,7 +1413,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1414,12 +1423,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1437,7 +1446,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. 
*/ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1450,8 +1459,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1461,7 +1472,9 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } @@ -1469,7 +1482,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1477,11 +1490,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1493,23 +1508,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1536,30 +1553,30 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); static int sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) { - cpumask_t mask; + cpuset_t mask; int error; mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); + error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req); if (error || !req->newptr) return (error); - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if (! 
hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; + CPU_OR(&mask, &hyperthreading_cpus_mask); - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); + if (CPU_SUBSET(&mask, &all_cpus)) + CPU_CLR(0, &mask); hlt_cpus_mask = mask; return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", +SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S", "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); static int @@ -1573,15 +1590,15 @@ sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) return (error); if (disable) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); else - hlt_cpus_mask &= ~logical_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask); if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hlt_logical_cpus = disable; return (error); @@ -1608,18 +1625,18 @@ sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) #endif if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask); else - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hyperthreading_allowed = allowed; return (error); @@ -1629,7 +1646,7 @@ static void cpu_hlt_setup(void *dummy __unused) { - if (logical_cpus_mask != 0) { + if 
(!CPU_EMPTY(&logical_cpus_mask)) { TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", &hlt_logical_cpus); sysctl_ctx_init(&logical_cpu_clist); @@ -1643,20 +1660,21 @@ cpu_hlt_setup(void *dummy __unused) &logical_cpus_mask, 0, ""); if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); /* * If necessary for security purposes, force * hyperthreading off, regardless of the value * of hlt_logical_cpus. */ - if (hyperthreading_cpus_mask) { + if (!CPU_EMPTY(&hyperthreading_cpus_mask)) { SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hyperthreading_allowed, "IU", ""); if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, + &hyperthreading_cpus_mask); } } } @@ -1665,7 +1683,7 @@ SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); int mp_grab_cpu_hlt(void) { - cpumask_t mask; + cpuset_t mask; #ifdef MP_WATCHDOG u_int cpuid; #endif @@ -1678,7 +1696,7 @@ mp_grab_cpu_hlt(void) #endif retval = 0; - while (mask & hlt_cpus_mask) { + while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) { retval = 1; __asm __volatile("sti; hlt" : : : "memory"); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index d10bbe5..3f9248d 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> +#else +#include <sys/cpuset.h> #endif #include <vm/vm.h> @@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); @@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void 
pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = 
PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg) pd_entry_t *pde; pmap_t pmap; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); + /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. 
Here, it does not because it is being @@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg) pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } + } else + sched_unpin(); } static void @@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1058,21 +1075,23 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap == kernel_pmap ? 
pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); @@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1095,7 +1114,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1113,7 +1132,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap) #endif } - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1886,7 +1905,7 @@ retry: * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. 
This can be a bit hairy. */ -static cpumask_t *lazymask; +static cpuset_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; @@ -1895,36 +1914,42 @@ void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { - cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); atomic_store_rel_int(&lazywait, 1); } static void -pmap_lazyfix_self(cpumask_t mymask) +pmap_lazyfix_self(cpuset_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_NAND_ATOMIC(lazymask, &mymask); } static void pmap_lazyfix(pmap_t pmap) { - cpumask_t mymask, mask; + cpuset_t mymask, mask; u_int spins; + int lsb; - while ((mask = pmap->pm_active) != 0) { + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ + + /* Find least significant set bit. 
*/ + lsb = cpusetobj_ffs(&mask); + MPASS(lsb != 0); + lsb--; + CPU_SETOF(lsb, &mask); mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); @@ -1932,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap) lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); - if (mask == mymask) { + if (!CPU_CMP(&mask, &mymask)) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { @@ -1949,6 +1974,7 @@ pmap_lazyfix(pmap_t pmap) mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; } } @@ -1968,7 +1994,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -5078,11 +5104,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 232e1a1..a084e09 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -573,11 +573,13 @@ kvtop(void *addr) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<<cpu_reset_proxyid)); + CPU_SETOF(cpu_reset_proxyid, &tcrp); + stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); @@ -596,25 +598,29 @@ cpu_reset() 
#endif #ifdef SMP - cpumask_t map; + cpuset_t map; u_int cnt; if (smp_active) { - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map != 0) { + sched_pin(); + map = PCPU_GET(other_cpus); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); + sched_unpin(); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ /* XXX: restart_cpus(1 << 0); */ - atomic_store_rel_int(&started_cpus, (1 << 0)); + CPU_SETOF(0, &started_cpus); + wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) @@ -626,7 +632,8 @@ cpu_reset() while (1); /* NOTREACHED */ - } + } else + sched_unpin(); DELAY(1000000); } @@ -795,7 +802,7 @@ sf_buf_alloc(struct vm_page *m, int flags) struct sf_head *hash_list; struct sf_buf *sf; #ifdef SMP - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; #endif int error; @@ -867,22 +874,23 @@ sf_buf_alloc(struct vm_page *m, int flags) */ #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - sf->cpumask = 0; + CPU_ZERO(&sf->cpumask); shootdown: sched_pin(); cpumask = PCPU_GET(cpumask); - if ((sf->cpumask & cpumask) == 0) { - sf->cpumask |= cpumask; + if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) { + CPU_OR(&sf->cpumask, &cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { - other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask; - if (other_cpus != 0) { - sf->cpumask |= other_cpus; + other_cpus = PCPU_GET(other_cpus); + CPU_NAND(&other_cpus, &sf->cpumask); + if (!CPU_EMPTY(&other_cpus)) { + CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } - sched_unpin(); + sched_unpin(); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index 7a969fe..3194fd6 100644 --- a/sys/i386/include/_types.h +++ 
b/sys/i386/include/_types.h @@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef long double __double_t; typedef long double __float_t; diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index eeada2e..3012a00 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -155,6 +155,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -433,7 +434,7 @@ struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h index 7bc1095..415dcbb 100644 --- a/sys/i386/include/sf_buf.h +++ b/sys/i386/include/sf_buf.h @@ -29,6 +29,7 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ +#include <sys/_cpuset.h> #include <sys/queue.h> struct vm_page; @@ -40,7 +41,7 @@ struct sf_buf { vm_offset_t kva; /* va of mapping */ int ref_count; /* usage of this mapping */ #ifdef SMP - cpumask_t cpumask; /* cpus on which mapping is valid */ + cpuset_t cpumask; /* cpus on which mapping is valid */ #endif }; diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index d364cd9..33b2578 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -66,17 +66,17 @@ void ipi_bitmap_handler(struct trapframe frame); #endif void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); 
-void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #ifdef XEN void ipi_to_irq_init(void); diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 2919570..2d05596 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/cons.h> /* cngetc() */ +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -116,7 +117,7 @@ volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -149,7 +150,7 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); @@ -239,7 +240,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -293,7 +294,8 @@ cpu_mp_start(void) start_all_aps(); /* Setup the initial logical CPUs info. 
*/ - logical_cpus = logical_cpus_mask = 0; + logical_cpus = 0; + CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; @@ -521,6 +523,7 @@ xen_smp_intr_init_cpus(void *unused) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; vm_offset_t addr; int gsel_tss; @@ -600,18 +603,21 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); #if 0 if (bootverbose) lapic_dump("AP"); @@ -725,6 +731,7 @@ assign_cpu_ids(void) int start_all_aps(void) { + cpuset_t tallcpus; int x,apic_id, cpu; struct pcpu *pc; @@ -778,12 +785,14 @@ start_all_aps(void) panic("bye-bye"); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); @@ -1012,29 +1021,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, 
othercpus; + int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + critical_enter(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + critical_exit(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1046,10 +1046,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; @@ -1092,7 +1102,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1101,7 +1111,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1110,7 +1120,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1122,7 +1132,7 @@ smp_masked_invlpg_range(cpumask_t mask, 
vm_offset_t addr1, vm_offset_t addr2) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1132,11 +1142,11 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } @@ -1155,7 +1165,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1167,23 +1177,27 @@ ipi_cpu(int cpu, u_int ipi) void ipi_all_but_self(u_int ipi) { + cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ + sched_pin(); + other_cpus = PCPU_GET(other_cpus); + sched_unpin(); if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(PCPU_GET(other_cpus), ipi); + ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1191,11 +1205,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1207,20 +1223,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - int cpu = PCPU_GET(cpuid); - int cpumask = PCPU_GET(cpumask); + cpuset_t cpumask; + int cpu; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + cpu = PCPU_GET(cpuid); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index eb3c803..3efa4f1 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); @@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); 
smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", pmap, sva, eva); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if 
(CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); PT_UPDATES_FLUSH(); } @@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); PT_UPDATES_FLUSH(); @@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap) CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap) #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap) } xen_flush_queue(); vm_page_unlock_queues(); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1686,7 +1688,7 @@ retry: * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. 
*/ -static cpumask_t *lazymask; +static cpuset_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; @@ -1695,36 +1697,42 @@ void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { - cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); atomic_store_rel_int(&lazywait, 1); } static void -pmap_lazyfix_self(cpumask_t mymask) +pmap_lazyfix_self(cpuset_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_NAND_ATOMIC(lazymask, &mymask); } static void pmap_lazyfix(pmap_t pmap) { - cpumask_t mymask, mask; + cpuset_t mymask, mask; u_int spins; + int lsb; - while ((mask = pmap->pm_active) != 0) { + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ + + /* Find least significant set bit. 
*/ + lsb = cpusetobj_ffs(&mask); + MPASS(lsb != 0); + lsb--; + CPU_SETOF(lsb, &mask); mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); @@ -1732,7 +1740,7 @@ pmap_lazyfix(pmap_t pmap) lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); - if (mask == mymask) { + if (!CPU_CMP(&mask, &mymask)) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { @@ -1749,6 +1757,7 @@ pmap_lazyfix(pmap_t pmap) mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; } } @@ -1768,7 +1777,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -4123,11 +4132,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/ia64/ia64/mp_machdep.c b/sys/ia64/ia64/mp_machdep.c index b6b0bef..15afea0 100644 --- a/sys/ia64/ia64/mp_machdep.c +++ b/sys/ia64/ia64/mp_machdep.c @@ -139,18 +139,18 @@ ia64_ih_rndzvs(struct thread *td, u_int xiv, struct trapframe *tf) static u_int ia64_ih_stop(struct thread *td, u_int xiv, struct trapframe *tf) { - cpumask_t mybit; + cpuset_t mybit; PCPU_INC(md.stats.pcs_nstops); mybit = PCPU_GET(cpumask); savectx(PCPU_PTR(md.pcb)); - atomic_set_int(&stopped_cpus, mybit); - while ((started_cpus & mybit) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &mybit); + while 
(!CPU_OVERLAP(&started_cpus, &mybit)) cpu_spinwait(); - atomic_clear_int(&started_cpus, mybit); - atomic_clear_int(&stopped_cpus, mybit); + CPU_NAND_ATOMIC(&started_cpus, &mybit); + CPU_NAND_ATOMIC(&stopped_cpus, &mybit); return (0); } @@ -286,7 +286,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid) cpuid = (IA64_LID_GET_SAPIC_ID(ia64_get_lid()) == sapic_id) ? 0 : smp_cpus++; - KASSERT((all_cpus & (1UL << cpuid)) == 0, + KASSERT(!CPU_ISSET(cpuid, &all_cpus), ("%s: cpu%d already in CPU map", __func__, acpi_id)); if (cpuid != 0) { @@ -300,7 +300,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid) pc->pc_acpi_id = acpi_id; pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id); - all_cpus |= (1UL << pc->pc_cpuid); + CPU_SET(pc->pc_cpuid, &all_cpus); } void @@ -359,7 +359,8 @@ cpu_mp_start() STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { pc->pc_md.current_pmap = kernel_pmap; - pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask; + pc->pc_other_cpus = all_cpus; + CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask); /* The BSP is obviously running already. */ if (pc->pc_cpuid == 0) { pc->pc_md.awake = 1; @@ -458,12 +459,12 @@ cpu_mp_unleash(void *dummy) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } diff --git a/sys/ia64/include/_types.h b/sys/ia64/include/_types.h index 8fc1be2..0c2f5cc 100644 --- a/sys/ia64/include/_types.h +++ b/sys/ia64/include/_types.h @@ -59,7 +59,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... 
*/ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h index 26557a7..d2aff76 100644 --- a/sys/ia64/include/smp.h +++ b/sys/ia64/include/smp.h @@ -14,6 +14,8 @@ #ifndef LOCORE +#include <sys/_cpuset.h> + struct pcpu; struct ia64_ap_state { @@ -44,7 +46,7 @@ extern int ia64_ipi_wakeup; void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void ipi_send(struct pcpu *, int ipi); #endif /* !LOCORE */ diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 6489ffb..e1f2801 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <sys/cpuset.h> #include <sys/sx.h> #include <sys/queue.h> +#include <sys/libkern.h> #include <sys/limits.h> #include <sys/bus.h> #include <sys/interrupt.h> @@ -617,6 +618,86 @@ out: } /* + * Calculate the ffs() of the cpuset. + */ +int +cpusetobj_ffs(const cpuset_t *set) +{ + size_t i; + int cbit; + + cbit = 0; + for (i = 0; i < _NCPUWORDS; i++) { + if (set->__bits[i] != 0) { + cbit = ffsl(set->__bits[i]); + cbit += i * _NCPUBITS; + break; + } + } + return (cbit); +} + +/* + * Return a string representing a valid layout for a cpuset_t object. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. + */ +char * +cpusetobj_strprint(char *buf, const cpuset_t *set) +{ + char *tbuf; + size_t i, bytesp, bufsiz; + + tbuf = buf; + bytesp = 0; + bufsiz = CPUSETBUFSIZ; + + for (i = _NCPUWORDS - 1; i > 0; i--) { + bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]); + bufsiz -= bytesp; + tbuf += bytesp; + } + snprintf(tbuf, bufsiz, "%lx", set->__bits[0]); + return (buf); +} + +/* + * Build a valid cpuset_t object from a string representation. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. 
+ */ +int +cpusetobj_strscan(cpuset_t *set, const char *buf) +{ + u_int nwords; + int i, ret; + + if (strlen(buf) > CPUSETBUFSIZ - 1) + return (-1); + + /* Allow to pass a shorter version of the mask when necessary. */ + nwords = 1; + for (i = 0; buf[i] != '\0'; i++) + if (buf[i] == ',') + nwords++; + if (nwords > _NCPUWORDS) + return (-1); + + CPU_ZERO(set); + for (i = nwords - 1; i > 0; i--) { + ret = sscanf(buf, "%lx, ", &set->__bits[i]); + if (ret == 0 || ret == -1) + return (-1); + buf = strstr(buf, " "); + if (buf == NULL) + return (-1); + buf++; + } + ret = sscanf(buf, "%lx", &set->__bits[0]); + if (ret == 0 || ret == -1) + return (-1); + return (0); +} + +/* * Apply an anonymous mask to a single thread. */ int @@ -754,12 +835,7 @@ cpuset_init(void *arg) { cpuset_t mask; - CPU_ZERO(&mask); -#ifdef SMP - mask.__bits[0] = all_cpus; -#else - mask.__bits[0] = 1; -#endif + mask = all_cpus; if (cpuset_modify(cpuset_zero, &mask)) panic("Can't set initial cpuset mask.\n"); cpuset_zero->cs_flags |= CPU_SET_RDONLY; diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c index 2e5e06f..eff3d5b 100644 --- a/sys/kern/kern_ktr.c +++ b/sys/kern/kern_ktr.c @@ -40,8 +40,10 @@ __FBSDID("$FreeBSD$"); #include "opt_alq.h" #include <sys/param.h> +#include <sys/queue.h> #include <sys/alq.h> #include <sys/cons.h> +#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/libkern.h> @@ -68,10 +70,6 @@ __FBSDID("$FreeBSD$"); #define KTR_MASK (0) #endif -#ifndef KTR_CPUMASK -#define KTR_CPUMASK (~0) -#endif - #ifndef KTR_TIME #define KTR_TIME get_cyclecount() #endif @@ -84,11 +82,6 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); -int ktr_cpumask = KTR_CPUMASK; -TUNABLE_INT("debug.ktr.cpumask", &ktr_cpumask); -SYSCTL_INT(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RW, - &ktr_cpumask, 0, "Bitmask of CPUs on which KTR logging is enabled"); - int ktr_mask = KTR_MASK; 
TUNABLE_INT("debug.ktr.mask", &ktr_mask); SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW, @@ -106,6 +99,54 @@ int ktr_version = KTR_VERSION; SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); +cpuset_t ktr_cpumask; +static char ktr_cpumask_str[CPUSETBUFSIZ]; +TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); + +static void +ktr_cpumask_initializer(void *dummy __unused) +{ + + CPU_FILL(&ktr_cpumask); +#ifdef KTR_CPUMASK + if (cpusetobj_strscan(&ktr_cpumask, KTR_CPUMASK) == -1) + CPU_FILL(&ktr_cpumask); +#endif + + /* + * TUNABLE_STR() runs with SI_ORDER_MIDDLE priority, thus it must be + * already set, if necessary. + */ + if (ktr_cpumask_str[0] != '\0' && + cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1) + CPU_FILL(&ktr_cpumask); +} +SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY, + ktr_cpumask_initializer, NULL); + +static int +sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS) +{ + char lktr_cpumask_str[CPUSETBUFSIZ]; + cpuset_t imask; + int error; + + cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask); + error = sysctl_handle_string(oidp, lktr_cpumask_str, + sizeof(lktr_cpumask_str), req); + if (error != 0 || req->newptr == NULL) + return (error); + if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1) + return (EINVAL); + CPU_COPY(&imask, &ktr_cpumask); + + return (error); +} +SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask, + CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0, + sysctl_debug_ktr_cpumask, "S", + "Bitmask of CPUs on which KTR logging is enabled"); + volatile int ktr_idx = 0; struct ktr_entry ktr_buf[KTR_ENTRIES]; @@ -213,7 +254,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format, if ((ktr_mask & mask) == 0) return; cpu = KTR_CPU; - if (((1 << cpu) & ktr_cpumask) == 0) + if (!CPU_ISSET(cpu, &ktr_cpumask)) return; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td = curthread; diff --git a/sys/kern/kern_pmc.c 
b/sys/kern/kern_pmc.c index 7532378..8d9c7c0 100644 --- a/sys/kern/kern_pmc.c +++ b/sys/kern/kern_pmc.c @@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL; int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL; /* Bitmask of CPUs requiring servicing at hardclock time */ -volatile cpumask_t pmc_cpumask; +volatile cpuset_t pmc_cpumask; /* * A global count of SS mode PMCs. When non-zero, this means that @@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu) { #ifdef SMP return (pmc_cpu_is_present(cpu) && - (hlt_cpus_mask & (1 << cpu)) == 0); + !CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (1); #endif @@ -139,7 +139,7 @@ int pmc_cpu_is_primary(int cpu) { #ifdef SMP - return ((logical_cpus_mask & (1 << cpu)) == 0); + return (!CPU_ISSET(cpu, &logical_cpus_mask)); #else return (1); #endif diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index 7f2b4e7..3214e1b 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) pc = pcpu_find(curcpu); /* Check if we just need to do a proper critical_exit. */ - if (!(pc->pc_cpumask & rm->rm_writecpus)) { + if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) { critical_exit(); return (1); } @@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) critical_enter(); pc = pcpu_find(curcpu); - rm->rm_writecpus &= ~pc->pc_cpumask; + CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask); rm_tracker_add(pc, tracker); sched_pin(); critical_exit(); @@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) * Fast path to combine two common conditions into a single * conditional jump. */ - if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask))) + if (0 == (td->td_owepreempt | + CPU_OVERLAP(&rm->rm_writecpus, &pc->pc_cpumask))) return (1); /* We do not have a read token and need to acquire one. 
*/ @@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm) { struct rm_priotracker *prio; struct turnstile *ts; - cpumask_t readcpus; + cpuset_t readcpus; if (rm->lock_object.lo_flags & RM_SLEEPABLE) sx_xlock(&rm->rm_lock_sx); else mtx_lock(&rm->rm_lock_mtx); - if (rm->rm_writecpus != all_cpus) { + if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { /* Get all read tokens back */ - - readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus); + readcpus = all_cpus; + CPU_NAND(&readcpus, &rm->rm_writecpus); rm->rm_writecpus = all_cpus; /* diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 519cae5..592bb80 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -156,7 +156,7 @@ static struct runq runq; static struct runq runq_pcpu[MAXCPU]; long runq_length[MAXCPU]; -static cpumask_t idle_cpus_mask; +static cpuset_t idle_cpus_mask; #endif struct pcpuidlestat { @@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (td->td_flags & TDF_IDLETD) { TD_SET_CAN_RUN(td); #ifdef SMP - idle_cpus_mask &= ~PCPU_GET(cpumask); + /* Spinlock held here, assume no migration. */ + CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif } else { if (TD_IS_RUNNING(td)) { @@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) #ifdef SMP if (td->td_flags & TDF_IDLETD) - idle_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); @@ -1054,7 +1055,8 @@ static int forward_wakeup(int cpunum) { struct pcpu *pc; - cpumask_t dontuse, id, map, map2, me; + cpuset_t dontuse, id, map, map2, me; + int iscpuset; mtx_assert(&sched_lock, MA_OWNED); @@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum) /* * Check the idle mask we received against what we calculated * before in the old version. + * + * Also note that sched_lock is held now, thus no migration is + * expected. 
*/ me = PCPU_GET(cpumask); /* Don't bother if we should be doing it ourself. */ - if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum))) + if (CPU_OVERLAP(&me, &idle_cpus_mask) && + (cpunum == NOCPU || CPU_ISSET(cpunum, &me))) return (0); - dontuse = me | stopped_cpus | hlt_cpus_mask; - map2 = 0; + dontuse = me; + CPU_OR(&dontuse, &stopped_cpus); + CPU_OR(&dontuse, &hlt_cpus_mask); + CPU_ZERO(&map2); if (forward_wakeup_use_loop) { STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((id & dontuse) == 0 && + if (!CPU_OVERLAP(&id, &dontuse) && pc->pc_curthread == pc->pc_idlethread) { - map2 |= id; + CPU_OR(&map2, &id); } } } if (forward_wakeup_use_mask) { - map = 0; - map = idle_cpus_mask & ~dontuse; + map = idle_cpus_mask; + CPU_NAND(&map, &dontuse); /* If they are both on, compare and use loop if different. */ if (forward_wakeup_use_loop) { - if (map != map2) { + if (CPU_CMP(&map, &map2)) { printf("map != map2, loop method preferred\n"); map = map2; } @@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum) /* If we only allow a specific CPU, then mask off all the others. 
*/ if (cpunum != NOCPU) { KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum.")); - map &= (1 << cpunum); + iscpuset = CPU_ISSET(cpunum, &map); + if (iscpuset == 0) + CPU_ZERO(&map); + else + CPU_SETOF(cpunum, &map); } - if (map) { + if (!CPU_EMPTY(&map)) { forward_wakeups_delivered++; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((map & id) == 0) + if (!CPU_OVERLAP(&map, &id)) continue; if (cpu_idle_wakeup(pc->pc_cpuid)) - map &= ~id; + CPU_NAND(&map, &id); } - if (map) + if (!CPU_EMPTY(&map)) ipi_selected(map, IPI_AST); return (1); } @@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid) int cpri; pcpu = pcpu_find(cpuid); - if (idle_cpus_mask & pcpu->pc_cpumask) { + if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) { forward_wakeups_delivered++; if (!cpu_idle_wakeup(cpuid)) ipi_cpu(cpuid, IPI_AST); @@ -1193,6 +1205,7 @@ void sched_add(struct thread *td, int flags) #ifdef SMP { + cpuset_t idle, me, tidlemsk; struct td_sched *ts; int forwarded = 0; int cpu; @@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags) kick_other_cpu(td->td_priority, cpu); } else { if (!single_cpu) { - cpumask_t me = PCPU_GET(cpumask); - cpumask_t idle = idle_cpus_mask & me; - if (!idle && ((flags & SRQ_INTR) == 0) && - (idle_cpus_mask & ~(hlt_cpus_mask | me))) + /* + * Thread spinlock is held here, assume no + * migration is possible. 
+ */ + me = PCPU_GET(cpumask); + idle = idle_cpus_mask; + tidlemsk = idle; + CPU_AND(&idle, &me); + CPU_OR(&me, &hlt_cpus_mask); + CPU_NAND(&tidlemsk, &me); + + if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) && + !CPU_EMPTY(&tidlemsk)) forwarded = forward_wakeup(cpu); } diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index ac18e77..05267f3 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -564,7 +564,7 @@ struct cpu_search { #define CPUSET_FOREACH(cpu, mask) \ for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \ - if ((mask) & 1 << (cpu)) + if (CPU_ISSET(cpu, &mask)) static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low, struct cpu_search *high, const int match); @@ -2650,15 +2650,16 @@ static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, int indent) { + char cpusetbuf[CPUSETBUFSIZ]; int i, first; sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent, "", 1 + indent / 2, cg->cg_level); - sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"0x%x\">", indent, "", - cg->cg_count, cg->cg_mask); + sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "", + cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); first = TRUE; for (i = 0; i < MAXCPU; i++) { - if ((cg->cg_mask & (1 << i)) != 0) { + if (CPU_ISSET(i, &cg->cg_mask)) { if (!first) sbuf_printf(sb, ", "); else diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 5d68ae2..1d67864 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -413,7 +413,8 @@ kdb_thr_ctx(struct thread *thr) #if defined(SMP) && defined(KDB_STOPPEDPCB) STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask)) + if (pc->pc_curthread == thr && + CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask)) return (KDB_STOPPEDPCB(pc)); } #endif diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index 5cb4f26..a6b3ae0 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -87,7 +87,7 
@@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) KASSERT(cpuid >= 0 && cpuid < MAXCPU, ("pcpu_init: invalid cpuid %d", cpuid)); pcpu->pc_cpuid = cpuid; - pcpu->pc_cpumask = 1 << cpuid; + CPU_SETOF(cpuid, &pcpu->pc_cpumask); cpuid_to_pcpu[cpuid] = pcpu; STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu); cpu_pcpu_init(pcpu, cpuid, size); diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 351f096..c38177b 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$"); #include "opt_sched.h" #ifdef SMP -volatile cpumask_t stopped_cpus; -volatile cpumask_t started_cpus; -cpumask_t hlt_cpus_mask; -cpumask_t logical_cpus_mask; +volatile cpuset_t stopped_cpus; +volatile cpuset_t started_cpus; +cpuset_t hlt_cpus_mask; +cpuset_t logical_cpus_mask; void (*cpustop_restartfunc)(void); #endif /* This is used in modules that need to work in both SMP and UP. */ -cpumask_t all_cpus; +cpuset_t all_cpus; int mp_ncpus; /* export this for libkvm consumers. */ @@ -200,8 +200,11 @@ forward_signal(struct thread *td) * */ static int -generic_stop_cpus(cpumask_t map, u_int type) +generic_stop_cpus(cpuset_t map, u_int type) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif static volatile u_int stopping_cpu = NOCPU; int i; @@ -216,7 +219,8 @@ generic_stop_cpus(cpumask_t map, u_int type) if (!smp_started) return (0); - CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type); + CTR2(KTR_SMP, "stop_cpus(%s) with %u type", + cpusetobj_strprint(cpusetbuf, &map), type); if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, @@ -228,7 +232,7 @@ generic_stop_cpus(cpumask_t map, u_int type) ipi_selected(map, type); i = 0; - while ((stopped_cpus & map) != map) { + while (!CPU_SUBSET(&stopped_cpus, &map)) { /* spin */ cpu_spinwait(); i++; @@ -245,14 +249,14 @@ generic_stop_cpus(cpumask_t map, u_int type) } int -stop_cpus(cpumask_t map) +stop_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP)); } int 
-stop_cpus_hard(cpumask_t map) +stop_cpus_hard(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP_HARD)); @@ -260,7 +264,7 @@ stop_cpus_hard(cpumask_t map) #if defined(__amd64__) int -suspend_cpus(cpumask_t map) +suspend_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_SUSPEND)); @@ -281,19 +285,22 @@ suspend_cpus(cpumask_t map) * 1: ok */ int -restart_cpus(cpumask_t map) +restart_cpus(cpuset_t map) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif if (!smp_started) return 0; - CTR1(KTR_SMP, "restart_cpus(%x)", map); + CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); /* signal other cpus to restart */ - atomic_store_rel_int(&started_cpus, map); + CPU_COPY_STORE_REL(&map, &started_cpus); /* wait for each to clear its bit */ - while ((stopped_cpus & map) != 0) + while (CPU_OVERLAP(&stopped_cpus, &map)) cpu_spinwait(); return 1; @@ -409,13 +416,13 @@ smp_rendezvous_action(void) } void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { - int i, ncpus = 0; + int curcpumap, i, ncpus = 0; if (!smp_started) { if (setup_func != NULL) @@ -428,11 +435,11 @@ smp_rendezvous_cpus(cpumask_t map, } CPU_FOREACH(i) { - if (((1 << i) & map) != 0) + if (CPU_ISSET(i, &map)) ncpus++; } if (ncpus == 0) - panic("ncpus is 0 with map=0x%x", map); + panic("ncpus is 0 with non-zero map"); mtx_lock_spin(&smp_ipi_mtx); @@ -452,10 +459,12 @@ smp_rendezvous_cpus(cpumask_t map, * Signal other processors, which will enter the IPI with * interrupts off. 
*/ - ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS); + curcpumap = CPU_ISSET(curcpu, &map); + CPU_CLR(curcpu, &map); + ipi_selected(map, IPI_RENDEZVOUS); /* Check if the current CPU is in the map */ - if ((map & (1 << curcpu)) != 0) + if (curcpumap != 0) smp_rendezvous_action(); /* @@ -484,6 +493,7 @@ static struct cpu_group group[MAXCPU]; struct cpu_group * smp_topo(void) { + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; struct cpu_group *top; /* @@ -530,9 +540,10 @@ smp_topo(void) if (top->cg_count != mp_ncpus) panic("Built bad topology at %p. CPU count %d != %d", top, top->cg_count, mp_ncpus); - if (top->cg_mask != all_cpus) - panic("Built bad topology at %p. CPU mask 0x%X != 0x%X", - top, top->cg_mask, all_cpus); + if (CPU_CMP(&top->cg_mask, &all_cpus)) + panic("Built bad topology at %p. CPU mask (%s) != (%s)", + top, cpusetobj_strprint(cpusetbuf, &top->cg_mask), + cpusetobj_strprint(cpusetbuf2, &all_cpus)); return (top); } @@ -557,11 +568,13 @@ static int smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, int count, int flags, int start) { - cpumask_t mask; + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; + cpuset_t mask; int i; - for (mask = 0, i = 0; i < count; i++, start++) - mask |= (1 << start); + CPU_ZERO(&mask); + for (i = 0; i < count; i++, start++) + CPU_SET(start, &mask); child->cg_parent = parent; child->cg_child = NULL; child->cg_children = 0; @@ -571,10 +584,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, child->cg_mask = mask; parent->cg_children++; for (; parent != NULL; parent = parent->cg_parent) { - if ((parent->cg_mask & child->cg_mask) != 0) - panic("Duplicate children in %p. mask 0x%X child 0x%X", - parent, parent->cg_mask, child->cg_mask); - parent->cg_mask |= child->cg_mask; + if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask)) + panic("Duplicate children in %p. 
mask (%s) child (%s)", + parent, + cpusetobj_strprint(cpusetbuf, &parent->cg_mask), + cpusetobj_strprint(cpusetbuf2, &child->cg_mask)); + CPU_OR(&parent->cg_mask, &child->cg_mask); parent->cg_count += child->cg_count; } @@ -634,20 +649,20 @@ struct cpu_group * smp_topo_find(struct cpu_group *top, int cpu) { struct cpu_group *cg; - cpumask_t mask; + cpuset_t mask; int children; int i; - mask = (1 << cpu); + CPU_SETOF(cpu, &mask); cg = top; for (;;) { - if ((cg->cg_mask & mask) == 0) + if (!CPU_OVERLAP(&cg->cg_mask, &mask)) return (NULL); if (cg->cg_children == 0) return (cg); children = cg->cg_children; for (i = 0, cg = cg->cg_child; i < children; cg++, i++) - if ((cg->cg_mask & mask) != 0) + if (CPU_OVERLAP(&cg->cg_mask, &mask)) break; } return (NULL); @@ -655,7 +670,7 @@ smp_topo_find(struct cpu_group *top, int cpu) #else /* !SMP */ void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), diff --git a/sys/mips/cavium/octeon_mp.c b/sys/mips/cavium/octeon_mp.c index 78eafa6..efddee8 100644 --- a/sys/mips/cavium/octeon_mp.c +++ b/sys/mips/cavium/octeon_mp.c @@ -102,10 +102,18 @@ platform_init_ap(int cpuid) mips_wbflush(); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { - return (octeon_bootinfo->core_mask); + + CPU_ZERO(mask); + + /* + * XXX: hack in order to simplify CPU set building, assuming that + * core_mask is 32-bits. + */ + memcpy(mask, &octeon_bootinfo->core_mask, + sizeof(octeon_bootinfo->core_mask)); } struct cpu_group * diff --git a/sys/mips/include/_types.h b/sys/mips/include/_types.h index 4d57e20..2f23db6 100644 --- a/sys/mips/include/_types.h +++ b/sys/mips/include/_types.h @@ -73,7 +73,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... 
*/ -typedef unsigned int __cpumask_t; typedef double __double_t; typedef double __float_t; #ifdef __mips_n64 diff --git a/sys/mips/include/hwfunc.h b/sys/mips/include/hwfunc.h index 683aedb..a9e3285 100644 --- a/sys/mips/include/hwfunc.h +++ b/sys/mips/include/hwfunc.h @@ -28,6 +28,8 @@ #ifndef _MACHINE_HWFUNC_H_ #define _MACHINE_HWFUNC_H_ +#include <sys/_cpuset.h> + struct trapframe; struct timecounter; /* @@ -91,7 +93,7 @@ extern int platform_processor_id(void); /* * Return the cpumask of available processors. */ -extern cpumask_t platform_cpu_mask(void); +extern void platform_cpu_mask(cpuset_t *mask); /* * Return the topology of processors on this platform diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index e710635..90375eb 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -58,6 +58,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -83,7 +84,7 @@ struct pmap { pd_entry_t *pm_segtab; /* KVA of segment table */ TAILQ_HEAD(, pv_entry) pm_pvlist; /* list of mappings in * pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct { u_int32_t asid:ASID_BITS; /* TLB address space tag */ u_int32_t gen:ASIDGEN_BITS; /* its generation number */ diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h index 58aaf03..0fcca9a 100644 --- a/sys/mips/include/smp.h +++ b/sys/mips/include/smp.h @@ -17,6 +17,8 @@ #ifdef _KERNEL +#include <sys/_cpuset.h> + #include <machine/pcb.h> /* @@ -33,7 +35,7 @@ void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void smp_init_secondary(u_int32_t cpuid); void mpentry(void); diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index 7191b37..79a3476 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include 
<sys/param.h> #include <sys/systm.h> +#include <sys/cpuset.h> #include <sys/ktr.h> #include <sys/proc.h> #include <sys/lock.h> @@ -80,15 +81,16 @@ ipi_all_but_self(int ipi) /* Send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; - CTR3(KTR_SMP, "%s: cpus: %x, ipi: %x\n", __func__, cpus, ipi); - STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if ((cpus & pc->pc_cpumask) != 0) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) { + CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, + ipi); ipi_send(pc, ipi); + } } } @@ -108,7 +110,7 @@ static int mips_ipi_handler(void *arg) { int cpu; - cpumask_t cpumask; + cpuset_t cpumask; u_int ipi, ipi_bitmap; int bit; @@ -148,14 +150,14 @@ mips_ipi_handler(void *arg) tlb_save(); /* Indicate we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while ((started_cpus & cpumask) == 0) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) cpu_spinwait(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); CTR0(KTR_SMP, "IPI_STOP (restart)"); break; case IPI_PREEMPT: @@ -200,14 +202,22 @@ start_ap(int cpuid) void cpu_mp_setmaxid(void) { - cpumask_t cpumask; - - cpumask = platform_cpu_mask(); - mp_ncpus = bitcount32(cpumask); + cpuset_t cpumask; + int cpu, last; + + platform_cpu_mask(&cpumask); + mp_ncpus = 0; + last = 1; + while ((cpu = cpusetobj_ffs(&cpumask)) != 0) { + last = cpu; + cpu--; + CPU_CLR(cpu, &cpumask); + mp_ncpus++; + } if (mp_ncpus <= 0) mp_ncpus = 1; - mp_maxid = min(fls(cpumask), MAXCPU) - 1; + mp_maxid = min(last, MAXCPU) - 1; } void @@ -233,16 +243,16 @@ void cpu_mp_start(void) { int error, cpuid; - cpumask_t cpumask; + cpuset_t cpumask, ocpus; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - all_cpus = 0; - cpumask = platform_cpu_mask(); + 
CPU_ZERO(&all_cpus); + platform_cpu_mask(&cpumask); - while (cpumask != 0) { - cpuid = ffs(cpumask) - 1; - cpumask &= ~(1 << cpuid); + while (!CPU_EMPTY(&cpumask)) { + cpuid = cpusetobj_ffs(&cpumask) - 1; + CPU_CLR(cpuid, &cpumask); if (cpuid >= MAXCPU) { printf("cpu_mp_start: ignoring AP #%d.\n", cpuid); @@ -257,15 +267,19 @@ cpu_mp_start(void) if (bootverbose) printf("AP #%d started!\n", cpuid); } - all_cpus |= 1 << cpuid; + CPU_SET(cpuid, &all_cpus); } - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + ocpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ocpus); + PCPU_SET(other_cpus, ocpus); } void smp_init_secondary(u_int32_t cpuid) { + cpuset_t ocpus; + /* TLB */ mips_wr_wired(0); tlb_invalidate_all(); @@ -303,7 +317,9 @@ smp_init_secondary(u_int32_t cpuid) CTR1(KTR_SMP, "SMP: AP CPU #%d launched", PCPU_GET(cpuid)); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + ocpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ocpus); + PCPU_SET(other_cpus, ocpus); if (bootverbose) printf("SMP: AP CPU #%d launched.\n", PCPU_GET(cpuid)); diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 7f0f4f0..f7ea660 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -471,7 +471,7 @@ pmap_create_kernel_pagetable(void) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_segtab = kernel_segmap; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); TAILQ_INIT(&kernel_pmap->pm_pvlist); kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; kernel_pmap->pm_asid[0].gen = 0; @@ -630,10 +630,14 @@ pmap_invalidate_all_local(pmap_t pmap) tlb_invalidate_all(); return; } - if (pmap->pm_active & PCPU_GET(cpumask)) + sched_pin(); + if (CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { + sched_unpin(); tlb_invalidate_all_user(pmap); - else + } else { + sched_unpin(); pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + } } #ifdef SMP @@ -667,12 +671,16 @@ pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va) tlb_invalidate_address(pmap, va); 
return; } - if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + sched_pin(); + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { + sched_unpin(); return; - else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + sched_unpin(); return; } + sched_unpin(); tlb_invalidate_address(pmap, va); } @@ -716,12 +724,16 @@ pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte) tlb_update(pmap, va, pte); return; } - if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + sched_pin(); + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { + sched_unpin(); return; - else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + sched_unpin(); return; } + sched_unpin(); tlb_update(pmap, va, pte); } @@ -1041,7 +1053,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_segtab = kernel_segmap; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); pmap->pm_ptphint = NULL; for (i = 0; i < MAXCPU; i++) { pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; @@ -1102,7 +1114,7 @@ pmap_pinit(pmap_t pmap) ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); pmap->pm_segtab = (pd_entry_t *)ptdva; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); pmap->pm_ptphint = NULL; for (i = 0; i < MAXCPU; i++) { pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; @@ -2948,8 +2960,8 @@ pmap_activate(struct thread *td) oldpmap = PCPU_GET(curpmap); if (oldpmap) - atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); pmap_asid_alloc(pmap); if (td == curthread) { PCPU_SET(segbase, pmap->pm_segtab); @@ -3283,7 +3295,7 @@ pmap_kextract(vm_offset_t va) pt_entry_t *ptep; /* 
Is the kernel pmap initialized? */ - if (kernel_pmap->pm_active) { + if (!CPU_EMPTY(&kernel_pmap->pm_active)) { /* It's inside the virtual address range */ ptep = pmap_pte(kernel_pmap, va); if (ptep) { diff --git a/sys/mips/rmi/xlr_machdep.c b/sys/mips/rmi/xlr_machdep.c index 4a1734a..836c605 100644 --- a/sys/mips/rmi/xlr_machdep.c +++ b/sys/mips/rmi/xlr_machdep.c @@ -614,11 +614,15 @@ platform_processor_id(void) return (xlr_hwtid_to_cpuid[xlr_cpu_id()]); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { + int i, s; - return (~0U >> (32 - (xlr_ncores * xlr_threads_per_core))); + CPU_ZERO(mask); + s = xlr_ncores * xlr_threads_per_core; + for (i = 0; i < s; i++) + CPU_SET(i, mask); } struct cpu_group * diff --git a/sys/mips/sibyte/sb_scd.c b/sys/mips/sibyte/sb_scd.c index e5ac23c..50b9987 100644 --- a/sys/mips/sibyte/sb_scd.c +++ b/sys/mips/sibyte/sb_scd.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/module.h> #include <sys/bus.h> +#include <sys/cpuset.h> #include <machine/resource.h> #include <machine/hwfunc.h> @@ -242,11 +243,15 @@ sb_clear_mailbox(int cpu, uint64_t val) sb_store64(regaddr, val); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { + int i, s; - return (~0U >> (32 - SYSREV_NUM_PROCESSORS(sb_read_sysrev()))); + CPU_ZERO(mask); + s = SYSREV_NUM_PROCESSORS(sb_read_sysrev()); + for (i = 0; i < s; i++) + CPU_SET(i, mask); } #endif /* SMP */ diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h index f6f9404..61b42d2 100644 --- a/sys/ofed/include/linux/list.h +++ b/sys/ofed/include/linux/list.h @@ -38,6 +38,7 @@ #include <sys/param.h> #include <sys/kernel.h> #include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 51c6f8a..be80455 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -118,11 
+118,14 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/vmmeter.h> @@ -820,7 +823,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) PMAP_LOCK_INIT(kernel_pmap); for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); /* * Set up the Open Firmware mappings @@ -942,7 +945,9 @@ moea_activate(mmu_t mmu, struct thread *td) pm = &td->td_proc->p_vmspace->vm_pmap; pmr = pm->pmap_phys; - pm->pm_active |= PCPU_GET(cpumask); + sched_pin(); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, pmr); } @@ -952,7 +957,9 @@ moea_deactivate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active &= ~PCPU_GET(cpumask); + sched_pin(); + CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, NULL); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 12a1201..291d89b 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/vmmeter.h> @@ -827,7 +830,7 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) #endif kernel_pmap->pmap_phys = kernel_pmap; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); PMAP_LOCK_INIT(kernel_pmap); @@ -995,7 +998,9 @@ moea64_activate(mmu_t mmu, struct thread *td) pmap_t 
pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active |= PCPU_GET(cpumask); + sched_pin(); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); @@ -1010,7 +1015,9 @@ moea64_deactivate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active &= ~(PCPU_GET(cpumask)); + sched_pin(); + CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else diff --git a/sys/powerpc/booke/platform_bare.c b/sys/powerpc/booke/platform_bare.c index 90c73e0..d76664e 100644 --- a/sys/powerpc/booke/platform_bare.c +++ b/sys/powerpc/booke/platform_bare.c @@ -256,7 +256,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) int timeout; eebpcr = ccsr_read4(OCP85XX_EEBPCR); - if ((eebpcr & (pc->pc_cpumask << 24)) != 0) { + if ((eebpcr & (1 << (pc->pc_cpuid + 24))) != 0) { printf("%s: CPU=%d already out of hold-off state!\n", __func__, pc->pc_cpuid); return (ENXIO); @@ -274,7 +274,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) /* * Release AP from hold-off state */ - eebpcr |= (pc->pc_cpumask << 24); + eebpcr |= (1 << (pc->pc_cpuid + 24)); ccsr_write4(OCP85XX_EEBPCR, eebpcr); __asm __volatile("isync; msync"); diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index cabe58f..e1cd071 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include <sys/msgbuf.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/sched.h> #include <sys/smp.h> #include <sys/vmmeter.h> @@ -1225,7 +1226,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) PTE_VALID; } /* Mark kernel_pmap active on all CPUs */ - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); /*******************************************************/ /* Final setup */ @@ -1480,7 +1481,7 @@ mmu_booke_pinit(mmu_t mmu, pmap_t pmap) PMAP_LOCK_INIT(pmap); for (i = 0; 
i < MAXCPU; i++) pmap->pm_tid[i] = TID_NONE; - pmap->pm_active = 0; + CPU_ZERO(&kernel_pmap->pm_active); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); TAILQ_INIT(&pmap->pm_ptbl_list); @@ -1835,7 +1836,7 @@ mmu_booke_activate(mmu_t mmu, struct thread *td) mtx_lock_spin(&sched_lock); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); PCPU_SET(curpmap, pmap); if (pmap->pm_tid[PCPU_GET(cpuid)] == TID_NONE) @@ -1864,7 +1865,9 @@ mmu_booke_deactivate(mmu_t mmu, struct thread *td) CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x", __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); - atomic_clear_int(&pmap->pm_active, PCPU_GET(cpumask)); + sched_pin(); + CPU_NAND_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, NULL); } diff --git a/sys/powerpc/include/_types.h b/sys/powerpc/include/_types.h index fae2416..b0b582e 100644 --- a/sys/powerpc/include/_types.h +++ b/sys/powerpc/include/_types.h @@ -72,7 +72,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef double __double_t; typedef double __float_t; #ifdef __LP64__ diff --git a/sys/powerpc/include/openpicvar.h b/sys/powerpc/include/openpicvar.h index 4fb9aa7..605dc0f 100644 --- a/sys/powerpc/include/openpicvar.h +++ b/sys/powerpc/include/openpicvar.h @@ -57,7 +57,7 @@ int openpic_common_attach(device_t, uint32_t); /* * PIC interface. 
*/ -void openpic_bind(device_t dev, u_int irq, cpumask_t cpumask); +void openpic_bind(device_t dev, u_int irq, cpuset_t cpumask); void openpic_config(device_t, u_int, enum intr_trigger, enum intr_polarity); void openpic_dispatch(device_t, struct trapframe *); void openpic_enable(device_t, u_int, u_int); diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h index 369ca9d..9166d04 100644 --- a/sys/powerpc/include/pmap.h +++ b/sys/powerpc/include/pmap.h @@ -66,6 +66,7 @@ #include <sys/queue.h> #include <sys/tree.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> #include <machine/sr.h> @@ -98,7 +99,7 @@ struct pmap { #else register_t pm_sr[16]; #endif - cpumask_t pm_active; + cpuset_t pm_active; struct pmap *pmap_phys; struct pmap_statistics pm_stats; @@ -175,7 +176,7 @@ void slb_free_user_cache(struct slb **); struct pmap { struct mtx pm_mtx; /* pmap mutex */ tlbtid_t pm_tid[MAXCPU]; /* TID to identify this pmap entries in TLB */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ /* Page table directory, array of pointers to page tables. 
*/ diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h index cf95278..32fcfb4 100644 --- a/sys/powerpc/include/smp.h +++ b/sys/powerpc/include/smp.h @@ -40,9 +40,11 @@ #ifndef LOCORE +#include <sys/_cpuset.h> + void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); struct cpuref { uintptr_t cr_hwref; diff --git a/sys/powerpc/mpc85xx/openpic_fdt.c b/sys/powerpc/mpc85xx/openpic_fdt.c index 7cf18ea..1cd9369 100644 --- a/sys/powerpc/mpc85xx/openpic_fdt.c +++ b/sys/powerpc/mpc85xx/openpic_fdt.c @@ -37,11 +37,12 @@ __FBSDID("$FreeBSD$"); #include <machine/bus.h> #include <machine/intr_machdep.h> -#include <machine/openpicvar.h> #include <dev/ofw/ofw_bus.h> #include <dev/ofw/ofw_bus_subr.h> +#include <machine/openpicvar.h> + #include "pic_if.h" static int openpic_fdt_probe(device_t); diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c index f2bfa33..1e6342c 100644 --- a/sys/powerpc/powerpc/intr_machdep.c +++ b/sys/powerpc/powerpc/intr_machdep.c @@ -67,6 +67,7 @@ #include <sys/kernel.h> #include <sys/queue.h> #include <sys/bus.h> +#include <sys/cpuset.h> #include <sys/interrupt.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -98,7 +99,7 @@ struct powerpc_intr { u_int intline; u_int vector; u_int cntindex; - cpumask_t cpu; + cpuset_t cpu; enum intr_trigger trig; enum intr_polarity pol; }; @@ -205,7 +206,7 @@ intr_lookup(u_int irq) #ifdef SMP i->cpu = all_cpus; #else - i->cpu = 1; + CPU_SETOF(0, &i->cpu); #endif for (vector = 0; vector < INTR_VECTORS && vector <= nvectors; @@ -296,7 +297,7 @@ powerpc_assign_intr_cpu(void *arg, u_char cpu) if (cpu == NOCPU) i->cpu = all_cpus; else - i->cpu = 1 << cpu; + CPU_SETOF(cpu, &i->cpu); if (!cold && i->pic != NULL && i->pic == root_pic) PIC_BIND(i->pic, i->intline, i->cpu); diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c index 577d4dc..62a97e9 100644 --- 
a/sys/powerpc/powerpc/mp_machdep.c +++ b/sys/powerpc/powerpc/mp_machdep.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/bus.h> +#include <sys/cpuset.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/pcpu.h> @@ -157,7 +158,7 @@ cpu_mp_start(void) cpu.cr_cpuid); goto next; } - if (all_cpus & (1 << cpu.cr_cpuid)) { + if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) { printf("SMP: cpu%d: skipped - duplicate ID\n", cpu.cr_cpuid); goto next; @@ -174,9 +175,9 @@ cpu_mp_start(void) pc->pc_cpuid = bsp.cr_cpuid; pc->pc_bsp = 1; } - pc->pc_cpumask = 1 << pc->pc_cpuid; + CPU_SETOF(pc->pc_cpuid, &pc->pc_cpumask); pc->pc_hwref = cpu.cr_hwref; - all_cpus |= pc->pc_cpumask; + CPU_OR(&all_cpus, &pc->pc_cpumask); next: error = platform_smp_next_cpu(&cpu); } @@ -214,7 +215,8 @@ cpu_mp_unleash(void *dummy) smp_cpus = 0; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { cpus++; - pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask; + pc->pc_other_cpus = all_cpus; + CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask); if (!pc->pc_bsp) { if (bootverbose) printf("Waking up CPU %d (dev=%x)\n", @@ -236,7 +238,7 @@ cpu_mp_unleash(void *dummy) pc->pc_cpuid, pc->pc_pir, pc->pc_awake); smp_cpus++; } else - stopped_cpus |= (1 << pc->pc_cpuid); + CPU_SET(pc->pc_cpuid, &stopped_cpus); } ap_awake = 1; @@ -276,7 +278,7 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL); int powerpc_ipi_handler(void *arg) { - cpumask_t self; + cpuset_t self; uint32_t ipimask; int msg; @@ -311,11 +313,11 @@ powerpc_ipi_handler(void *arg) savectx(&stoppcbs[PCPU_GET(cpuid)]); self = PCPU_GET(cpumask); savectx(PCPU_GET(curpcb)); - atomic_set_int(&stopped_cpus, self); - while ((started_cpus & self) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &self); + while (!CPU_OVERLAP(&started_cpus, &self)) cpu_spinwait(); - atomic_clear_int(&started_cpus, self); - atomic_clear_int(&stopped_cpus, self); + CPU_NAND_ATOMIC(&started_cpus, &self); + CPU_NAND_ATOMIC(&stopped_cpus, &self); 
CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__); break; case IPI_HARDCLOCK: @@ -343,12 +345,12 @@ ipi_send(struct pcpu *pc, int ipi) /* Send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } diff --git a/sys/powerpc/powerpc/openpic.c b/sys/powerpc/powerpc/openpic.c index 042f8b8..347dc3f 100644 --- a/sys/powerpc/powerpc/openpic.c +++ b/sys/powerpc/powerpc/openpic.c @@ -231,7 +231,7 @@ openpic_common_attach(device_t dev, uint32_t node) */ void -openpic_bind(device_t dev, u_int irq, cpumask_t cpumask) +openpic_bind(device_t dev, u_int irq, cpuset_t cpumask) { struct openpic_softc *sc; @@ -240,7 +240,12 @@ openpic_bind(device_t dev, u_int irq, cpumask_t cpumask) return; sc = device_get_softc(dev); - openpic_write(sc, OPENPIC_IDEST(irq), cpumask); + + /* + * XXX: openpic_write() is very special and just needs a 32 bits mask. + * For the moment, just play dirty and get the first half word. + */ + openpic_write(sc, OPENPIC_IDEST(irq), cpumask.__bits[0] & 0xffffffff); } void diff --git a/sys/powerpc/powerpc/pic_if.m b/sys/powerpc/powerpc/pic_if.m index 185cc08..e429d31 100644 --- a/sys/powerpc/powerpc/pic_if.m +++ b/sys/powerpc/powerpc/pic_if.m @@ -28,6 +28,7 @@ # #include <sys/bus.h> +#include <sys/cpuset.h> #include <machine/frame.h> INTERFACE pic; @@ -35,7 +36,7 @@ INTERFACE pic; METHOD void bind { device_t dev; u_int irq; - cpumask_t cpumask; + cpuset_t cpumask; }; METHOD void config { diff --git a/sys/sparc64/include/_types.h b/sys/sparc64/include/_types.h index f810c15..7e993c4 100644 --- a/sys/sparc64/include/_types.h +++ b/sys/sparc64/include/_types.h @@ -55,7 +55,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... 
*/ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/sparc64/include/ktr.h b/sys/sparc64/include/ktr.h index 5948ba2..2a9966b 100644 --- a/sys/sparc64/include/ktr.h +++ b/sys/sparc64/include/ktr.h @@ -85,7 +85,9 @@ l2: add r2, 1, r3 ; \ lduw [PCPU(MID)], r1 ; \ mov 1, r2 ; \ sllx r2, r1, r1 ; \ +#ifdef notyet \ TEST(ktr_cpumask, r1, r2, r3, l3) ; \ +#endif \ ATR(desc, r1, r2, r3, l1, l2) #endif /* LOCORE */ diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h index e16ea97..adad257 100644 --- a/sys/sparc64/include/pmap.h +++ b/sys/sparc64/include/pmap.h @@ -40,6 +40,7 @@ #define _MACHINE_PMAP_H_ #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> #include <machine/cache.h> @@ -61,7 +62,7 @@ struct pmap { struct mtx pm_mtx; struct tte *pm_tsb; vm_object_t pm_tsb_obj; - cpumask_t pm_active; + cpuset_t pm_active; u_int pm_context[MAXCPU]; struct pmap_statistics pm_stats; }; diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h index 3ca8e03..1ba0d9e 100644 --- a/sys/sparc64/include/smp.h +++ b/sys/sparc64/include/smp.h @@ -38,6 +38,7 @@ #ifndef LOCORE +#include <sys/cpuset.h> #include <sys/proc.h> #include <sys/sched.h> @@ -76,17 +77,17 @@ struct cpu_start_args { }; struct ipi_cache_args { - cpumask_t ica_mask; + cpuset_t ica_mask; vm_paddr_t ica_pa; }; struct ipi_rd_args { - cpumask_t ira_mask; + cpuset_t ira_mask; register_t *ira_val; }; struct ipi_tlb_args { - cpumask_t ita_mask; + cpuset_t ita_mask; struct pmap *ita_pmap; u_long ita_start; u_long ita_end; @@ -100,7 +101,7 @@ extern struct pcb stoppcbs[]; void cpu_mp_bootstrap(struct pcpu *pc); void cpu_mp_shutdown(void); -typedef void cpu_ipi_selected_t(u_int, u_long, u_long, u_long); +typedef void cpu_ipi_selected_t(cpuset_t, u_long, u_long, u_long); extern cpu_ipi_selected_t *cpu_ipi_selected; typedef void cpu_ipi_single_t(u_int, u_long, u_long, u_long); 
extern cpu_ipi_single_t *cpu_ipi_single; @@ -140,7 +141,7 @@ ipi_all_but_self(u_int ipi) } static __inline void -ipi_selected(u_int cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi); @@ -197,7 +198,8 @@ ipi_rd(u_int cpu, void *func, u_long *val) sched_pin(); ira = &ipi_rd_args; mtx_lock_spin(&ipi_mtx); - ira->ira_mask = 1 << cpu | PCPU_GET(cpumask); + ira->ira_mask = PCPU_GET(cpumask); + CPU_SET(cpu, &ira->ira_mask); ira->ira_val = val; cpu_ipi_single(cpu, 0, (u_long)func, (u_long)ira); return (&ira->ira_mask); @@ -207,18 +209,21 @@ static __inline void * ipi_tlb_context_demap(struct pmap *pm) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, PCPU_PTR(other_cpus)); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; + CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask)); ita->ita_pmap = pm; cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_context_demap, (u_long)ita); @@ -229,18 +234,21 @@ static __inline void * ipi_tlb_page_demap(struct pmap *pm, vm_offset_t va) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, PCPU_PTR(other_cpus)); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; + CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask)); ita->ita_pmap = pm; ita->ita_va = va; cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_page_demap, (u_long)ita); @@ -251,18 +259,21 @@ static __inline void * ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) { struct 
ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, PCPU_PTR(other_cpus)); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; + CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask)); ita->ita_pmap = pm; ita->ita_start = start; ita->ita_end = end; @@ -274,11 +285,11 @@ ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) static __inline void ipi_wait(void *cookie) { - volatile cpumask_t *mask; + volatile cpuset_t *mask; if ((mask = cookie) != NULL) { - atomic_clear_int(mask, PCPU_GET(cpumask)); - while (*mask != 0) + CPU_NAND_ATOMIC(mask, PCPU_PTR(cpumask)); + while (!CPU_EMPTY(mask)) ; mtx_unlock_spin(&ipi_mtx); sched_unpin(); diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c index e33e581..89ec718 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/smp.h> #include <sys/vmmeter.h> +#include <sys/_cpuset.h> #include <vm/vm.h> #include <vm/vm_page.h> @@ -59,6 +60,8 @@ ASSYM(PCPU_PAGES, PCPU_PAGES); ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT); +ASSYM(_NCPUBITS, _NCPUBITS); + #ifdef SUN4U ASSYM(TLB_DEMAP_ALL, TLB_DEMAP_ALL); #endif @@ -137,7 +140,6 @@ ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); -ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask)); ASSYM(PC_IRHEAD, offsetof(struct pcpu, pc_irhead)); ASSYM(PC_IRTAIL, offsetof(struct pcpu, pc_irtail)); ASSYM(PC_IRFREE, offsetof(struct pcpu, pc_irfree)); diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c index f6ef9a7..ed30182 100644 --- 
a/sys/sparc64/sparc64/intr_machdep.c +++ b/sys/sparc64/sparc64/intr_machdep.c @@ -445,8 +445,7 @@ intr_describe(int vec, void *ih, const char *descr) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; static void @@ -468,7 +467,7 @@ intr_assign_next_cpu(struct intr_vector *iv) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); } /* Attempt to bind the specified IRQ to the specified CPU. */ @@ -504,7 +503,7 @@ intr_add_cpu(u_int cpu) if (bootverbose) printf("INTR: Adding CPU %d as a target\n", cpu); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -518,6 +517,9 @@ intr_shuffle_irqs(void *arg __unused) struct intr_vector *iv; int i; + /* The BSP is always a valid target. */ + CPU_SET(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/sparc64/sparc64/mp_exception.S b/sys/sparc64/sparc64/mp_exception.S index 5a8a105..d378035 100644 --- a/sys/sparc64/sparc64/mp_exception.S +++ b/sys/sparc64/sparc64/mp_exception.S @@ -38,9 +38,19 @@ __FBSDID("$FreeBSD$"); .register %g2, #ignore .register %g3, #ignore -#define IPI_DONE(r1, r2, r3, r4) \ - lduw [PCPU(CPUMASK)], r4 ; \ - ATOMIC_CLEAR_INT(r1, r2, r3, r4) +#define IPI_DONE(r1, r2, r3, r4, r5) \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r4 ; \ + srl r4, 0, r5 ; \ + sllx r5, PTR_SHIFT, r5 ; \ + add r1, r5, r1 ; \ + smul r4, r3, r3 ; \ + sub r2, r3, r3 ; \ + mov 1, r4 ; \ + sllx r4, r3, r4 ; \ + ATOMIC_CLEAR_LONG(r1, r2, r3, r4) /* * Invalidate a physical page in the data cache. For UltraSPARC I and II.
@@ -77,7 +87,7 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval) 2: brgz,pt %g2, 1b sub %g2, %g4, %g2 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_spitfire_dcache_page_inval) @@ -117,7 +127,7 @@ ENTRY(tl_ipi_spitfire_icache_page_inval) 2: brgz,pt %g2, 1b sub %g2, %g4, %g2 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_spitfire_icache_page_inval) @@ -148,7 +158,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval) blt,a,pt %xcc, 1b nop - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_cheetah_dcache_page_inval) @@ -204,7 +214,7 @@ ENTRY(tl_ipi_tlb_page_demap) stxa %g0, [%g2] ASI_IMMU_DEMAP flush %g3 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_tlb_page_demap) @@ -247,7 +257,7 @@ ENTRY(tl_ipi_tlb_range_demap) blt,a,pt %xcc, 1b nop - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_tlb_range_demap) @@ -271,7 +281,7 @@ ENTRY(tl_ipi_tlb_context_demap) stxa %g0, [%g1] ASI_IMMU_DEMAP flush %g3 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_tlb_context_demap) @@ -283,7 +293,7 @@ ENTRY(tl_ipi_stick_rd) rd %asr24, %g2 stx %g2, [%g1] - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_stick_rd) @@ -295,6 +305,6 @@ ENTRY(tl_ipi_tick_rd) rd %tick, %g2 stx %g2, [%g1] - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4) retry END(tl_ipi_tick_rd) diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c index 4d9151e..304a0f3 100644 --- a/sys/sparc64/sparc64/mp_machdep.c +++ b/sys/sparc64/sparc64/mp_machdep.c @@ -121,7 +121,7 @@ cpu_ipi_single_t *cpu_ipi_single; static vm_offset_t mp_tramp; static u_int cpuid_to_mid[MAXCPU]; static int isjbus; -static volatile cpumask_t shutdown_cpus; +static volatile cpuset_t shutdown_cpus; static void ap_count(phandle_t node, u_int mid, u_int cpu_impl); static void 
ap_start(phandle_t node, u_int mid, u_int cpu_impl); @@ -228,7 +228,7 @@ void cpu_mp_setmaxid() { - all_cpus = 1 << curcpu; + CPU_SETOF(curcpu, &all_cpus); mp_ncpus = 1; mp_maxid = 0; @@ -283,6 +283,7 @@ sun4u_startcpu(phandle_t cpu, void *func, u_long arg) void cpu_mp_start(void) { + cpuset_t ocpus; mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN); @@ -299,7 +300,9 @@ cpu_mp_start(void) KASSERT(!isjbus || mp_ncpus <= IDR_JALAPENO_MAX_BN_PAIRS, ("%s: can only IPI a maximum of %d JBus-CPUs", __func__, IDR_JALAPENO_MAX_BN_PAIRS)); - PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu)); + ocpus = all_cpus; + CPU_CLR(curcpu, &ocpus); + PCPU_SET(other_cpus, ocpus); smp_active = 1; } @@ -357,7 +360,7 @@ ap_start(phandle_t node, u_int mid, u_int cpu_impl) cache_init(pc); - all_cpus |= 1 << cpuid; + CPU_SET(cpuid, &all_cpus); intr_add_cpu(cpuid); } @@ -421,6 +424,7 @@ cpu_mp_unleash(void *v) void cpu_mp_bootstrap(struct pcpu *pc) { + cpuset_t ocpus; volatile struct cpu_start_args *csa; csa = &cpu_start_args; @@ -465,7 +469,9 @@ cpu_mp_bootstrap(struct pcpu *pc) smp_cpus++; KASSERT(curthread != NULL, ("%s: curthread", __func__)); - PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu)); + ocpus = all_cpus; + CPU_CLR(curcpu, &ocpus); + PCPU_SET(other_cpus, ocpus); printf("SMP: AP CPU #%d Launched!\n", curcpu); csa->csa_count--; @@ -484,14 +490,22 @@ cpu_mp_bootstrap(struct pcpu *pc) void cpu_mp_shutdown(void) { + cpuset_t cpus; int i; critical_enter(); shutdown_cpus = PCPU_GET(other_cpus); - if (stopped_cpus != PCPU_GET(other_cpus)) /* XXX */ - stop_cpus(stopped_cpus ^ PCPU_GET(other_cpus)); + cpus = shutdown_cpus; + + /* XXX: Stop all the CPUs which aren't already. */ + if (CPU_CMP(&stopped_cpus, &cpus)) { + + /* pc_other_cpus is just a flat "on" mask without curcpu. 
*/ + CPU_NAND(&cpus, &stopped_cpus); + stop_cpus(cpus); + } i = 0; - while (shutdown_cpus != 0) { + while (!CPU_EMPTY(&shutdown_cpus)) { if (i++ > 100000) { printf("timeout shutting down CPUs.\n"); break; @@ -509,20 +523,24 @@ cpu_ipi_ast(struct trapframe *tf) static void cpu_ipi_stop(struct trapframe *tf) { + cpuset_t tcmask; CTR2(KTR_SMP, "%s: stopped %d", __func__, curcpu); + sched_pin(); savectx(&stoppcbs[curcpu]); - atomic_set_acq_int(&stopped_cpus, PCPU_GET(cpumask)); - while ((started_cpus & PCPU_GET(cpumask)) == 0) { - if ((shutdown_cpus & PCPU_GET(cpumask)) != 0) { - atomic_clear_int(&shutdown_cpus, PCPU_GET(cpumask)); + tcmask = PCPU_GET(cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &tcmask); + while (!CPU_OVERLAP(&started_cpus, &tcmask)) { + if (CPU_OVERLAP(&shutdown_cpus, &tcmask)) { + CPU_NAND_ATOMIC(&shutdown_cpus, &tcmask); (void)intr_disable(); for (;;) ; } } - atomic_clear_rel_int(&started_cpus, PCPU_GET(cpumask)); - atomic_clear_rel_int(&stopped_cpus, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&started_cpus, &tcmask); + CPU_NAND_ATOMIC(&stopped_cpus, &tcmask); + sched_unpin(); CTR2(KTR_SMP, "%s: restarted %d", __func__, curcpu); } @@ -551,13 +569,13 @@ cpu_ipi_hardclock(struct trapframe *tf) } static void -spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +spitfire_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { u_int cpu; - while (cpus) { - cpu = ffs(cpus) - 1; - cpus &= ~(1 << cpu); + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + cpu--; + CPU_CLR(cpu, &cpus); spitfire_ipi_single(cpu, d0, d1, d2); } } @@ -657,20 +675,21 @@ cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2) } static void -cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +cheetah_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { + char pbuf[CPUSETBUFSIZ]; register_t s; u_long ids; u_int bnp; u_int cpu; int i; - KASSERT((cpus & (1 << curcpu)) == 0, - ("%s: CPU can't IPI itself", __func__)); + KASSERT(!CPU_ISSET(curcpu, 
&cpus), ("%s: CPU can't IPI itself", + __func__)); KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_CHEETAH_ALL_BUSY) == 0, ("%s: outstanding dispatch", __func__)); - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; ids = 0; for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) { @@ -681,7 +700,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) membar(Sync); bnp = 0; for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] << IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT, ASI_SDB_INTR_W, 0); @@ -698,9 +717,9 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) return; bnp = 0; for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { if ((ids & (IDR_NACK << (2 * bnp))) == 0) - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); bnp++; } } @@ -709,7 +728,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) * CPUs we actually haven't tried to send an IPI to, * but which apparently can be safely ignored. 
*/ - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; /* * Leave interrupts enabled for a bit before retrying @@ -719,11 +738,11 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) DELAY(2 * mp_ncpus); } if (kdb_active != 0 || panicstr != NULL) - printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n", - __func__, cpus, ids); + printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); else - panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)", - __func__, cpus, ids); + panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); } static void @@ -772,19 +791,20 @@ jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2) } static void -jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +jalapeno_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { + char pbuf[CPUSETBUFSIZ]; register_t s; u_long ids; u_int cpu; int i; - KASSERT((cpus & (1 << curcpu)) == 0, - ("%s: CPU can't IPI itself", __func__)); + KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself", + __func__)); KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_CHEETAH_ALL_BUSY) == 0, ("%s: outstanding dispatch", __func__)); - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; ids = 0; for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) { @@ -794,7 +814,7 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2); membar(Sync); for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] << IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0); membar(Sync); @@ -808,10 +828,10 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) (IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0) return; for (cpu = 0; cpu < mp_ncpus; cpu++) - if ((cpus & (1 << cpu)) != 0) + if (CPU_ISSET(cpu, &cpus)) if ((ids & (IDR_NACK << (2 * cpuid_to_mid[cpu]))) == 0) - cpus &= 
~(1 << cpu); + CPU_CLR(cpu, &cpus); /* * Leave interrupts enabled for a bit before retrying * in order to avoid deadlocks if the other CPUs are @@ -820,9 +840,9 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) DELAY(2 * mp_ncpus); } if (kdb_active != 0 || panicstr != NULL) - printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n", - __func__, cpus, ids); + printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); else - panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)", - __func__, cpus, ids); + panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); } diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index c34fc45..b01a558 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -664,7 +664,7 @@ pmap_bootstrap(u_int cpu_impl) pm = kernel_pmap; for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = TLB_CTX_KERNEL; - pm->pm_active = ~0; + CPU_FILL(&pm->pm_active); /* * Flush all non-locked TLB entries possibly left over by the @@ -1189,7 +1189,7 @@ pmap_pinit0(pmap_t pm) PMAP_LOCK_INIT(pm); for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = TLB_CTX_KERNEL; - pm->pm_active = 0; + CPU_ZERO(&pm->pm_active); pm->pm_tsb = NULL; pm->pm_tsb_obj = NULL; bzero(&pm->pm_stats, sizeof(pm->pm_stats)); @@ -1229,7 +1229,7 @@ pmap_pinit(pmap_t pm) mtx_lock_spin(&sched_lock); for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = -1; - pm->pm_active = 0; + CPU_ZERO(&pm->pm_active); mtx_unlock_spin(&sched_lock); VM_OBJECT_LOCK(pm->pm_tsb_obj); @@ -2230,7 +2230,7 @@ pmap_activate(struct thread *td) PCPU_SET(tlb_ctx, context + 1); pm->pm_context[curcpu] = context; - pm->pm_active |= PCPU_GET(cpumask); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); PCPU_SET(pmap, pm); stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb); diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S index ea13779..7515734 100644 --- a/sys/sparc64/sparc64/swtch.S +++ 
b/sys/sparc64/sparc64/swtch.S @@ -164,20 +164,29 @@ ENTRY(cpu_switch) * If there was no non-kernel pmap, don't try to deactivate it. */ brz,pn %l2, 3f - lduw [PCPU(CPUMASK)], %l4 + lduw [PCPU(CPUID)], %l3 /* * Mark the pmap of the last non-kernel vmspace to run as no longer * active on this CPU. */ - lduw [%l2 + PM_ACTIVE], %l3 - andn %l3, %l4, %l3 - stw %l3, [%l2 + PM_ACTIVE] + mov _NCPUBITS, %l5 + mov %g0, %y + udiv %l3, %l5, %l6 + srl %l6, 0, %l4 + sllx %l4, PTR_SHIFT, %l4 + add %l4, PM_ACTIVE, %l4 + smul %l6, %l5, %l5 + sub %l3, %l5, %l5 + mov 1, %l6 + sllx %l6, %l5, %l5 + ldx [%l2 + %l4], %l6 + andn %l6, %l5, %l6 + stx %l6, [%l2 + %l4] /* * Take away its context number. */ - lduw [PCPU(CPUID)], %l3 sllx %l3, INT_SHIFT, %l3 add %l2, PM_CONTEXT, %l4 mov -1, %l5 @@ -210,18 +219,27 @@ ENTRY(cpu_switch) /* * Set the new context number in the pmap. */ - lduw [PCPU(CPUID)], %i4 - sllx %i4, INT_SHIFT, %i4 + lduw [PCPU(CPUID)], %l3 + sllx %l3, INT_SHIFT, %i4 add %l1, PM_CONTEXT, %i5 stw %i3, [%i4 + %i5] /* * Mark the pmap as active on this CPU. */ - lduw [%l1 + PM_ACTIVE], %i4 - lduw [PCPU(CPUMASK)], %i5 - or %i4, %i5, %i4 - stw %i4, [%l1 + PM_ACTIVE] + mov _NCPUBITS, %l5 + mov %g0, %y + udiv %l3, %l5, %l6 + srl %l6, 0, %l4 + sllx %l4, PTR_SHIFT, %l4 + add %l4, PM_ACTIVE, %l4 + smul %l6, %l5, %l5 + sub %l3, %l5, %l5 + mov 1, %l6 + sllx %l6, %l5, %l5 + ldx [%l1 + %l4], %l6 + or %l6, %l5, %l6 + stx %l6, [%l1 + %l4] /* * Make note of the change in pmap. 
diff --git a/sys/sparc64/sparc64/tlb.c b/sys/sparc64/sparc64/tlb.c index 990c777..9fcece6 100644 --- a/sys/sparc64/sparc64/tlb.c +++ b/sys/sparc64/sparc64/tlb.c @@ -80,7 +80,7 @@ tlb_context_demap(struct pmap *pm) PMAP_STATS_INC(tlb_ncontext_demap); cookie = ipi_tlb_context_demap(pm); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_context_demap: inactive pmap?")); stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0); @@ -101,7 +101,7 @@ tlb_page_demap(struct pmap *pm, vm_offset_t va) PMAP_STATS_INC(tlb_npage_demap); cookie = ipi_tlb_page_demap(pm, va); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_page_demap: inactive pmap?")); if (pm == kernel_pmap) @@ -128,7 +128,7 @@ tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) PMAP_STATS_INC(tlb_nrange_demap); cookie = ipi_tlb_range_demap(pm, start, end); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_range_demap: inactive pmap?")); if (pm == kernel_pmap) diff --git a/sys/sys/_cpuset.h b/sys/sys/_cpuset.h new file mode 100644 index 0000000..42a0a6a --- /dev/null +++ b/sys/sys/_cpuset.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org> + * All rights reserved. + * + * Copyright (c) 2008 Nokia Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS__CPUSET_H_ +#define _SYS__CPUSET_H_ + +#ifdef _KERNEL +#define CPU_SETSIZE MAXCPU +#endif + +#define CPU_MAXSIZE 128 + +#ifndef CPU_SETSIZE +#define CPU_SETSIZE CPU_MAXSIZE +#endif + +#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ +#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) + +typedef struct _cpuset { + long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; +} cpuset_t; + +#endif /* !_SYS__CPUSET_H_ */ diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h index 75a159c..15d6c49 100644 --- a/sys/sys/_rmlock.h +++ b/sys/sys/_rmlock.h @@ -45,7 +45,7 @@ LIST_HEAD(rmpriolist,rm_priotracker); struct rmlock { struct lock_object lock_object; - volatile cpumask_t rm_writecpus; + volatile cpuset_t rm_writecpus; LIST_HEAD(,rm_priotracker) rm_activeReaders; union { struct mtx _rm_lock_mtx; diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h index 854fa29..030a874 100644 --- a/sys/sys/cpuset.h +++ b/sys/sys/cpuset.h @@ -32,22 +32,9 @@ #ifndef _SYS_CPUSET_H_ #define _SYS_CPUSET_H_ -#ifdef 
_KERNEL -#define CPU_SETSIZE MAXCPU -#endif +#include <sys/_cpuset.h> -#define CPU_MAXSIZE 128 - -#ifndef CPU_SETSIZE -#define CPU_SETSIZE CPU_MAXSIZE -#endif - -#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ -#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) - -typedef struct _cpuset { - long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; -} cpuset_t; +#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS) #define __cpuset_mask(n) ((long)1 << ((n) % _NCPUBITS)) #define CPU_CLR(n, p) ((p)->__bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n)) @@ -66,6 +53,11 @@ typedef struct _cpuset { (p)->__bits[__i] = -1; \ } while (0) +#define CPU_SETOF(n, p) do { \ + CPU_ZERO(p); \ + ((p)->__bits[(n)/_NCPUBITS] = __cpuset_mask(n)); \ +} while (0) + /* Is p empty. */ #define CPU_EMPTY(p) __extension__ ({ \ __size_t __i; \ @@ -75,6 +67,15 @@ typedef struct _cpuset { __i == _NCPUWORDS; \ }) +/* Is p full set. */ +#define CPU_ISFULLSET(p) __extension__ ({ \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + if ((p)->__bits[__i] != (long)-1) \ + break; \ + __i == _NCPUWORDS; \ +}) + /* Is c a subset of p. 
*/ #define CPU_SUBSET(p, c) __extension__ ({ \ __size_t __i; \ @@ -124,6 +125,33 @@ typedef struct _cpuset { (d)->__bits[__i] &= ~(s)->__bits[__i]; \ } while (0) +#define CPU_CLR_ATOMIC(n, p) \ + atomic_clear_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_SET_ATOMIC(n, p) \ + atomic_set_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_OR_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_set_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_NAND_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_clear_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_COPY_STORE_REL(f, t) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_store_rel_long(&(t)->__bits[__i], \ + (f)->__bits[__i]); \ +} while (0) + /* * Valid cpulevel_t values. */ @@ -184,6 +212,9 @@ void cpuset_rel(struct cpuset *); int cpuset_setthread(lwpid_t id, cpuset_t *); int cpuset_create_root(struct prison *, struct cpuset **); int cpuset_setproc_update_set(struct proc *, struct cpuset *); +int cpusetobj_ffs(const cpuset_t *); +char *cpusetobj_strprint(char *, const cpuset_t *); +int cpusetobj_strscan(cpuset_t *, const char *); #else __BEGIN_DECLS diff --git a/sys/sys/ktr.h b/sys/sys/ktr.h index 3b78101..7885b22 100644 --- a/sys/sys/ktr.h +++ b/sys/sys/ktr.h @@ -97,6 +97,9 @@ #ifndef LOCORE +#include <sys/param.h> +#include <sys/_cpuset.h> + struct ktr_entry { u_int64_t ktr_timestamp; int ktr_cpu; @@ -107,7 +110,7 @@ struct ktr_entry { u_long ktr_parms[KTR_PARMS]; }; -extern int ktr_cpumask; +extern cpuset_t ktr_cpumask; extern int ktr_mask; extern int ktr_entries; extern int ktr_verbose; diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index 0bb2cbd..e6044a7 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -37,6 +37,7 @@ #error "no assembler-serviceable parts inside" #endif +#include <sys/_cpuset.h> #include 
<sys/queue.h> #include <sys/vmmeter.h> #include <sys/resource.h> @@ -162,8 +163,6 @@ struct pcpu { uint64_t pc_switchtime; /* cpu_ticks() at last csw */ int pc_switchticks; /* `ticks' at last csw */ u_int pc_cpuid; /* This cpu number */ - cpumask_t pc_cpumask; /* This cpu mask */ - cpumask_t pc_other_cpus; /* Mask of all other cpus */ STAILQ_ENTRY(pcpu) pc_allcpu; struct lock_list_entry *pc_spinlocks; #ifdef KTR @@ -197,6 +196,18 @@ struct pcpu { * if only to make kernel debugging easier. */ PCPU_MD_FIELDS; + + /* + * XXX + * For the time being, keep the cpuset_t objects as the very last + * members of the structure. + * They are actually tagged to be removed soon, but as long as this + * does not happen, it is necessary to find a way to easily + * implement interfaces to userland, and leaving them last makes that + * possible. + */ + cpuset_t pc_cpumask; /* This cpu mask */ + cpuset_t pc_other_cpus; /* Mask of all other cpus */ } __aligned(CACHE_LINE_SIZE); #ifdef _KERNEL diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h index 3e8c1ef..796c4ca 100644 --- a/sys/sys/pmckern.h +++ b/sys/sys/pmckern.h @@ -76,7 +76,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame); extern struct sx pmc_sx; /* Per-cpu flags indicating availability of sampling data */ -extern volatile cpumask_t pmc_cpumask; +extern volatile cpuset_t pmc_cpumask; /* Count of system-wide sampling PMCs in existence */ extern volatile int pmc_ss_count; @@ -122,7 +122,7 @@ do { \ #define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0) /* Check if a CPU has recorded samples. */ -#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C)))) +#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(CPU_ISSET(C, &pmc_cpumask))) /* * Helper functions. diff --git a/sys/sys/smp.h b/sys/sys/smp.h index f8cce5f..66e8008 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -16,6 +16,8 @@ #ifndef LOCORE +#include <sys/cpuset.h> + /* * Topology of a NUMA or HTT system. 
* @@ -32,7 +34,7 @@ struct cpu_group { struct cpu_group *cg_parent; /* Our parent group. */ struct cpu_group *cg_child; /* Optional children groups. */ - cpumask_t cg_mask; /* Mask of cpus in this group. */ + cpuset_t cg_mask; /* Mask of cpus in this group. */ int32_t cg_count; /* Count of cpus in this group. */ int16_t cg_children; /* Number of children groups. */ int8_t cg_level; /* Shared cache level. */ @@ -71,10 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu); extern void (*cpustop_restartfunc)(void); extern int smp_active; extern int smp_cpus; -extern volatile cpumask_t started_cpus; -extern volatile cpumask_t stopped_cpus; -extern cpumask_t hlt_cpus_mask; -extern cpumask_t logical_cpus_mask; +extern volatile cpuset_t started_cpus; +extern volatile cpuset_t stopped_cpus; +extern cpuset_t hlt_cpus_mask; +extern cpuset_t logical_cpus_mask; #endif /* SMP */ extern u_int mp_maxid; @@ -82,14 +84,14 @@ extern int mp_maxcpus; extern int mp_ncpus; extern volatile int smp_started; -extern cpumask_t all_cpus; +extern cpuset_t all_cpus; /* * Macro allowing us to determine whether a CPU is absent at any given * time, thus permitting us to configure sparse maps of cpuid-dependent * (per-CPU) structures. */ -#define CPU_ABSENT(x_cpu) ((all_cpus & (1 << (x_cpu))) == 0) +#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus)) /* * Macros to iterate over non-absent CPUs. 
CPU_FOREACH() takes an @@ -158,11 +160,11 @@ void cpu_mp_setmaxid(void); void cpu_mp_start(void); void forward_signal(struct thread *); -int restart_cpus(cpumask_t); -int stop_cpus(cpumask_t); -int stop_cpus_hard(cpumask_t); +int restart_cpus(cpuset_t); +int stop_cpus(cpuset_t); +int stop_cpus_hard(cpuset_t); #if defined(__amd64__) -int suspend_cpus(cpumask_t); +int suspend_cpus(cpuset_t); #endif void smp_rendezvous_action(void); extern struct mtx smp_ipi_mtx; @@ -173,7 +175,7 @@ void smp_rendezvous(void (*)(void *), void (*)(void *), void (*)(void *), void *arg); -void smp_rendezvous_cpus(cpumask_t, +void smp_rendezvous_cpus(cpuset_t, void (*)(void *), void (*)(void *), void (*)(void *), diff --git a/sys/sys/types.h b/sys/sys/types.h index 4bc1a8d..cb513af 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -99,7 +99,6 @@ typedef __clockid_t clockid_t; #define _CLOCKID_T_DECLARED #endif -typedef __cpumask_t cpumask_t; typedef __critical_t critical_t; /* Critical section value */ typedef __int64_t daddr_t; /* disk address */ diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index f5ac443..dfef3a7 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -802,7 +802,7 @@ lapic_handle_timer(struct trapframe *frame) * and unlike other schedulers it actually schedules threads to * those CPUs. 
*/ - if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0) + if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif diff --git a/usr.sbin/pmccontrol/pmccontrol.c b/usr.sbin/pmccontrol/pmccontrol.c index cce1e0e..80d4bd7 100644 --- a/usr.sbin/pmccontrol/pmccontrol.c +++ b/usr.sbin/pmccontrol/pmccontrol.c @@ -28,8 +28,9 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> +#include <sys/param.h> #include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/sysctl.h> #include <assert.h> @@ -133,26 +134,32 @@ pmcc_init_debug(void) static int pmcc_do_enable_disable(struct pmcc_op_list *op_list) { + long cpusetsize; int c, error, i, j, ncpu, npmc, t; - cpumask_t haltedcpus, cpumask; + cpuset_t haltedcpus, cpumask; struct pmcc_op *np; unsigned char *map; unsigned char op; int cpu, pmc; - size_t dummy; + size_t setsize; if ((ncpu = pmc_ncpu()) < 0) err(EX_OSERR, "Unable to determine the number of cpus"); /* Determine the set of active CPUs. */ - cpumask = (1 << ncpu) - 1; - dummy = sizeof(int); - haltedcpus = (cpumask_t) 0; + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { + err(EX_OSERR, "ERROR: Cannot determine which CPUs are " + "halted"); + } + CPU_ZERO(&haltedcpus); + setsize = (size_t)cpusetsize; if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus, - &dummy, NULL, 0) < 0) + &setsize, NULL, 0) < 0) err(EX_OSERR, "ERROR: Cannot determine which CPUs are " "halted"); - cpumask &= ~haltedcpus; + CPU_FILL(&cpumask); + CPU_NAND(&cpumask, &haltedcpus); /* Determine the maximum number of PMCs in any CPU. */ npmc = 0; @@ -200,7 +207,7 @@ pmcc_do_enable_disable(struct pmcc_op_list *op_list) if (cpu == PMCC_CPU_ALL) for (i = 0; i < ncpu; i++) { - if ((1 << i) & cpumask) + if (CPU_ISSET(i, &cpumask)) SET_PMCS(i, pmc, op); } else |