summaryrefslogtreecommitdiffstats
path: root/sys/i386
diff options
context:
space:
mode:
authorattilio <attilio@FreeBSD.org>2011-05-05 14:39:14 +0000
committerattilio <attilio@FreeBSD.org>2011-05-05 14:39:14 +0000
commitfe4de567b50f7ca317b16f69b7b3a7de693025af (patch)
treee5d54bcefbf1fe0c4c6804bdc5c4852b1b64518a /sys/i386
parentd3d3db9bac709a7fa4319bf5e8c8fb4e05918772 (diff)
downloadFreeBSD-src-fe4de567b50f7ca317b16f69b7b3a7de693025af.zip
FreeBSD-src-fe4de567b50f7ca317b16f69b7b3a7de693025af.tar.gz
Commit the support for removing cpumask_t and replacing it directly with
cpuset_t objects. That is going to offer the underlying support for a simple bump of MAXCPU and then support for a number of cpus > 32 (the current limit). Right now, cpumask_t is an int, 32 bits on all our supported architectures. cpuset_t, on the other hand, is implemented as an array of longs, and easily extensible by definition. The architectures touched by this commit are the following: - amd64 - i386 - pc98 - arm - ia64 - XEN while the others are still missing. Userland is believed to be fully converted with the changes contained here. Some technical notes: - This commit may be considered an ABI nop for all the architectures different from amd64 and ia64 (and sparc64 in the future) - per-cpu members, which are now converted to cpuset_t, need to be accessed avoiding migration, because the size of cpuset_t should be considered unknown - size of cpuset_t objects is different between kernel and userland (this is primarily done in order to leave some more space in userland to cope with KBI extensions). If you need to access kernel cpuset_t from the userland please refer to examples in this patch on how to do that correctly (kgdb may be a good source, for example). - Support for other architectures is going to be added soon - Only MAXCPU for amd64 is bumped now The patch has been tested by sbruno and Nicholas Esborn on opteron 4 x 12 pack CPUs. More testing on big SMP is expected to come soon. pluknet tested the patch with his 8-ways on both amd64 and i386. Tested by: pluknet, sbruno, gianni, Nicholas Esborn Reviewed by: jeff, jhb, sbruno
Diffstat (limited to 'sys/i386')
-rw-r--r--sys/i386/i386/intr_machdep.c10
-rw-r--r--sys/i386/i386/mp_machdep.c212
-rw-r--r--sys/i386/i386/pmap.c91
-rw-r--r--sys/i386/i386/vm_machdep.c36
-rw-r--r--sys/i386/include/_types.h1
-rw-r--r--sys/i386/include/pmap.h3
-rw-r--r--sys/i386/include/sf_buf.h3
-rw-r--r--sys/i386/include/smp.h8
-rw-r--r--sys/i386/xen/mp_machdep.c117
-rw-r--r--sys/i386/xen/pmap.c54
10 files changed, 303 insertions, 232 deletions
diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c
index 77b8004..56529f7 100644
--- a/sys/i386/i386/intr_machdep.c
+++ b/sys/i386/i386/intr_machdep.c
@@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
* allocate CPUs round-robin.
*/
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
static int current_cpu;
/*
@@ -432,7 +431,7 @@ intr_next_cpu(void)
current_cpu++;
if (current_cpu > mp_maxid)
current_cpu = 0;
- } while (!(intr_cpus & (1 << current_cpu)));
+ } while (!CPU_ISSET(current_cpu, &intr_cpus));
mtx_unlock_spin(&icu_lock);
return (apic_id);
}
@@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu)
printf("INTR: Adding local APIC %d as a target\n",
cpu_apic_ids[cpu]);
- intr_cpus |= (1 << cpu);
+ CPU_SET(cpu, &intr_cpus);
}
/*
@@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused)
return;
#endif
+ /* The BSP is always a valid target. */
+ CPU_SETOF(0, &intr_cpus);
+
/* Don't bother on UP. */
if (mp_ncpus == 1)
return;
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index a07b06c..dfe6434 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h> /* cngetc() */
+#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
@@ -172,7 +173,7 @@ static u_long *ipi_hardclock_counts[MAXCPU];
* Local data and functions.
*/
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -209,7 +210,7 @@ static void release_aps(void *dummy);
static int hlt_logical_cpus;
static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
-static cpumask_t hyperthreading_cpus_mask;
+static cpuset_t hyperthreading_cpus_mask;
static int hyperthreading_allowed = 1;
static struct sysctl_ctx_list logical_cpu_clist;
@@ -358,7 +359,7 @@ topo_probe(void)
if (cpu_topo_probed)
return;
- logical_cpus_mask = 0;
+ CPU_ZERO(&logical_cpus_mask);
if (mp_ncpus <= 1)
cpu_cores = cpu_logical = 1;
else if (cpu_vendor_id == CPU_VENDOR_AMD)
@@ -500,7 +501,7 @@ cpu_mp_probe(void)
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
- all_cpus = 1;
+ CPU_SETOF(0, &all_cpus);
if (mp_ncpus == 0) {
/*
* No CPUs were found, so this must be a UP system. Setup
@@ -631,6 +632,7 @@ cpu_mp_announce(void)
void
init_secondary(void)
{
+ cpuset_t tcpuset, tallcpus;
struct pcpu *pc;
vm_offset_t addr;
int gsel_tss;
@@ -755,19 +757,22 @@ init_secondary(void)
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+ tcpuset = PCPU_GET(cpumask);
/* Determine if we are a logical CPU. */
/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
- logical_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&logical_cpus_mask, &tcpuset);
/* Determine if we are a hyperthread. */
if (hyperthreading_cpus > 1 &&
PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&hyperthreading_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, &tcpuset);
+ PCPU_SET(other_cpus, tallcpus);
if (bootverbose)
lapic_dump("AP");
@@ -904,6 +909,7 @@ assign_cpu_ids(void)
static int
start_all_aps(void)
{
+ cpuset_t tallcpus;
#ifndef PC98
u_char mpbiosreason;
#endif
@@ -963,11 +969,13 @@ start_all_aps(void)
}
CHECK_PRINT("trace"); /* show checkpoints */
- all_cpus |= (1 << cpu); /* record AP in CPU map */
+ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* build our map of 'other' CPUs */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, PCPU_PTR(cpumask));
+ PCPU_SET(other_cpus, tallcpus);
/* restore the warmstart vector */
*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
@@ -1164,6 +1172,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
#endif /* COUNT_XINVLTLB_HITS */
/*
+ * Send an IPI to specified CPU handling the bitmap logic.
+ */
+static void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+ u_int bitmap, old_pending, new_pending;
+
+ KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+ if (IPI_IS_BITMAPED(ipi)) {
+ bitmap = 1 << ipi;
+ ipi = IPI_BITMAP_VECTOR;
+ do {
+ old_pending = cpu_ipi_pending[cpu];
+ new_pending = old_pending | bitmap;
+ } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+ old_pending, new_pending));
+ if (old_pending)
+ return;
+ }
+ lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
+
+/*
* Flush the TLB on all other CPU's
*/
static void
@@ -1187,28 +1219,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
- int ncpu, othercpus;
+ int cpu, ncpu, othercpus;
othercpus = mp_ncpus - 1;
- if (mask == (u_int)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
+ if (CPU_ISFULLSET(&mask)) {
+ if (othercpus < 1)
return;
} else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = bitcount32(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
+ sched_pin();
+ CPU_NAND(&mask, PCPU_PTR(cpumask));
+ sched_unpin();
+ if (CPU_EMPTY(&mask))
return;
}
if (!(read_eflags() & PSL_I))
@@ -1217,39 +1240,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (u_int)-1)
+ if (CPU_ISFULLSET(&mask)) {
+ ncpu = othercpus;
ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
+ } else {
+ ncpu = 0;
+ while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
+ vector);
+ ipi_send_cpu(cpu, vector);
+ ncpu++;
+ }
+ }
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_ipi_mtx);
}
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (old_pending)
- return;
- }
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
void
smp_cache_flush(void)
{
@@ -1296,7 +1305,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
{
if (smp_started) {
@@ -1308,7 +1317,7 @@ smp_masked_invltlb(cpumask_t mask)
}
void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
@@ -1320,7 +1329,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
}
void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
@@ -1373,7 +1382,7 @@ ipi_bitmap_handler(struct trapframe frame)
* send an IPI to a set of cpus.
*/
void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
@@ -1383,12 +1392,12 @@ ipi_selected(cpumask_t cpus, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, cpus);
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
- CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
- while ((cpu = ffs(cpus)) != 0) {
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
cpu--;
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
}
@@ -1406,7 +1415,7 @@ ipi_cpu(int cpu, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
@@ -1419,8 +1428,10 @@ void
ipi_all_but_self(u_int ipi)
{
+ sched_pin();
if (IPI_IS_BITMAPED(ipi)) {
ipi_selected(PCPU_GET(other_cpus), ipi);
+ sched_unpin();
return;
}
@@ -1430,7 +1441,9 @@ ipi_all_but_self(u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+ CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus));
+ sched_unpin();
+
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
@@ -1438,7 +1451,7 @@ ipi_all_but_self(u_int ipi)
int
ipi_nmi_handler()
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
/*
* As long as there is not a simple way to know about a NMI's
@@ -1446,11 +1459,13 @@ ipi_nmi_handler()
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
+ sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((ipi_nmi_pending & cpumask) == 0)
+ sched_unpin();
+ if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
return (1);
- atomic_clear_int(&ipi_nmi_pending, cpumask);
+ CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
cpustop_handler();
return (0);
}
@@ -1462,23 +1477,25 @@ ipi_nmi_handler()
void
cpustop_handler(void)
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
u_int cpu;
+ sched_pin();
cpu = PCPU_GET(cpuid);
cpumask = PCPU_GET(cpumask);
+ sched_unpin();
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
/* Wait for restart */
- while (!(started_cpus & cpumask))
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
@@ -1505,30 +1522,30 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
static int
sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
{
- cpumask_t mask;
+ cpuset_t mask;
int error;
mask = hlt_cpus_mask;
- error = sysctl_handle_int(oidp, &mask, 0, req);
+ error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req);
if (error || !req->newptr)
return (error);
- if (logical_cpus_mask != 0 &&
- (mask & logical_cpus_mask) == logical_cpus_mask)
+ if (!CPU_EMPTY(&logical_cpus_mask) &&
+ CPU_SUBSET(&mask, &logical_cpus_mask))
hlt_logical_cpus = 1;
else
hlt_logical_cpus = 0;
if (! hyperthreading_allowed)
- mask |= hyperthreading_cpus_mask;
+ CPU_OR(&mask, &hyperthreading_cpus_mask);
- if ((mask & all_cpus) == all_cpus)
- mask &= ~(1<<0);
+ if (CPU_SUBSET(&mask, &all_cpus))
+ CPU_CLR(0, &mask);
hlt_cpus_mask = mask;
return (error);
}
-SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hlt_cpus, "IU",
+SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus,
+ CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S",
"Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
static int
@@ -1542,15 +1559,15 @@ sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
return (error);
if (disable)
- hlt_cpus_mask |= logical_cpus_mask;
+ CPU_OR(&hlt_cpus_mask, &logical_cpus_mask);
else
- hlt_cpus_mask &= ~logical_cpus_mask;
+ CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask);
if (! hyperthreading_allowed)
- hlt_cpus_mask |= hyperthreading_cpus_mask;
+ CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask);
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
+ if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus))
+ CPU_CLR(0, &hlt_cpus_mask);
hlt_logical_cpus = disable;
return (error);
@@ -1577,18 +1594,18 @@ sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
#endif
if (allowed)
- hlt_cpus_mask &= ~hyperthreading_cpus_mask;
+ CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask);
else
- hlt_cpus_mask |= hyperthreading_cpus_mask;
+ CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask);
- if (logical_cpus_mask != 0 &&
- (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
+ if (!CPU_EMPTY(&logical_cpus_mask) &&
+ CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask))
hlt_logical_cpus = 1;
else
hlt_logical_cpus = 0;
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
+ if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus))
+ CPU_CLR(0, &hlt_cpus_mask);
hyperthreading_allowed = allowed;
return (error);
@@ -1598,7 +1615,7 @@ static void
cpu_hlt_setup(void *dummy __unused)
{
- if (logical_cpus_mask != 0) {
+ if (!CPU_EMPTY(&logical_cpus_mask)) {
TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
&hlt_logical_cpus);
sysctl_ctx_init(&logical_cpu_clist);
@@ -1612,20 +1629,21 @@ cpu_hlt_setup(void *dummy __unused)
&logical_cpus_mask, 0, "");
if (hlt_logical_cpus)
- hlt_cpus_mask |= logical_cpus_mask;
+ CPU_OR(&hlt_cpus_mask, &logical_cpus_mask);
/*
* If necessary for security purposes, force
* hyperthreading off, regardless of the value
* of hlt_logical_cpus.
*/
- if (hyperthreading_cpus_mask) {
+ if (!CPU_EMPTY(&hyperthreading_cpus_mask)) {
SYSCTL_ADD_PROC(&logical_cpu_clist,
SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
"hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_hyperthreading_allowed, "IU", "");
if (! hyperthreading_allowed)
- hlt_cpus_mask |= hyperthreading_cpus_mask;
+ CPU_OR(&hlt_cpus_mask,
+ &hyperthreading_cpus_mask);
}
}
}
@@ -1634,7 +1652,7 @@ SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
int
mp_grab_cpu_hlt(void)
{
- cpumask_t mask;
+ cpuset_t mask;
#ifdef MP_WATCHDOG
u_int cpuid;
#endif
@@ -1647,7 +1665,7 @@ mp_grab_cpu_hlt(void)
#endif
retval = 0;
- while (mask & hlt_cpus_mask) {
+ while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) {
retval = 1;
__asm __volatile("sti; hlt" : : : "memory");
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index e1fe137..c85f138 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
#endif
#include <vm/vm.h>
@@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
kernel_pmap->pm_root = NULL;
- kernel_pmap->pm_active = -1; /* don't allow deactivation */
+ CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
@@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invlpg(va);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg(other_cpus, va);
}
sched_unpin();
}
@@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
vm_offset_t addr;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg_range(pmap->pm_active & other_cpus,
- sva, eva);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg_range(other_cpus, sva, eva);
}
sched_unpin();
}
@@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
void
pmap_invalidate_all(pmap_t pmap)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invltlb();
- if (pmap->pm_active & other_cpus)
- smp_masked_invltlb(pmap->pm_active & other_cpus);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invltlb(other_cpus);
}
sched_unpin();
}
@@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void)
}
struct pde_action {
- cpumask_t store; /* processor that updates the PDE */
- cpumask_t invalidate; /* processors that invalidate their TLB */
+ cpuset_t store; /* processor that updates the PDE */
+ cpuset_t invalidate; /* processors that invalidate their TLB */
vm_offset_t va;
pd_entry_t *pde;
pd_entry_t newpde;
@@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg)
pd_entry_t *pde;
pmap_t pmap;
- if (act->store == PCPU_GET(cpumask))
+ sched_pin();
+ if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+ sched_unpin();
+
/*
* Elsewhere, this operation requires allpmaps_lock for
* synchronization. Here, it does not because it is being
@@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg)
pde = pmap_pde(pmap, act->va);
pde_store(pde, act->newpde);
}
+ } else
+ sched_unpin();
}
static void
@@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg)
{
struct pde_action *act = arg;
- if (act->store == PCPU_GET(cpumask))
+ sched_pin();
+ if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+ sched_unpin();
pde_store(act->pde, act->newpde);
+ } else
+ sched_unpin();
}
static void
@@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg)
{
struct pde_action *act = arg;
- if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ sched_pin();
+ if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) {
+ sched_unpin();
pmap_update_pde_invalidate(act->va, act->newpde);
+ } else
+ sched_unpin();
}
/*
@@ -1058,21 +1075,23 @@ static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{
struct pde_action act;
- cpumask_t active, cpumask;
+ cpuset_t active, cpumask, other_cpus;
sched_pin();
cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
if (pmap == kernel_pmap)
active = all_cpus;
else
active = pmap->pm_active;
- if ((active & PCPU_GET(other_cpus)) != 0) {
+ if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpumask;
act.invalidate = active;
act.va = va;
act.pde = pde;
act.newpde = newpde;
- smp_rendezvous_cpus(cpumask | active,
+ CPU_OR(&cpumask, &active);
+ smp_rendezvous_cpus(cpumask,
smp_no_rendevous_barrier, pmap == kernel_pmap ?
pmap_update_pde_kernel : pmap_update_pde_user,
pmap_update_pde_teardown, &act);
@@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
pmap_kenter_pde(va, newpde);
else
pde_store(pde, newpde);
- if ((active & cpumask) != 0)
+ if (CPU_OVERLAP(&active, &cpumask))
pmap_update_pde_invalidate(va, newpde);
}
sched_unpin();
@@ -1095,7 +1114,7 @@ PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invlpg(va);
}
@@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t addr;
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
}
@@ -1113,7 +1132,7 @@ PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
pmap_kenter_pde(va, newpde);
else
pde_store(pde, newpde);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
pmap_update_pde_invalidate(va, newpde);
}
#endif /* !SMP */
@@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
pmap->pm_root = NULL;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap)
#endif
}
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -4985,11 +5004,11 @@ pmap_activate(struct thread *td)
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
- atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
#else
- oldpmap->pm_active &= ~1;
- pmap->pm_active |= 1;
+ CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
#endif
#ifdef PAE
cr3 = vtophys(pmap->pm_pdpt);
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 232e1a1..a084e09 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -573,11 +573,13 @@ kvtop(void *addr)
static void
cpu_reset_proxy()
{
+ cpuset_t tcrp;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
; /* Wait for other cpu to see that we've started */
- stop_cpus((1<<cpu_reset_proxyid));
+ CPU_SETOF(cpu_reset_proxyid, &tcrp);
+ stop_cpus(tcrp);
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
cpu_reset_real();
@@ -596,25 +598,29 @@ cpu_reset()
#endif
#ifdef SMP
- cpumask_t map;
+ cpuset_t map;
u_int cnt;
if (smp_active) {
- map = PCPU_GET(other_cpus) & ~stopped_cpus;
- if (map != 0) {
+ sched_pin();
+ map = PCPU_GET(other_cpus);
+ CPU_NAND(&map, &stopped_cpus);
+ if (!CPU_EMPTY(&map)) {
printf("cpu_reset: Stopping other CPUs\n");
stop_cpus(map);
}
if (PCPU_GET(cpuid) != 0) {
cpu_reset_proxyid = PCPU_GET(cpuid);
+ sched_unpin();
cpustop_restartfunc = cpu_reset_proxy;
cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
/* Restart CPU #0. */
/* XXX: restart_cpus(1 << 0); */
- atomic_store_rel_int(&started_cpus, (1 << 0));
+ CPU_SETOF(0, &started_cpus);
+ wmb();
cnt = 0;
while (cpu_reset_proxy_active == 0 && cnt < 10000000)
@@ -626,7 +632,8 @@ cpu_reset()
while (1);
/* NOTREACHED */
- }
+ } else
+ sched_unpin();
DELAY(1000000);
}
@@ -795,7 +802,7 @@ sf_buf_alloc(struct vm_page *m, int flags)
struct sf_head *hash_list;
struct sf_buf *sf;
#ifdef SMP
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
#endif
int error;
@@ -867,22 +874,23 @@ sf_buf_alloc(struct vm_page *m, int flags)
*/
#ifdef SMP
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
- sf->cpumask = 0;
+ CPU_ZERO(&sf->cpumask);
shootdown:
sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((sf->cpumask & cpumask) == 0) {
- sf->cpumask |= cpumask;
+ if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) {
+ CPU_OR(&sf->cpumask, &cpumask);
invlpg(sf->kva);
}
if ((flags & SFB_CPUPRIVATE) == 0) {
- other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask;
- if (other_cpus != 0) {
- sf->cpumask |= other_cpus;
+ other_cpus = PCPU_GET(other_cpus);
+ CPU_NAND(&other_cpus, &sf->cpumask);
+ if (!CPU_EMPTY(&other_cpus)) {
+ CPU_OR(&sf->cpumask, &other_cpus);
smp_masked_invlpg(other_cpus, sf->kva);
}
}
- sched_unpin();
+ sched_unpin();
#else
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
pmap_invalidate_page(kernel_pmap, sf->kva);
diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h
index 7a969fe..3194fd6 100644
--- a/sys/i386/include/_types.h
+++ b/sys/i386/include/_types.h
@@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t;
* Standard type definitions.
*/
typedef unsigned long __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef __int32_t __critical_t;
typedef long double __double_t;
typedef long double __float_t;
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index eeada2e..3012a00 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -155,6 +155,7 @@
#ifndef LOCORE
#include <sys/queue.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -433,7 +434,7 @@ struct pmap {
struct mtx pm_mtx;
pd_entry_t *pm_pdir; /* KVA of page directory */
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
- cpumask_t pm_active; /* active on cpus */
+ cpuset_t pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
#ifdef PAE
diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h
index 7bc1095..415dcbb 100644
--- a/sys/i386/include/sf_buf.h
+++ b/sys/i386/include/sf_buf.h
@@ -29,6 +29,7 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
+#include <sys/_cpuset.h>
#include <sys/queue.h>
struct vm_page;
@@ -40,7 +41,7 @@ struct sf_buf {
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
#ifdef SMP
- cpumask_t cpumask; /* cpus on which mapping is valid */
+ cpuset_t cpumask; /* cpus on which mapping is valid */
#endif
};
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index b512e00..cdec9c8 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -64,17 +64,17 @@ void ipi_bitmap_handler(struct trapframe frame);
#endif
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
-void ipi_selected(cpumask_t cpus, u_int ipi);
+void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
int mp_grab_cpu_hlt(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
-void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr);
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
+void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
-void smp_masked_invltlb(cpumask_t mask);
+void smp_masked_invltlb(cpuset_t mask);
#ifdef XEN
void ipi_to_irq_init(void);
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
index 670d110..1146ac2 100644
--- a/sys/i386/xen/mp_machdep.c
+++ b/sys/i386/xen/mp_machdep.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h> /* cngetc() */
+#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
@@ -116,7 +117,7 @@ volatile int smp_tlb_wait;
typedef void call_data_func_t(uintptr_t , uintptr_t);
static u_int logical_cpus;
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -149,7 +150,7 @@ static int start_ap(int apic_id);
static void release_aps(void *dummy);
static u_int hyperthreading_cpus;
-static cpumask_t hyperthreading_cpus_mask;
+static cpuset_t hyperthreading_cpus_mask;
extern void Xhypervisor_callback(void);
extern void failsafe_callback(void);
@@ -238,7 +239,7 @@ cpu_mp_probe(void)
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
- all_cpus = 1;
+ CPU_SETOF(0, &all_cpus);
if (mp_ncpus == 0) {
/*
* No CPUs were found, so this must be a UP system. Setup
@@ -292,7 +293,8 @@ cpu_mp_start(void)
start_all_aps();
/* Setup the initial logical CPUs info. */
- logical_cpus = logical_cpus_mask = 0;
+ logical_cpus = 0;
+ CPU_ZERO(&logical_cpus_mask);
if (cpu_feature & CPUID_HTT)
logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
@@ -512,6 +514,7 @@ xen_smp_intr_init_cpus(void *unused)
void
init_secondary(void)
{
+ cpuset_t tcpuset, tallcpus;
vm_offset_t addr;
int gsel_tss;
@@ -591,18 +594,21 @@ init_secondary(void)
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+ tcpuset = PCPU_GET(cpumask);
/* Determine if we are a logical CPU. */
if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
- logical_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&logical_cpus_mask, &tcpuset);
/* Determine if we are a hyperthread. */
if (hyperthreading_cpus > 1 &&
PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&hyperthreading_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, &tcpuset);
+ PCPU_SET(other_cpus, tallcpus);
#if 0
if (bootverbose)
lapic_dump("AP");
@@ -713,6 +719,7 @@ assign_cpu_ids(void)
int
start_all_aps(void)
{
+ cpuset_t tallcpus;
int x,apic_id, cpu;
struct pcpu *pc;
@@ -766,12 +773,14 @@ start_all_aps(void)
panic("bye-bye");
}
- all_cpus |= (1 << cpu); /* record AP in CPU map */
+ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* build our map of 'other' CPUs */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, PCPU_PTR(cpumask));
+ PCPU_SET(other_cpus, tallcpus);
pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
@@ -1000,29 +1009,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
- int ncpu, othercpus;
+ int cpu, ncpu, othercpus;
struct _call_data data;
othercpus = mp_ncpus - 1;
- if (mask == (u_int)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
+ if (CPU_ISFULLSET(&mask)) {
+ if (othercpus < 1)
return;
} else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = bitcount32(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
+ critical_enter();
+ CPU_NAND(&mask, PCPU_PTR(cpumask));
+ critical_exit();
+ if (CPU_EMPTY(&mask))
return;
}
if (!(read_eflags() & PSL_I))
@@ -1034,10 +1034,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
call_data->arg1 = addr1;
call_data->arg2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (u_int)-1)
+ if (CPU_ISFULLSET(&mask)) {
+ ncpu = othercpus;
ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
+ } else {
+ ncpu = 0;
+ while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
+ vector);
+ ipi_send_cpu(cpu, vector);
+ ncpu++;
+ }
+ }
while (smp_tlb_wait < ncpu)
ia32_pause();
call_data = NULL;
@@ -1080,7 +1090,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
{
if (smp_started) {
@@ -1089,7 +1099,7 @@ smp_masked_invltlb(cpumask_t mask)
}
void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
@@ -1098,7 +1108,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
}
void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
@@ -1110,7 +1120,7 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
* send an IPI to a set of cpus.
*/
void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
@@ -1120,11 +1130,11 @@ ipi_selected(cpumask_t cpus, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, cpus);
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
- while ((cpu = ffs(cpus)) != 0) {
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
cpu--;
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
@@ -1143,7 +1153,7 @@ ipi_cpu(int cpu, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
@@ -1155,23 +1165,27 @@ ipi_cpu(int cpu, u_int ipi)
void
ipi_all_but_self(u_int ipi)
{
+ cpuset_t other_cpus;
/*
* IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
* of help in order to understand what is the source.
* Set the mask of receiving CPUs for this purpose.
*/
+ sched_pin();
+ other_cpus = PCPU_GET(other_cpus);
+ sched_unpin();
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- ipi_selected(PCPU_GET(other_cpus), ipi);
+ ipi_selected(other_cpus, ipi);
}
int
ipi_nmi_handler()
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
/*
* As long as there is not a simple way to know about a NMI's
@@ -1179,11 +1193,13 @@ ipi_nmi_handler()
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
+ sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((ipi_nmi_pending & cpumask) == 0)
+ sched_unpin();
+ if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
return (1);
- atomic_clear_int(&ipi_nmi_pending, cpumask);
+ CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
cpustop_handler();
return (0);
}
@@ -1195,20 +1211,25 @@ ipi_nmi_handler()
void
cpustop_handler(void)
{
- int cpu = PCPU_GET(cpuid);
- int cpumask = PCPU_GET(cpumask);
+ cpuset_t cpumask;
+ int cpu;
+
+ sched_pin();
+ cpumask = PCPU_GET(cpumask);
+ cpu = PCPU_GET(cpuid);
+ sched_unpin();
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
/* Wait for restart */
- while (!(started_cpus & cpumask))
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index cf36f5e..74ace61 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
#ifdef PAE
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
- kernel_pmap->pm_active = -1; /* don't allow deactivation */
+ CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
@@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
pmap, va);
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invlpg(va);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg(other_cpus, va);
}
sched_unpin();
PT_UPDATES_FLUSH();
@@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
vm_offset_t addr;
CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x",
pmap, sva, eva);
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg_range(pmap->pm_active & other_cpus,
- sva, eva);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg_range(other_cpus, sva, eva);
}
sched_unpin();
PT_UPDATES_FLUSH();
@@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
void
pmap_invalidate_all(pmap_t pmap)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap);
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invltlb();
- if (pmap->pm_active & other_cpus)
- smp_masked_invltlb(pmap->pm_active & other_cpus);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invltlb(other_cpus);
}
sched_unpin();
}
@@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
pmap, va);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invlpg(va);
PT_UPDATES_FLUSH();
}
@@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
pmap, sva, eva);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
PT_UPDATES_FLUSH();
@@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap)
CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap)
#ifdef PAE
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap)
}
xen_flush_queue();
vm_page_unlock_queues();
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -4030,11 +4032,11 @@ pmap_activate(struct thread *td)
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
- atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
#else
- oldpmap->pm_active &= ~1;
- pmap->pm_active |= 1;
+ CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
#endif
#ifdef PAE
cr3 = vtophys(pmap->pm_pdpt);
OpenPOWER on IntegriCloud