summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/kernel/setup_percpu.c 23
-rw-r--r-- include/linux/cpumask.h 26
-rw-r--r-- kernel/cpu.c 128
3 files changed, 43 insertions, 134 deletions
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1cd53df..76e305e 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,26 +80,6 @@ static void __init setup_per_cpu_maps(void)
#endif
}
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-/*
- * Replace static cpumask_of_cpu_map in the initdata section,
- * with one that's allocated sized by the possible number of cpus.
- *
- * (requires nr_cpu_ids to be initialized)
- */
-static void __init setup_cpumask_of_cpu(void)
-{
- int i;
-
- /* alloc_bootmem zeroes memory */
- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
- for (i = 0; i < nr_cpu_ids; i++)
- cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
#ifdef CONFIG_X86_32
/*
* Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -199,9 +179,6 @@ void __init setup_per_cpu_areas(void)
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
-
- /* Setup cpumask_of_cpu map */
- setup_cpumask_of_cpu();
}
#endif
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 8fa3b6d..96d0509 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -265,10 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
}
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+ cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
+{
+ const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; /* row = 1 + bit index of cpu within a long */
+ p -= cpu / BITS_PER_LONG; /* now p[cpu / BITS_PER_LONG] is that row's word 0 */
+ return (const cpumask_t *)p;
+}
+
+/*
+ * Yields *get_cpu_mask(cpu) by value.  When the address of the result is
+ * taken immediately, gcc optimizes the copy out (the mask is a constant)
+ * and no huge stack variable is created:
+ */
+#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); })
-/* cpumask_of_cpu_map[] is in kernel/cpu.c */
-extern const cpumask_t *cpumask_of_cpu_map;
-#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu])
#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a35d899..06a8358 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -462,115 +462,27 @@ out:
#endif /* CONFIG_SMP */
-/* 64 bits of zeros, for initializers. */
-#if BITS_PER_LONG == 32
-#define Z64 0, 0
-#else
-#define Z64 0
-#endif
+/*
+ * cpu_bit_bitmap[] is a special, "compressed" data structure that
+ * represents all NR_CPUS-bit binary values of the form 1<<nr.
+ *
+ * It is used by cpumask_of_cpu() to get a constant address to a CPU
+ * mask value that has a single bit set only.
+ */
-/* Initializer macros. */
-#define CMI0(n) { .bits = { 1UL << (n) } }
-#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }
-
-#define CMI8(n, ...) \
- CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__), \
- CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__), \
- CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__), \
- CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)
-
-#if BITS_PER_LONG == 32
-#define CMI64(n, ...) \
- CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \
- CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \
- CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__), \
- CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
-#else
-#define CMI64(n, ...) \
- CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \
- CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \
- CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__), \
- CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
-#endif
+/* Each entry sets bit x in word 0 of row x+1; cpu_bit_bitmap[0] is empty - so we can back into it */
+#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
+#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
+#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
+#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
-#define CMI256(n, ...) \
- CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__), \
- CMI64((n)+128, Z64, Z64, __VA_ARGS__), \
- CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
-#define Z256 Z64, Z64, Z64, Z64
-
-#define CMI1024(n, ...) \
- CMI256((n), __VA_ARGS__), \
- CMI256((n)+256, Z256, __VA_ARGS__), \
- CMI256((n)+512, Z256, Z256, __VA_ARGS__), \
- CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
-#define Z1024 Z256, Z256, Z256, Z256
-
-/* We want this statically initialized, just to be safe. We try not
- * to waste too much space, either. */
-static const cpumask_t cpumask_map[]
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-__initdata
-#endif
-= {
- CMI0(0), CMI0(1), CMI0(2), CMI0(3),
-#if NR_CPUS > 4
- CMI0(4), CMI0(5), CMI0(6), CMI0(7),
-#endif
-#if NR_CPUS > 8
- CMI0(8), CMI0(9), CMI0(10), CMI0(11),
- CMI0(12), CMI0(13), CMI0(14), CMI0(15),
-#endif
-#if NR_CPUS > 16
- CMI0(16), CMI0(17), CMI0(18), CMI0(19),
- CMI0(20), CMI0(21), CMI0(22), CMI0(23),
- CMI0(24), CMI0(25), CMI0(26), CMI0(27),
- CMI0(28), CMI0(29), CMI0(30), CMI0(31),
-#endif
-#if NR_CPUS > 32
-#if BITS_PER_LONG == 32
- CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
- CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
- CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
- CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
- CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
- CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
- CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
- CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
-#else
- CMI0(32), CMI0(33), CMI0(34), CMI0(35),
- CMI0(36), CMI0(37), CMI0(38), CMI0(39),
- CMI0(40), CMI0(41), CMI0(42), CMI0(43),
- CMI0(44), CMI0(45), CMI0(46), CMI0(47),
- CMI0(48), CMI0(49), CMI0(50), CMI0(51),
- CMI0(52), CMI0(53), CMI0(54), CMI0(55),
- CMI0(56), CMI0(57), CMI0(58), CMI0(59),
- CMI0(60), CMI0(61), CMI0(62), CMI0(63),
-#endif /* BITS_PER_LONG == 64 */
-#endif
-#if NR_CPUS > 64
- CMI64(64, Z64),
-#endif
-#if NR_CPUS > 128
- CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
-#endif
-#if NR_CPUS > 256
- CMI256(256, Z256),
-#endif
-#if NR_CPUS > 512
- CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
-#endif
-#if NR_CPUS > 1024
- CMI1024(1024, Z1024),
-#endif
-#if NR_CPUS > 2048
- CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
-#endif
-#if NR_CPUS > 4096
-#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
+const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
+ /* 32 designated rows, or 64 when BITS_PER_LONG > 32; row 0 gets no initializer */
+ MASK_DECLARE_8(0), MASK_DECLARE_8(8),
+ MASK_DECLARE_8(16), MASK_DECLARE_8(24),
+#if BITS_PER_LONG > 32
+ MASK_DECLARE_8(32), MASK_DECLARE_8(40),
+ MASK_DECLARE_8(48), MASK_DECLARE_8(56),
#endif
};
-
-const cpumask_t *cpumask_of_cpu_map = cpumask_map;
-
-EXPORT_SYMBOL_GPL(cpumask_of_cpu_map);
+EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
OpenPOWER on IntegriCloud