summaryrefslogtreecommitdiffstats
path: root/lib/cpumask.c
diff options
context:
space:
mode:
authorMike Travis <travis@sgi.com>2008-05-12 21:21:13 +0200
committerThomas Gleixner <tglx@linutronix.de>2008-05-23 18:23:38 +0200
commit41df0d61c266998b8049df7fec119cd518a43aa1 (patch)
tree6791fa6e3fdaee51ae348f9fe63972d648beaf5a /lib/cpumask.c
parent143aa5c53bd3895d42d7c08753fe58293988a69d (diff)
downloadop-kernel-dev-41df0d61c266998b8049df7fec119cd518a43aa1.zip
op-kernel-dev-41df0d61c266998b8049df7fec119cd518a43aa1.tar.gz
x86: Add performance variants of cpumask operators
* Increase performance for systems with large count NR_CPUS by limiting the range of the cpumask operators that loop over the bits in a cpumask_t variable. This removes a large amount of wasted cpu cycles. * Add performance variants of the cpumask operators: int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS int first_cpu_nr(mask) Number lowest set bit, or nr_cpu_ids int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids * Modify following to use performance variants: #define num_online_cpus() cpus_weight_nr(cpu_online_map) #define num_possible_cpus() cpus_weight_nr(cpu_possible_map) #define num_present_cpus() cpus_weight_nr(cpu_present_map) #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) * Comment added to include/linux/cpumask.h: Note: The alternate operations with the suffix "_nr" are used to limit the range of the loop to nr_cpu_ids instead of NR_CPUS when NR_CPUS > 64 for performance reasons. If NR_CPUS is <= 64 then most assembler bitmask operators execute faster with a constant range, so the operator will continue to use NR_CPUS. Another consideration is that nr_cpu_ids is initialized to NR_CPUS and isn't lowered until the possible cpus are discovered (including any disabled cpus). So early uses will span the entire range of NR_CPUS. (The net effect is that for systems with 64 or less CPU's there are no functional changes.) For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@sgi.com> Reviewed-by: Paul Jackson <pj@sgi.com> Reviewed-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Mike Travis <travis@sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'lib/cpumask.c')
-rw-r--r--lib/cpumask.c9
1 files changed, 9 insertions, 0 deletions
diff --git a/lib/cpumask.c b/lib/cpumask.c
index bb4f76d..5f97dc2 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp)
}
EXPORT_SYMBOL(__next_cpu);
+#if NR_CPUS > 64
+int __next_cpu_nr(int n, const cpumask_t *srcp)
+{
+ return min_t(int, nr_cpu_ids,
+ find_next_bit(srcp->bits, nr_cpu_ids, n+1));
+}
+EXPORT_SYMBOL(__next_cpu_nr);
+#endif
+
int __any_online_cpu(const cpumask_t *mask)
{
int cpu;
OpenPOWER on IntegriCloud