From dc565b525d4b7091a3abb6616d210c8a896a11d7 Mon Sep 17 00:00:00 2001
From: "hawkes@sgi.com" <hawkes@sgi.com>
Date: Mon, 10 Oct 2005 08:43:26 -0700
Subject: [IA64] wider use of for_each_cpu_mask() in arch/ia64

In arch/ia64 change the explicit use of for-loops and NR_CPUS into the
general for_each_cpu() or for_each_online_cpu() constructs, as
appropriate.  This widens the scope of potential future optimizations
of the general constructs, as well as takes advantage of the existing
optimizations of first_cpu() and next_cpu().

Signed-off-by: John Hawkes <hawkes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/tlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/ia64/mm/tlb.c')

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e..987fb75 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -77,9 +77,10 @@ wrap_mmu_context (struct mm_struct *mm)
 	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
 	{
 		int cpu = get_cpu(); /* prevent preemption/migration */
-		for (i = 0; i < NR_CPUS; ++i)
-			if (cpu_online(i) && (i != cpu))
+		for_each_online_cpu(i) {
+			if (i != cpu)
 				per_cpu(ia64_need_tlb_flush, i) = 1;
+		}
 		put_cpu();
 	}
 	local_flush_tlb_all();
--
cgit v1.1

From c1902aae322952f8726469a6657df7b9d5c794fe Mon Sep 17 00:00:00 2001
From: Dean Roe <roe@sgi.com>
Date: Thu, 27 Oct 2005 15:41:04 -0500
Subject: [IA64] - Avoid slow TLB purges on SGI Altix systems

flush_tlb_all() can be a scaling issue on large SGI Altix systems
since it uses the global call_lock and always executes on all cpus.

When a process enters flush_tlb_range() to purge TLBs for another
process, it is possible to avoid flush_tlb_all() and instead allow
sn2_global_tlb_purge() to purge TLBs only where necessary.

This patch modifies flush_tlb_range() so that this case can be handled
by platform TLB purge functions and updates ia64_global_tlb_purge()
accordingly.  sn2_global_tlb_purge() now calculates the region register
value from the mm argument introduced with this patch.

Signed-off-by: Dean Roe <roe@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/tlb.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/ia64/mm/tlb.c')

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e..99ea8c7 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -86,10 +86,15 @@ wrap_mmu_context (struct mm_struct *mm)
 }
 
 void
-ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits)
 {
 	static DEFINE_SPINLOCK(ptcg_lock);
 
+	if (mm != current->active_mm) {
+		flush_tlb_all();
+		return;
+	}
+
 	/* HW requires global serialization of ptc.ga. */
 	spin_lock(&ptcg_lock);
 	{
@@ -135,15 +140,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 	unsigned long size = end - start;
 	unsigned long nbits;
 
+#ifndef CONFIG_SMP
 	if (mm != current->active_mm) {
-		/* this does happen, but perhaps it's not worth optimizing for? */
-#ifdef CONFIG_SMP
-		flush_tlb_all();
-#else
 		mm->context = 0;
-#endif
 		return;
 	}
+#endif
 
 	nbits = ia64_fls(size + 0xfff);
 	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
@@ -153,7 +155,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 	start &= ~((1UL << nbits) - 1);
 
 #	ifdef CONFIG_SMP
-	platform_global_tlb_purge(start, end, nbits);
+	platform_global_tlb_purge(mm, start, end, nbits);
 #	else
 	do {
 		ia64_ptcl(start, (nbits<<2));
--
cgit v1.1
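
To make the shape of the first change concrete, here is a minimal userspace
sketch (not kernel code) of the iteration rewrite.  The NR_CPUS value, the
bitmask, cpu_online(), next_online_cpu() and the FOR_EACH_ONLINE_CPU macro
below are illustrative stand-ins for the kernel's cpumask machinery; the real
for_each_online_cpu() walks cpu_online_map using first_cpu()/next_cpu().

	/*
	 * Not kernel code: a toy model of replacing the 0..NR_CPUS scan
	 * with an iteration over only the online CPUs.
	 */
	#include <stdio.h>

	#define NR_CPUS 64		/* compile-time maximum, often >> cpus present */

	static unsigned long online_mask = 0x17;	/* pretend CPUs 0,1,2,4 are online */

	static int cpu_online(int cpu)
	{
		return (online_mask >> cpu) & 1;
	}

	/* next_cpu()-style helper: advance to the next online CPU at or after 'cpu' */
	static int next_online_cpu(int cpu)
	{
		while (cpu < NR_CPUS && !cpu_online(cpu))
			cpu++;
		return cpu;
	}

	/* visit only online CPUs, as for_each_online_cpu(i) does */
	#define FOR_EACH_ONLINE_CPU(i) \
		for ((i) = next_online_cpu(0); (i) < NR_CPUS; (i) = next_online_cpu((i) + 1))

	int main(void)
	{
		int i, self = 2;	/* stand-in for get_cpu() */

		/* old style: test all NR_CPUS slots, online or not */
		for (i = 0; i < NR_CPUS; ++i)
			if (cpu_online(i) && i != self)
				printf("old loop: flag cpu %d\n", i);

		/* new style: only online CPUs are visited (4 iterations, not 64) */
		FOR_EACH_ONLINE_CPU(i)
			if (i != self)
				printf("new loop: flag cpu %d\n", i);

		return 0;
	}

The point of the rewrite is that the loop cost now scales with the number of
online CPUs rather than with the compile-time NR_CPUS, and any future
improvement to the generic iterator benefits this code for free.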
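
The second patch is about who makes the mm != current->active_mm decision.
Before, flush_tlb_range() handled the foreign-mm case itself by calling
flush_tlb_all(); after, it forwards mm to platform_global_tlb_purge() so a
platform implementation such as sn2_global_tlb_purge() can purge selectively.
Below is a schematic userspace model of that control flow, under stated
assumptions: the names mirror the kernel's but the bodies are printf stubs,
active_mm stands in for current->active_mm, the vma argument is dropped for
brevity, and sn2's real region-register computation is not shown.

	/* Not kernel code: a toy model of the post-patch call path. */
	#include <stdio.h>

	struct mm_struct { int id; };

	static struct mm_struct *active_mm;	/* stand-in for current->active_mm */

	static void flush_tlb_all(void)
	{
		printf("flush_tlb_all: interrupt every cpu (slow on large systems)\n");
	}

	/* after the patch, the platform hook sees the mm and can choose */
	static void platform_global_tlb_purge(struct mm_struct *mm,
					      unsigned long start, unsigned long end,
					      unsigned long nbits)
	{
		if (mm != active_mm) {
			/* the generic ia64 fallback keeps the old behaviour ... */
			flush_tlb_all();
			return;
		}
		/* ... while a smarter platform can purge only where necessary */
		printf("targeted purge of [%#lx, %#lx), stride 2^%lu\n",
		       start, end, nbits);
	}

	static void flush_tlb_range(struct mm_struct *mm,
				    unsigned long start, unsigned long end)
	{
		unsigned long nbits = 14;	/* pretend 16KB purge stride */

		/* mm is now forwarded instead of being special-cased here */
		platform_global_tlb_purge(mm, start, end, nbits);
	}

	int main(void)
	{
		struct mm_struct a = { 1 }, b = { 2 };

		active_mm = &a;
		flush_tlb_range(&a, 0x4000, 0x8000);	/* own mm: targeted purge */
		flush_tlb_range(&b, 0x4000, 0x8000);	/* foreign mm: falls back */
		return 0;
	}

In the kernel, the generic ia64_global_tlb_purge() keeps the conservative
flush_tlb_all() fallback shown above, while sn2 hardware can purge remote
TLBs with ptc.ga-style writes instead of interrupting every cpu, which is why
pushing the decision down to the platform hook pays off on large Altix systems.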