From 8005aba69a6440a535a4cc2aed99ffca580847e0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 21 Jun 2005 15:39:22 -0700 Subject: [SPARC64]: Fix cmsg length checks in Solaris emulation layer. Signed-off-by: David S. Miller --- arch/sparc64/solaris/socket.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/solaris/socket.c b/arch/sparc64/solaris/socket.c index ec8e074..0674058 100644 --- a/arch/sparc64/solaris/socket.c +++ b/arch/sparc64/solaris/socket.c @@ -317,8 +317,10 @@ asmlinkage int solaris_sendmsg(int fd, struct sol_nmsghdr __user *user_msg, unsi unsigned long *kcmsg; compat_size_t cmlen; - if(kern_msg.msg_controllen > sizeof(ctl) && - kern_msg.msg_controllen <= 256) { + if (kern_msg.msg_controllen <= sizeof(compat_size_t)) + return -EINVAL; + + if(kern_msg.msg_controllen > sizeof(ctl)) { err = -ENOBUFS; ctl_buf = kmalloc(kern_msg.msg_controllen, GFP_KERNEL); if(!ctl_buf) -- cgit v1.1 From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Jun 2005 17:14:34 -0700 Subject: [PATCH] smp_processor_id() cleanup This patch implements a number of smp_processor_id() cleanup ideas that Arjan van de Ven and I came up with. The previous __smp_processor_id/_smp_processor_id/smp_processor_id API spaghetti was hard to follow both on the implementational and on the usage side. Some of the complexity arose from picking wrong names, some of the complexity comes from the fact that not all architectures defined __smp_processor_id. In the new code, there are two externally visible symbols: - smp_processor_id(): debug variant. - raw_smp_processor_id(): nondebug variant. Replaces all existing uses of _smp_processor_id() and __smp_processor_id(). Defined by every SMP architecture in include/asm-*/smp.h. There is one new internal symbol, dependent on DEBUG_PREEMPT: - debug_smp_processor_id(): internal debug variant, mapped to smp_processor_id(). Also, I moved debug_smp_processor_id() from lib/kernel_lock.c into a new lib/smp_processor_id.c file. All related comments got updated and/or clarified. I have build/boot tested the following 8 .config combinations on x86: {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT} I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT. (Other architectures are untested, but should work just fine.) Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/lib/delay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c index f6b4c78..e880872 100644 --- a/arch/sparc64/lib/delay.c +++ b/arch/sparc64/lib/delay.c @@ -31,7 +31,7 @@ void __const_udelay(unsigned long n) { n *= 4; - n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4)); + n *= (cpu_data(raw_smp_processor_id()).udelay_val * (HZ/4)); n >>= 32; __delay(n + 1); -- cgit v1.1 From 63551ae0feaaa23807ebea60de1901564bbef32e Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 21 Jun 2005 17:14:44 -0700 Subject: [PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arches, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code.
It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/mm/hugetlbpage.c | 195 ++++++------------------------------------ 1 file changed, 24 insertions(+), 171 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 5a1f831..625cbb3 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -22,7 +22,7 @@ #include #include -static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; @@ -41,7 +41,7 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) return pte; } -static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; @@ -62,30 +62,34 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) #define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) -static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, - struct page *page, pte_t * page_table, int write_access) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) { - unsigned long i; - pte_t entry; + int i; + + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + set_pte_at(mm, addr, ptep, entry); + ptep++; + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } +} - add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t entry; + int i; - if (write_access) - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, - vma->vm_page_prot))); - else - entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); - entry = pte_mkyoung(entry); - mk_pte_huge(entry); + entry = *ptep; for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte_at(mm, addr, page_table, entry); - page_table++; + pte_clear(mm, addr, ptep); addr += PAGE_SIZE; - - pte_val(entry) += PAGE_SIZE; + ptep++; } + + return entry; } /* @@ -100,79 +104,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) -{ - pte_t *src_pte, *dst_pte, entry; - struct page *ptepage; - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - int i; - - while (addr < end) { - dst_pte = huge_pte_alloc(dst, addr); - if (!dst_pte) - goto nomem; - src_pte = huge_pte_offset(src, addr); - BUG_ON(!src_pte || pte_none(*src_pte)); - entry = *src_pte; - ptepage = pte_page(entry); - get_page(ptepage); - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte_at(dst, addr, dst_pte, entry); - pte_val(entry) += PAGE_SIZE; - dst_pte++; - addr += PAGE_SIZE; - } - add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); - } - return 0; - -nomem: - return -ENOMEM; -} - -int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, int *length, int i) -{ - unsigned long vaddr = *position; - int remainder = *length; - -
WARN_ON(!is_vm_hugetlb_page(vma)); - - while (vaddr < vma->vm_end && remainder) { - if (pages) { - pte_t *pte; - struct page *page; - - pte = huge_pte_offset(mm, vaddr); - - /* hugetlb should be locked, and hence, prefaulted */ - BUG_ON(!pte || pte_none(*pte)); - - page = pte_page(*pte); - - WARN_ON(!PageCompound(page)); - - get_page(page); - pages[i] = page; - } - - if (vmas) - vmas[i] = vma; - - vaddr += PAGE_SIZE; - --remainder; - ++i; - } - - *length = remainder; - *position = vaddr; - - return i; -} - struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { @@ -190,34 +121,6 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -void unmap_hugepage_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long address; - pte_t *pte; - struct page *page; - int i; - - BUG_ON(start & (HPAGE_SIZE - 1)); - BUG_ON(end & (HPAGE_SIZE - 1)); - - for (address = start; address < end; address += HPAGE_SIZE) { - pte = huge_pte_offset(mm, address); - BUG_ON(!pte); - if (pte_none(*pte)) - continue; - page = pte_page(*pte); - put_page(page); - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - pte_clear(mm, address+(i*PAGE_SIZE), pte); - pte++; - } - } - add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); - flush_tlb_range(vma, start, end); -} - static void context_reload(void *__data) { struct mm_struct *mm = __data; @@ -226,12 +129,8 @@ static void context_reload(void *__data) load_secondary_context(mm); } -int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +void hugetlb_prefault_arch_hook(struct mm_struct *mm) { - struct mm_struct *mm = current->mm; - unsigned long addr; - int ret = 0; - /* On UltraSPARC-III+ and later, configure the second half of * the Data-TLB for huge pages. */ @@ -261,50 +160,4 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) } spin_unlock(&ctx_alloc_lock); } - - BUG_ON(vma->vm_start & ~HPAGE_MASK); - BUG_ON(vma->vm_end & ~HPAGE_MASK); - - spin_lock(&mm->page_table_lock); - for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { - unsigned long idx; - pte_t *pte = huge_pte_alloc(mm, addr); - struct page *page; - - if (!pte) { - ret = -ENOMEM; - goto out; - } - if (!pte_none(*pte)) - continue; - - idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) - + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); - page = find_get_page(mapping, idx); - if (!page) { - /* charge the fs quota first */ - if (hugetlb_get_quota(mapping)) { - ret = -ENOMEM; - goto out; - } - page = alloc_huge_page(); - if (!page) { - hugetlb_put_quota(mapping); - ret = -ENOMEM; - goto out; - } - ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); - if (! ret) { - unlock_page(page); - } else { - hugetlb_put_quota(mapping); - free_huge_page(page); - goto out; - } - } - set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE); - } -out: - spin_unlock(&mm->page_table_lock); - return ret; } -- cgit v1.1 From 1363c3cd8603a913a27e2995dccbd70d5312d8e6 Mon Sep 17 00:00:00 2001 From: Wolfgang Wander Date: Tue, 21 Jun 2005 17:14:49 -0700 Subject: [PATCH] Avoiding mmap fragmentation Ingo recently introduced a great speedup for allocating new mmaps using the free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and causes huge performance increases in thread creation. 
The downside of this patch is that it does lead to fragmentation in the mmap-ed areas (visible via /proc/self/maps), such that some applications that work fine under 2.4 kernels quickly run out of memory on any 2.6 kernel. The problem is twofold: 1) the free_area_cache is used to continue a search for memory where the last search ended. Before the change, new areas were always searched from the base address on. So now new small areas are cluttering holes of all sizes throughout the whole mmap-able region, whereas before, small areas tended to close holes near the base, leaving holes far from the base large and available for larger requests. 2) the free_area_cache also is set to the location of the last munmap-ed area, so in scenarios where we allocate e.g. five regions of 1K each, then free regions 4, 2, 3 in this order, the next request for 1K will be placed in the position of the old region 3, whereas before we appended it to the still active region 1, placing it at the location of the old region 2. Before we had 1 free region of 2K, now we only get two free regions of 1K -> fragmentation. The patch addresses these issues by introducing yet another cache descriptor, cached_hole_size, that contains the largest known hole size below the current free_area_cache. If a new request comes in, the size is compared against the cached_hole_size, and if the request can be filled with a hole below free_area_cache, the search is started from the base instead. The results look promising: whereas 2.6.12-rc4 fragments quickly and my (earlier posted) leakme.c test program terminates after 50000+ iterations with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely (as expected) with thread creation: Ingo's test_str02 with 20000 threads requires 0.7s system time. Taking out Ingo's patch (un-patch available per request) by basically deleting all mentions of free_area_cache from the kernel and starting the search for new memory always at the respective bases, we observe: leakme terminates successfully with 11 distinct, hardly fragmented areas in /proc/self/maps, but thread creation is grindingly slow: 30+s(!) system time for Ingo's test_str02 with 20000 threads. Now - drumroll ;-) - the appended patch works fine with leakme: it ends with only 7 distinct areas in /proc/self/maps, and thread creation also seems sufficiently fast with 0.71s for 20000 threads.
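To make the heuristic above concrete, here is a rough, self-contained C sketch of the search loop; this is an editorial illustration, not kernel code: struct mm_sketch, BASE, TOP, and the sorted vma array are simplified stand-ins for the real mm_struct fields, TASK_UNMAPPED_BASE, and the vma list, while the two cache updates mirror the ones the sys_sparc.c hunk below adds to arch_get_unmapped_area():

#include <stddef.h>

struct range { unsigned long start, end; };	/* existing mappings, sorted */

struct mm_sketch {
	unsigned long free_area_cache;	/* where the last search ended */
	unsigned long cached_hole_size;	/* largest known hole below it */
};

#define BASE	0x10000UL	/* stand-in for TASK_UNMAPPED_BASE */
#define TOP	0x40000000UL	/* stand-in for the task size limit */

static unsigned long get_unmapped_sketch(struct mm_sketch *mm,
					 const struct range *vma, size_t n,
					 unsigned long len)
{
	unsigned long addr;
	size_t i;

	/* A big-enough hole is known to exist below the cache, so
	 * restart from the base instead of fragmenting high memory. */
	if (len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = BASE;
	}
	addr = mm->free_area_cache;

full_search:
	for (i = 0; i < n && vma[i].end <= addr; i++)
		;	/* skip mappings entirely below the start point */

	for (;; i++) {
		unsigned long limit = (i < n) ? vma[i].start : TOP;

		if (addr + len > TOP) {
			if (mm->free_area_cache != BASE) {
				/* wrap around to the base exactly once */
				addr = mm->free_area_cache = BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return (unsigned long)-1;	/* -ENOMEM stand-in */
		}
		if (addr + len <= limit) {		/* the hole fits */
			mm->free_area_cache = addr + len;
			return addr;
		}
		/* hole too small: remember the largest one stepped over */
		if (limit > addr && limit - addr > mm->cached_hole_size)
			mm->cached_hole_size = limit - addr;
		addr = vma[i].end;
	}
}

The point of cached_hole_size is visible in the first and last branches: a small request that would previously have been pushed ever higher can now reuse a known hole near the base.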
Signed-off-by: Wolfgang Wander Credit-to: "Richard Purdie" Signed-off-by: Ken Chen Acked-by: Ingo Molnar (partly) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/kernel/sys_sparc.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/sparc64') diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 0077f02..5f8c822 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -84,6 +84,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi return addr; } + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = TASK_UNMAPPED_BASE; + } start_addr = addr = mm->free_area_cache; task_size -= len; @@ -103,6 +107,7 @@ full_search: if (task_size < addr) { if (start_addr != TASK_UNMAPPED_BASE) { start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; @@ -114,6 +119,9 @@ full_search: mm->free_area_cache = addr + len; return addr; } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); -- cgit v1.1 From 3f22ab276b931b72ea04b184c155b34d0362bfc3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Jun 2005 00:07:43 -0700 Subject: [PATCH] make each arch use mm/Kconfig For all architectures, this just means that you'll see a "Memory Model" choice in your architecture menu. For those that implement DISCONTIGMEM, you may eventually want to make your ARCH_DISCONTIGMEM_ENABLE a "def_bool y" and make your users select DISCONTIGMEM right out of the new choice menu. The only disadvantage might be if you have some specific things that you need in your help option to explain something about DISCONTIGMEM. Signed-off-by: Dave Hansen Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/sparc64') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index a72fd15..e2b050e 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -484,6 +484,8 @@ config CMDLINE NOTE: This option WILL override the PROM bootargs setting! +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" -- cgit v1.1 From 7e1048b11c5afe79aac46a42e3ccec86b8365c6d Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Thu, 23 Jun 2005 00:09:25 -0700 Subject: [PATCH] Move kprobe [dis]arming into arch specific code The architecture independent code of the current kprobes implementation is arming and disarming kprobes at registration time. The problem is that the code is assuming that arming and disarming is just done by a simple write of some magic value to an address. This is problematic for ia64 where our instructions look more like structures, and we can not insert break points by just doing something like: *p->addr = BREAKPOINT_INSTRUCTION; The following patch to 2.6.12-rc4-mm2 adds two new architecture dependent functions: * void arch_arm_kprobe(struct kprobe *p) * void arch_disarm_kprobe(struct kprobe *p) and then adds the new functions for each of the architectures that already implement kprobes (sparc64/ppc64/i386/x86_64). I thought arch_[dis]arm_kprobe was the most descriptive of what was really happening, but each of the architectures already had a disarm_kprobe() function that was really a "disarm and do some other clean-up items as needed when you stumble across a recursive kprobe." So...
I took the liberty of changing the code that was calling disarm_kprobe() to call arch_disarm_kprobe(), and then do the cleanup in the block of code dealing with the recursive kprobe case. So far this patch has been tested on i386, x86_64, and ppc64, but still needs to be tested on sparc64. Signed-off-by: Rusty Lynch Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/kernel/kprobes.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index 7066d7b..d67195b 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -6,7 +6,6 @@ #include #include #include - #include #include @@ -47,6 +46,19 @@ void arch_copy_kprobe(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; + p->opcode = *p->addr; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flushi(p->addr); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flushi(p->addr); } void arch_remove_kprobe(struct kprobe *p) @@ -78,17 +90,6 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) } } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) -{ - *p->addr = p->opcode; - flushi(p->addr); - - regs->tpc = (unsigned long) p->addr; - regs->tnpc = current_kprobe_orig_tnpc; - regs->tstate = ((regs->tstate & ~TSTATE_PIL) | - current_kprobe_orig_tstate_pil); -} - static int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -109,7 +110,11 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); + arch_disarm_kprobe(p); + regs->tpc = (unsigned long) p->addr; + regs->tnpc = current_kprobe_orig_tnpc; + regs->tstate = ((regs->tstate & ~TSTATE_PIL) | + current_kprobe_orig_tstate_pil); ret = 1; } else { p = current_kprobe; -- cgit v1.1 From e539c2331414e73a5a1b79fb57369d79447c1cf8 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Thu, 23 Jun 2005 00:09:39 -0700 Subject: [PATCH] kprobes: Temporary disarming of reentrant probe for sparc64 This patch includes sparc64 architecture specific changes to support temporary disarming on reentrancy of probes. Signed-off-by: Prasanna S Panchamukhi Cc: "David S.
Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/kernel/kprobes.c | 62 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 13 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index d67195b..bdac631 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -65,19 +65,40 @@ void arch_remove_kprobe(struct kprobe *p) { } -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long current_kprobe_orig_tnpc; static unsigned long current_kprobe_orig_tstate_pil; static unsigned int kprobe_status; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_orig_tnpc_prev; +static unsigned long kprobe_orig_tstate_pil_prev; +static unsigned int kprobe_status_prev; -static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) +{ + kprobe_status_prev = kprobe_status; + kprobe_orig_tnpc_prev = current_kprobe_orig_tnpc; + kprobe_orig_tstate_pil_prev = current_kprobe_orig_tstate_pil; + kprobe_prev = current_kprobe; +} + +static inline void restore_previous_kprobe(void) +{ + kprobe_status = kprobe_status_prev; + current_kprobe_orig_tnpc = kprobe_orig_tnpc_prev; + current_kprobe_orig_tstate_pil = kprobe_orig_tstate_pil_prev; + current_kprobe = kprobe_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) { current_kprobe_orig_tnpc = regs->tnpc; current_kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); + current_kprobe = p; +} + +static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ regs->tstate |= TSTATE_PIL; /*single step inline, if it a breakpoint instruction*/ @@ -110,12 +131,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - arch_disarm_kprobe(p); - regs->tpc = (unsigned long) p->addr; - regs->tnpc = current_kprobe_orig_tnpc; - regs->tstate = ((regs->tstate & ~TSTATE_PIL) | - current_kprobe_orig_tstate_pil); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + kprobe_status = KPROBE_REENTER; + prepare_singlestep(p, regs); + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) @@ -143,8 +170,8 @@ static int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + set_current_kprobe(p, regs); kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; if (p->pre_handler && p->pre_handler(p, regs)) return 1; @@ -250,12 +277,20 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); + /*Restore back the original saved kprobes variables and continue. 
*/ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); return 1; @@ -397,3 +432,4 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } + -- cgit v1.1 From 0d77e5a2c23da734f5a7925f64afa1c2ed92e0f9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 23 Jun 2005 00:10:14 -0700 Subject: [PATCH] compat: introduce compat_time_t This patch is based on work by Carlos O'Donell and Matthew Wilcox. It introduces/updates the compat_time_t type and uses it for compat siginfo structures. I have built this on ppc64 and x86_64. Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc64/kernel/signal32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 9a375e97..f28428f4 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -102,7 +102,7 @@ typedef struct compat_siginfo{ /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ -- cgit v1.1 From b445e26cbf784cdba10f2b6c3e2cd3ee7bab360a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Jun 2005 15:42:04 -0700 Subject: [SPARC64]: Avoid membar instructions in delay slots. In particular, avoid membar instructions in the delay slot of a jmpl instruction. UltraSPARC-I, II, IIi, and IIe have a bug, documented in the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51. The long and short of it is that if the IMU unit misses on a branch or jmpl, and there is a store buffer synchronizing membar in the delay slot, the chip can stop fetching instructions. If interrupts are enabled or some other trap is enabled, the chip will unwedge itself, but performance will suffer. We already had a workaround for this bug in a few spots, but it's better to have the entire tree sanitized for this rule. Signed-off-by: David S.
Miller --- arch/sparc64/kernel/entry.S | 6 ++- arch/sparc64/kernel/semaphore.c | 12 +++-- arch/sparc64/kernel/trampoline.S | 3 +- arch/sparc64/lib/U1memcpy.S | 103 ++++++++++++++++++++------------------- arch/sparc64/lib/VISsave.S | 15 +++++- arch/sparc64/lib/atomic.S | 42 ++++++++++------ arch/sparc64/lib/bitops.S | 31 +++++++----- arch/sparc64/lib/debuglocks.c | 6 ++- arch/sparc64/lib/dec_and_lock.S | 6 ++- arch/sparc64/lib/rwsem.S | 15 ++++-- arch/sparc64/mm/init.c | 6 ++- arch/sparc64/mm/ultra.S | 3 +- 12 files changed, 150 insertions(+), 98 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a47f2d0..ffe717a 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -271,8 +271,9 @@ cplus_fptrap_insn_1: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 + membar #Sync b,pt %xcc, fpdis_exit - membar #Sync + nop 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -301,8 +302,9 @@ cplus_fptrap_insn_2: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld %f32, %f34, %f62 + membar #Sync ba,pt %xcc, fpdis_exit - membar #Sync + nop 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 ldxa [%g3] ASI_DMMU, %g5 diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 63496c4..a809e63 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c @@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr) " add %1, %4, %1\n" " cas [%3], %0, %1\n" " cmp %0, %1\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%icc, 1b\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (&sem->count), "r" (incr), "m" (sem->count) : "cc"); @@ -71,8 +72,9 @@ void up(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " addcc %%g7, 1, %%g0\n" +" membar #StoreLoad | #StoreStore\n" " ble,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %2, %%g1\n" diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 2c8f934..3a145fc 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -98,8 +98,9 @@ startup_continue: sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 + membar #StoreLoad | #StoreStore brnz,pn %g1, 1b - membar #StoreLoad | #StoreStore + nop sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index da9b520..bafd2fc 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S @@ -87,14 +87,17 @@ #define LOOP_CHUNK3(src, dest, len, branch_dest) \ MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ EX_ST(STORE_BLK(%fsrc, %dest)); \ - add %dest, 0x40, %dest; + add %dest, 0x40, %dest; \ + DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ 
EX_ST(STORE_BLK(%fsrc, %dest)); \ add %dest, 0x40, %dest; \ - ba,pt %xcc, target; + ba,pt %xcc, target; \ + nop; #define FINISH_VISCHUNK(dest, f0, f1, left) \ subcc %left, 8, %left;\ @@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - STORE_JUMP(o0, f48, 40f) membar #Sync + STORE_JUMP(o0, f48, 40f) 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - STORE_JUMP(o0, f48, 48f) membar #Sync + STORE_JUMP(o0, f48, 48f) 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - STORE_JUMP(o0, f48, 56f) membar #Sync + STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - STORE_JUMP(o0, f48, 41f) membar #Sync + STORE_JUMP(o0, f48, 41f) 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - STORE_JUMP(o0, f48, 49f) membar #Sync + STORE_JUMP(o0, f48, 49f) 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - STORE_JUMP(o0, f48, 57f) membar #Sync + STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - STORE_JUMP(o0, f48, 42f) membar #Sync + STORE_JUMP(o0, f48, 42f) 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - STORE_JUMP(o0, f48, 50f) membar #Sync + STORE_JUMP(o0, f48, 50f) 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - STORE_JUMP(o0, f48, 58f) membar #Sync + STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - STORE_JUMP(o0, f48, 43f) membar #Sync + STORE_JUMP(o0, f48, 43f) 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - STORE_JUMP(o0, f48, 51f) membar #Sync + STORE_JUMP(o0, f48, 51f) 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - STORE_JUMP(o0, f48, 59f) membar #Sync + STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, 
f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - STORE_JUMP(o0, f48, 44f) membar #Sync + STORE_JUMP(o0, f48, 44f) 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - STORE_JUMP(o0, f48, 52f) membar #Sync + STORE_JUMP(o0, f48, 52f) 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - STORE_JUMP(o0, f48, 60f) membar #Sync + STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - STORE_JUMP(o0, f48, 45f) membar #Sync + STORE_JUMP(o0, f48, 45f) 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - STORE_JUMP(o0, f48, 53f) membar #Sync + STORE_JUMP(o0, f48, 53f) 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - STORE_JUMP(o0, f48, 61f) membar #Sync + STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - STORE_JUMP(o0, f48, 46f) membar #Sync + STORE_JUMP(o0, f48, 46f) 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - STORE_JUMP(o0, f48, 54f) membar #Sync + STORE_JUMP(o0, f48, 54f) 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - STORE_JUMP(o0, f48, 62f) membar #Sync + STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - STORE_JUMP(o0, f48, 47f) membar #Sync + STORE_JUMP(o0, f48, 47f) 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - STORE_JUMP(o0, f48, 55f) membar #Sync + STORE_JUMP(o0, f48, 55f) 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_JUMP(o0, f48, 63f) membar #Sync + STORE_JUMP(o0, f48, 63f) 40: FINISH_VISCHUNK(o0, f0, f2, g3) 41: FINISH_VISCHUNK(o0, f2, f4, g3) diff --git a/arch/sparc64/lib/VISsave.S 
b/arch/sparc64/lib/VISsave.S index 65e328d..4e18989 100644 --- a/arch/sparc64/lib/VISsave.S +++ b/arch/sparc64/lib/VISsave.S @@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stda %f48, [%g3 + %g1] ASI_BLK_P 5: membar #Sync - jmpl %g7 + %g0, %g0 + ba,pt %xcc, 80f + nop + + .align 32 +80: jmpl %g7 + %g0, %g0 nop 6: ldub [%g3 + TI_FPSAVED], %o5 @@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stda %f32, [%g2 + %g1] ASI_BLK_P stda %f48, [%g3 + %g1] ASI_BLK_P membar #Sync - jmpl %g7 + %g0, %g0 + ba,pt %xcc, 80f + nop + .align 32 +80: jmpl %g7 + %g0, %g0 nop .align 32 @@ -126,6 +133,10 @@ VISenterhalf: stda %f0, [%g2 + %g1] ASI_BLK_P stda %f16, [%g3 + %g1] ASI_BLK_P membar #Sync + ba,pt %xcc, 4f + nop + + .align 32 4: and %o5, FPRS_DU, %o5 jmpl %g7 + %g0, %g0 wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index e528b8d..faf87c3 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S @@ -7,18 +7,6 @@ #include #include - /* On SMP we need to use memory barriers to ensure - * correct memory operation ordering, nop these out - * for uniprocessor. - */ -#ifdef CONFIG_SMP -#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad -#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore -#else -#define ATOMIC_PRE_BARRIER nop -#define ATOMIC_POST_BARRIER nop -#endif - .text /* Two versions of the atomic routines, one that @@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ nop .size atomic_sub, .-atomic_sub + /* On SMP we need to use memory barriers to ensure + * correct memory operation ordering, nop these out + * for uniprocessor. + */ +#ifdef CONFIG_SMP + +#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad; +#define ATOMIC_POST_BARRIER \ + ba,pt %xcc, 80b; \ + membar #StoreLoad | #StoreStore + +80: retl + nop +#else +#define ATOMIC_PRE_BARRIER +#define ATOMIC_POST_BARRIER +#endif + .globl atomic_add_ret .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ @@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %icc, 1b add %g7, %o0, %g7 + sra %g7, 0, %o0 ATOMIC_POST_BARRIER retl - sra %g7, 0, %o0 + nop .size atomic_add_ret, .-atomic_add_ret .globl atomic_sub_ret @@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %icc, 1b sub %g7, %o0, %g7 + sra %g7, 0, %o0 ATOMIC_POST_BARRIER retl - sra %g7, 0, %o0 + nop .size atomic_sub_ret, .-atomic_sub_ret .globl atomic64_add @@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %xcc, 1b add %g7, %o0, %g7 + mov %g7, %o0 ATOMIC_POST_BARRIER retl - mov %g7, %o0 + nop .size atomic64_add_ret, .-atomic64_add_ret .globl atomic64_sub_ret @@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %xcc, 1b sub %g7, %o0, %g7 + mov %g7, %o0 ATOMIC_POST_BARRIER retl - mov %g7, %o0 + nop .size atomic64_sub_ret, .-atomic64_sub_ret diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S index 886dcd2..31afbfe 100644 --- a/arch/sparc64/lib/bitops.S +++ b/arch/sparc64/lib/bitops.S @@ -7,20 +7,26 @@ #include #include + .text + /* On SMP we need to use memory barriers to ensure * correct memory operation ordering, nop these out * for uniprocessor. 
*/ + #ifdef CONFIG_SMP #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad -#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore +#define BITOP_POST_BARRIER \ + ba,pt %xcc, 80b; \ + membar #StoreLoad | #StoreStore + +80: retl + nop #else -#define BITOP_PRE_BARRIER nop -#define BITOP_POST_BARRIER nop +#define BITOP_PRE_BARRIER +#define BITOP_POST_BARRIER #endif - .text - .globl test_and_set_bit .type test_and_set_bit,#function test_and_set_bit: /* %o0=nr, %o1=addr */ @@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_set_bit, .-test_and_set_bit .globl test_and_clear_bit @@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_clear_bit, .-test_and_clear_bit .globl test_and_change_bit @@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_change_bit, .-test_and_change_bit .globl set_bit diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index c421e0c..f03344c 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -252,8 +252,9 @@ wlock_again: " andn %%g1, %%g3, %%g7\n" " casx [%0], %%g1, %%g7\n" " cmp %%g1, %%g7\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%xcc, 1b\n" -" membar #StoreLoad | #StoreStore" +" nop" : /* no outputs */ : "r" (&(rw->lock)) : "g3", "g1", "g7", "cc", "memory"); @@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str) " andn %%g1, %%g3, %%g7\n" " casx [%0], %%g1, %%g7\n" " cmp %%g1, %%g7\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%xcc, 1b\n" -" membar #StoreLoad | #StoreStore" +" nop" : /* no outputs */ : "r" (&(rw->lock)) : "g3", "g1", "g7", "cc", "memory"); diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index 7e6fdae..8ee288d 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S @@ -48,8 +48,9 @@ start_to_zero: #endif to_zero: ldstub [%o1], %g3 + membar #StoreLoad | #StoreStore brnz,pn %g3, spin_on_lock - membar #StoreLoad | #StoreStore + nop loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ cmp %g2, %g7 @@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ nop spin_on_lock: ldub [%o1], %g3 + membar #LoadLoad brnz,pt %g3, spin_on_lock - membar #LoadLoad + nop ba,pt %xcc, to_zero nop nop diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S index 174ff7b..75f0e6b 100644 --- a/arch/sparc64/lib/rwsem.S +++ b/arch/sparc64/lib/rwsem.S @@ -17,8 +17,9 @@ __down_read: bne,pn %icc, 1b add %g7, 1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop @@ -57,8 +58,9 @@ __down_write: cmp %g3, %g7 bne,pn %icc, 1b cmp %g7, 0 + membar #StoreLoad | #StoreStore bne,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop 3: @@ -97,8 +99,9 @@ __up_read: cmp %g1, %g7 bne,pn %icc, 1b cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 @@ -126,8 +129,9 @@ __up_write: bne,pn %icc, 1b sub %g7, %g1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore 
bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop @@ -151,8 +155,9 @@ __downgrade_write: bne,pn %icc, 1b sub %g7, %g1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 9c52220..8fc413c 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) "or %%g1, %0, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t" + "membar #StoreLoad | #StoreStore\n\t" "bne,pn %%xcc, 1b\n\t" - " membar #StoreLoad | #StoreStore" + " nop" : /* no outputs */ : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) : "g1", "g7"); @@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c " andn %%g7, %1, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t" + "membar #StoreLoad | #StoreStore\n\t" "bne,pn %%xcc, 1b\n\t" - " membar #StoreLoad | #StoreStore\n" + " nop\n" "2:" : /* no outputs */ : "r" (cpu), "r" (mask), "r" (&page->flags), diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 7a09343..7a2431d 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */ andn %o3, 1, %o3 stxa %g0, [%o3] ASI_IMMU_DEMAP 2: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync brnz,pt %o1, 1b - membar #Sync + nop stxa %g2, [%o4] ASI_DMMU flush %g6 wrpr %g0, 0, %tl -- cgit v1.1 From 63b614522cba5a015923c0e8f284be6e01c13f1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Jun 2005 17:04:45 -0700 Subject: [SPARC64]: Get rid of fast IRQ feature. The only real user was the assembler floppy interrupt handler, which does not need to be in assembly. This makes it so that there are fewer pieces of code which know about the internal layout of ivector_table[] and friends. Signed-off-by: David S.
Miller --- arch/sparc64/kernel/auxio.c | 2 +- arch/sparc64/kernel/entry.S | 110 ----------------------- arch/sparc64/kernel/irq.c | 171 +++++++++++------------------------- arch/sparc64/kernel/sparc64_ksyms.c | 1 - 4 files changed, 51 insertions(+), 233 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c index a0716cc..8852c20 100644 --- a/arch/sparc64/kernel/auxio.c +++ b/arch/sparc64/kernel/auxio.c @@ -16,7 +16,7 @@ #include #include -/* This cannot be static, as it is referenced in entry.S */ +/* This cannot be static, as it is referenced in irq.c */ void __iomem *auxio_register = NULL; enum auxio_type { diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index ffe717a..eee516a 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -701,116 +701,6 @@ utrap_ill: ba,pt %xcc, rtrap clr %l6 -#ifdef CONFIG_BLK_DEV_FD - .globl floppy_hardint -floppy_hardint: - wr %g0, (1 << 11), %clear_softint - sethi %hi(doing_pdma), %g1 - ld [%g1 + %lo(doing_pdma)], %g2 - brz,pn %g2, floppy_dosoftint - sethi %hi(fdc_status), %g3 - ldx [%g3 + %lo(fdc_status)], %g3 - sethi %hi(pdma_vaddr), %g5 - ldx [%g5 + %lo(pdma_vaddr)], %g4 - sethi %hi(pdma_size), %g5 - ldx [%g5 + %lo(pdma_size)], %g5 - -next_byte: - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - andcc %g7, 0x80, %g0 - be,pn %icc, floppy_fifo_emptied - andcc %g7, 0x20, %g0 - be,pn %icc, floppy_overrun - andcc %g7, 0x40, %g0 - be,pn %icc, floppy_write - sub %g5, 1, %g5 - - inc %g3 - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - dec %g3 - orcc %g0, %g5, %g0 - stb %g7, [%g4] - bne,pn %xcc, next_byte - add %g4, 1, %g4 - - b,pt %xcc, floppy_tdone - nop - -floppy_write: - ldub [%g4], %g7 - orcc %g0, %g5, %g0 - inc %g3 - stba %g7, [%g3] ASI_PHYS_BYPASS_EC_E - dec %g3 - bne,pn %xcc, next_byte - add %g4, 1, %g4 - -floppy_tdone: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(auxio_register), %g1 - ldx [%g1 + %lo(auxio_register)], %g7 - lduba [%g7] ASI_PHYS_BYPASS_EC_E, %g5 - or %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - andn %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - - nop; nop; nop; nop; nop; nop; - nop; nop; nop; nop; nop; nop; - - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - sethi %hi(doing_pdma), %g1 - b,pt %xcc, floppy_dosoftint - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_fifo_emptied: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(irq_action), %g1 - or %g1, %lo(irq_action), %g1 - ldx [%g1 + (11 << 3)], %g3 ! irqaction[floppy_irq] - ldx [%g3 + 0x08], %g4 ! action->flags>>48==ino - sethi %hi(ivector_table), %g3 - srlx %g4, 48, %g4 - or %g3, %lo(ivector_table), %g3 - sllx %g4, 5, %g4 - ldx [%g3 + %g4], %g4 ! &ivector_table[ino] - ldx [%g4 + 0x10], %g4 ! bucket->iclr - stwa %g0, [%g4] ASI_PHYS_BYPASS_EC_E ! ICLR_IDLE - membar #Sync ! probably not needed... 
- retry - -floppy_overrun: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(doing_pdma), %g1 - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_dosoftint: - rdpr %pil, %g2 - wrpr %g0, 15, %pil - sethi %hi(109f), %g7 - b,pt %xcc, etrap_irq -109: or %g7, %lo(109b), %g7 - - mov 11, %o0 - mov 0, %o1 - call sparc_floppy_irq - add %sp, PTREGS_OFF, %o2 - - b,pt %xcc, rtrap_irq - nop - -#endif /* CONFIG_BLK_DEV_FD */ - /* XXX Here is stuff we still need to write... -DaveM XXX */ .globl netbsd_syscall netbsd_syscall: diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 4dcb8af..4247125 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs) } #ifdef CONFIG_BLK_DEV_FD -extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs); +extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);; -void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) -{ - struct irqaction *action = *(irq + irq_action); - struct ino_bucket *bucket; - int cpu = smp_processor_id(); - - irq_enter(); - kstat_this_cpu.irqs[irq]++; - - *(irq_work(cpu, irq)) = 0; - bucket = get_ino_in_irqaction(action) + ivector_table; - - bucket->flags |= IBF_INPROGRESS; - - floppy_interrupt(irq, dev_cookie, regs); - upa_writel(ICLR_IDLE, bucket->iclr); - - bucket->flags &= ~IBF_INPROGRESS; - - irq_exit(); -} -#endif - -/* The following assumes that the branch lies before the place we - * are branching to. This is the case for a trap vector... - * You have been warned. - */ -#define SPARC_BRANCH(dest_addr, inst_addr) \ - (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff)) - -#define SPARC_NOP (0x01000000) +/* XXX No easy way to include asm/floppy.h XXX */ +extern unsigned char *pdma_vaddr; +extern unsigned long pdma_size; +extern volatile int doing_pdma; +extern unsigned long fdc_status; -static void install_fast_irq(unsigned int cpu_irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *)) +irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) { - extern unsigned long sparc64_ttable_tl0; - unsigned long ttent = (unsigned long) &sparc64_ttable_tl0; - unsigned int *insns; - - ttent += 0x820; - ttent += (cpu_irq - 1) << 5; - insns = (unsigned int *) ttent; - insns[0] = SPARC_BRANCH(((unsigned long) handler), - ((unsigned long)&insns[0])); - insns[1] = SPARC_NOP; - __asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent)); -} - -int request_fast_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char *name, void *dev_id) -{ - struct irqaction *action; - struct ino_bucket *bucket = __bucket(irq); - unsigned long flags; - - /* No pil0 dummy buckets allowed here. 
*/ - if (bucket < &ivector_table[0] || - bucket >= &ivector_table[NUM_IVECS]) { - unsigned int *caller; - - __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); - printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt " - "from %p, irq %08x.\n", caller, irq); - return -EINVAL; - } - - if (!handler) - return -EINVAL; + if (likely(doing_pdma)) { + void __iomem *stat = (void __iomem *) fdc_status; + unsigned char *vaddr = pdma_vaddr; + unsigned long size = pdma_size; + u8 val; + + while (size) { + val = readb(stat); + if (unlikely(!(val & 0x80))) { + pdma_vaddr = vaddr; + pdma_size = size; + return IRQ_HANDLED; + } + if (unlikely(!(val & 0x20))) { + pdma_vaddr = vaddr; + pdma_size = size; + doing_pdma = 0; + goto main_interrupt; + } + if (val & 0x40) { + /* read */ + *vaddr++ = readb(stat + 1); + } else { + unsigned char data = *vaddr++; - if ((bucket->pil == 0) || (bucket->pil == 14)) { - printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n"); - return -EBUSY; - } + /* write */ + writeb(data, stat + 1); + } + size--; + } - spin_lock_irqsave(&irq_action_lock, flags); + pdma_vaddr = vaddr; + pdma_size = size; - action = *(bucket->pil + irq_action); - if (action) { - if (action->flags & SA_SHIRQ) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & SA_SHIRQ) - panic("Trying to register fast irq as shared.\n"); - printk("request_fast_irq: Trying to register yet already owned.\n"); - spin_unlock_irqrestore(&irq_action_lock, flags); - return -EBUSY; - } + /* Send Terminal Count pulse to floppy controller. */ + val = readb(auxio_register); + val |= AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); + val &= AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); - /* - * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we - * support smp intr affinity in this path. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed " - "using kmalloc\n", bucket->pil, name); - } - if (action == NULL) - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), - GFP_ATOMIC); - if (!action) { - spin_unlock_irqrestore(&irq_action_lock, flags); - return -ENOMEM; + doing_pdma = 0; } - install_fast_irq(bucket->pil, handler); - bucket->irq_info = action; - bucket->flags |= IBF_ACTIVE; - - action->handler = handler; - action->flags = irqflags; - action->dev_id = NULL; - action->name = name; - action->next = NULL; - put_ino_in_irqaction(action, irq); - put_smpaff_in_irqaction(action, CPU_MASK_NONE); - - *(bucket->pil + irq_action) = action; - enable_irq(irq); - - spin_unlock_irqrestore(&irq_action_lock, flags); - -#ifdef CONFIG_SMP - distribute_irqs(); -#endif - return 0; +main_interrupt: + return floppy_interrupt(irq, dev_cookie, regs); } +EXPORT_SYMBOL(sparc_floppy_irq); +#endif /* We really don't need these at all on the Sparc. We only have * stubs here because they are exported to modules. diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index e78cc53..56cd96f 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range); EXPORT_SYMBOL(mostek_lock); EXPORT_SYMBOL(mstk48t02_regs); -EXPORT_SYMBOL(request_fast_irq); #ifdef CONFIG_SUN_AUXIO EXPORT_SYMBOL(auxio_set_led); EXPORT_SYMBOL(auxio_set_lte); -- cgit v1.1
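For reference, the delay-slot rule from the "Avoid membar instructions in delay slots" commit earlier in this series can be shown as a small GCC inline-asm sketch; this is an editorial illustration modeled on the trampoline.S hunk of that patch, and the function name and lock variable are hypothetical, not code from the tree:

/* Erratum 51 workaround shape: the membar is hoisted ahead of the
 * branch and the delay slot is filled with a nop, so a store buffer
 * synchronizing membar never sits in a branch/jmpl delay slot. */
static inline void spin_acquire_sketch(unsigned char *lock)
{
	unsigned char tmp;

	__asm__ __volatile__(
"1:	ldstub		[%1], %0\n"			/* try to take the byte lock */
"	membar		#StoreLoad | #StoreStore\n"	/* barrier BEFORE the branch */
"	brnz,pn		%0, 1b\n"			/* retry while it was held */
"	 nop\n"						/* delay slot: a nop, never a membar */
	: "=&r" (tmp)
	: "r" (lock)
	: "memory");
}

The pre-fix form would have had the membar in the delay slot (brnz,pn %0, 1b followed by the membar), which is exactly the pattern the patch removes throughout the tree.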