From 3ac4c949e02f26be1e4378f9acfb07ec87db947b Mon Sep 17 00:00:00 2001 From: Robert Reif Date: Fri, 10 Aug 2007 15:52:06 -0700 Subject: [SPARC32]: Remove iommu from struct sbus_bus and use archdata like sparc64. Signed-off-by: Robert Reif Signed-off-by: David S. Miller --- arch/sparc/kernel/ebus.c | 1 + arch/sparc/mm/io-unit.c | 18 +++++++++--------- arch/sparc/mm/iommu.c | 12 ++++++------ 3 files changed, 16 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c index ac352eb..e2d02fd 100644 --- a/arch/sparc/kernel/ebus.c +++ b/arch/sparc/kernel/ebus.c @@ -238,6 +238,7 @@ void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *d sd = &dev->ofdev.dev.archdata; sd->prom_node = dp; sd->op = &dev->ofdev; + sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; dev->ofdev.node = dp; dev->ofdev.dev.parent = &dev->bus->ofdev.dev; diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 4ccda77..7c89893 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -66,7 +66,7 @@ iounit_init(int sbi_node, int io_node, struct sbus_bus *sbus) } if(!xpt) panic("Cannot map External Page Table."); - sbus->iommu = (struct iommu_struct *)iounit; + sbus->ofdev.dev.archdata.iommu = iounit; iounit->page_table = xpt; spin_lock_init(&iounit->lock); @@ -127,7 +127,7 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus *sbus) { unsigned long ret, flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); ret = iounit_get_area(iounit, (unsigned long)vaddr, len); @@ -138,7 +138,7 @@ static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) { unsigned long flags; - 
struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; /* FIXME: Cache some resolved pages - often several sg entries are to the same page */ spin_lock_irqsave(&iounit->lock, flags); @@ -153,7 +153,7 @@ static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus static void iounit_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_bus *sbus) { unsigned long flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); len = ((vaddr & ~PAGE_MASK) + len + (PAGE_SIZE-1)) >> PAGE_SHIFT; @@ -168,7 +168,7 @@ static void iounit_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_ { unsigned long flags; unsigned long vaddr, len; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { @@ -211,7 +211,7 @@ static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, in i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT); for_each_sbus(sbus) { - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; iopte = (iopte_t *)(iounit->page_table + i); *iopte = MKIOPTE(__pa(page)); @@ -235,7 +235,7 @@ static void iounit_unmap_dma_area(unsigned long addr, int len) static struct page *iounit_translate_dvma(unsigned long addr) { struct sbus_bus *sbus = sbus_root; /* They are all the same */ - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; int i; iopte_t *iopte; @@ -279,7 +279,7 @@ __u32 iounit_map_dma_init(struct sbus_bus *sbus, int size) unsigned long rotor, scan, limit; unsigned long flags; __u32 ret; - struct iounit_struct *iounit = (struct 
iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; npages = (size + (PAGE_SIZE-1)) >> PAGE_SHIFT; i = 0x0213; @@ -315,7 +315,7 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); __u32 iounit_map_dma_page(__u32 vaddr, void *addr, struct sbus_bus *sbus) { int scan = (vaddr - IOUNIT_DMA_BASE) >> PAGE_SHIFT; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; iounit->page_table[scan] = MKIOPTE(__pa(((unsigned long)addr) & PAGE_MASK)); return vaddr + (((unsigned long)addr) & ~PAGE_MASK); diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index be042ef..52e907a 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -132,7 +132,7 @@ iommu_init(int iommund, struct sbus_bus *sbus) impl, vers, iommu->page_table, (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES); - sbus->iommu = iommu; + sbus->ofdev.dev.archdata.iommu = iommu; } /* This begs to be btfixup-ed by srmmu. 
*/ @@ -166,7 +166,7 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) static u32 iommu_get_one(struct page *page, int npages, struct sbus_bus *sbus) { - struct iommu_struct *iommu = sbus->iommu; + struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; int ioptex; iopte_t *iopte, *iopte0; unsigned int busa, busa0; @@ -291,7 +291,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu static void iommu_release_one(u32 busa, int npages, struct sbus_bus *sbus) { - struct iommu_struct *iommu = sbus->iommu; + struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; int ioptex; int i; @@ -334,7 +334,7 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, unsigned long addr, int len) { unsigned long page, end; - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte_t *first; int ioptex; @@ -399,7 +399,7 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, static void iommu_unmap_dma_area(unsigned long busa, int len) { - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; unsigned long end; int ioptex = (busa - iommu->start) >> PAGE_SHIFT; @@ -420,7 +420,7 @@ static void iommu_unmap_dma_area(unsigned long busa, int len) static struct page *iommu_translate_dvma(unsigned long busa) { - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte += ((busa - iommu->start) >> PAGE_SHIFT); -- cgit v1.1 From f642b263800e6e57c377d630be6d2a999683b579 Mon Sep 17 00:00:00 2001 From: Mark Fortescue Date: Tue, 14 Aug 2007 18:22:03 -0700 Subject: [SPARC32]: Fix bogus ramdisk image location check. 
This mirrors sparc64 commit 715a0ecc29c850d2b2f76e1803d3f22cd5a0ac0d: sparc_ramdisk_image should always be decremented by KERNBASE. Signed-off-by: Mark Fortescue Signed-off-by: David S. Miller --- arch/sparc/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index a1bef07..c13e6cd 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -206,8 +206,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) #ifdef CONFIG_BLK_DEV_INITRD /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ if (sparc_ramdisk_image) { - if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) - sparc_ramdisk_image -= KERNBASE; + sparc_ramdisk_image -= KERNBASE; initrd_start = sparc_ramdisk_image + phys_base; initrd_end = initrd_start + sparc_ramdisk_size; if (initrd_end > end_of_phys_memory) { -- cgit v1.1 From 70b0e7a919b7961285c685a87928ed78c9fb07f0 Mon Sep 17 00:00:00 2001 From: Mark Fortescue Date: Tue, 14 Aug 2007 18:24:10 -0700 Subject: [SPARC32]: Remove superfluous 'kernel_end' alignment on sun4c. In sun4c_init_clean_mmu(), aligning 'kernel_end' using SUN4C_REAL_PGDIR_ALIGN() is unnecessary since the caller does this already. In sun4c_paging_init(), 4 page sizes of "fluff" were added to the address of &end. This was necessary a long time ago when sparc32 would allocate some early data structures by carving out memory chunks after &end but that no longer occurs. Signed-off-by: Mark Fortescue Signed-off-by: David S.
Miller --- arch/sparc/mm/sun4c.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 79d60d8..005a3e7 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -268,7 +268,6 @@ static inline void sun4c_init_clean_mmu(unsigned long kernel_end) unsigned char savectx, ctx; savectx = sun4c_get_context(); - kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end); for (ctx = 0; ctx < num_contexts; ctx++) { sun4c_set_context(ctx); for (vaddr = 0; vaddr < 0x20000000; vaddr += SUN4C_REAL_PGDIR_SIZE) @@ -2064,7 +2063,6 @@ void __init sun4c_paging_init(void) unsigned long end_pfn, pages_avail; kernel_end = (unsigned long) &end; - kernel_end += (SUN4C_REAL_PGDIR_SIZE * 4); kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end); pages_avail = 0; -- cgit v1.1 From 0fdb7f96d8c27e37ed2ca1ae5a763baf14b8fe0c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 15 Aug 2007 21:02:23 -0700 Subject: [SPARC64]: Allow userspace to get at the machine description. Like the OF device tree, it's useful to let userland get at the machine description so it can pretty print the graph etc. The implementation is a simple MISC device with a read method. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/mdesc.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index 95059c2..9f22e4f 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -836,6 +837,43 @@ void __devinit mdesc_fill_in_cpu_data(cpumask_t mask) mdesc_release(hp); } +static ssize_t mdesc_read(struct file *file, char __user *buf, + size_t len, loff_t *offp) +{ + struct mdesc_handle *hp = mdesc_grab(); + int err; + + if (!hp) + return -ENODEV; + + err = hp->handle_size; + if (len < hp->handle_size) + err = -EMSGSIZE; + else if (copy_to_user(buf, &hp->mdesc, hp->handle_size)) + err = -EFAULT; + mdesc_release(hp); + + return err; +} + +static const struct file_operations mdesc_fops = { + .read = mdesc_read, + .owner = THIS_MODULE, +}; + +static struct miscdevice mdesc_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mdesc", + .fops = &mdesc_fops, +}; + +static int __init mdesc_misc_init(void) +{ + return misc_register(&mdesc_misc); +} + +__initcall(mdesc_misc_init); + void __init sun4v_mdesc_init(void) { struct mdesc_handle *hp; -- cgit v1.1 From cf5adce11743e98739fcb97e76d688f0b0bc2199 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 01:47:25 -0700 Subject: [SPARC64]: Niagara-2 optimized copies. The bzero/memset implementation stays the same as Niagara-1. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/head.S | 11 +- arch/sparc64/lib/Makefile | 2 + arch/sparc64/lib/NG2copy_from_user.S | 40 +++ arch/sparc64/lib/NG2copy_to_user.S | 49 ++++ arch/sparc64/lib/NG2memcpy.S | 520 +++++++++++++++++++++++++++++++++++ arch/sparc64/lib/NG2page.S | 61 ++++ arch/sparc64/lib/NG2patch.S | 33 +++ arch/sparc64/lib/NGpage.S | 1 + 8 files changed, 716 insertions(+), 1 deletion(-) create mode 100644 arch/sparc64/lib/NG2copy_from_user.S create mode 100644 arch/sparc64/lib/NG2copy_to_user.S create mode 100644 arch/sparc64/lib/NG2memcpy.S create mode 100644 arch/sparc64/lib/NG2page.S create mode 100644 arch/sparc64/lib/NG2patch.S (limited to 'arch') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index ac18bd8..a2652bc 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -501,7 +501,7 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA1 be,pt %xcc, niagara_patch cmp %g1, SUN4V_CHIP_NIAGARA2 - be,pt %xcc, niagara_patch + be,pt %xcc, niagara2_patch nop call generic_patch_copyops @@ -512,6 +512,15 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +niagara2_patch: + call niagara2_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara2_patch_pageops + nop + + ba,a,pt %xcc, 80f niagara_patch: call niagara_patch_copyops diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index f95fbfa..f095e13 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -13,6 +13,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ NGpage.o NGbzero.o \ + NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o NG2patch.o \ + NG2page.o \ GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \ GENpage.o GENbzero.o \ copy_in_user.o user_fixup.o memmove.o \ diff --git a/arch/sparc64/lib/NG2copy_from_user.S b/arch/sparc64/lib/NG2copy_from_user.S new file mode 100644 index 
0000000..c77ef5f --- /dev/null +++ b/arch/sparc64/lib/NG2copy_from_user.S @@ -0,0 +1,40 @@ +/* NG2copy_from_user.S: Niagara-2 optimized copy from userspace. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_AIUS_4V +#define ASI_BLK_AIUS_4V 0x17 +#endif + +#define FUNC_NAME NG2copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS_4V, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NG2memcpy.S" diff --git a/arch/sparc64/lib/NG2copy_to_user.S b/arch/sparc64/lib/NG2copy_to_user.S new file mode 100644 index 0000000..4bd4093 --- /dev/null +++ b/arch/sparc64/lib/NG2copy_to_user.S @@ -0,0 +1,49 @@ +/* NG2copy_to_user.S: Niagara-2 optimized copy to userspace. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_AIUS_4V +#define ASI_BLK_AIUS_4V 0x17 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME NG2copy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS_4V +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. 
+ * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NG2memcpy.S" diff --git a/arch/sparc64/lib/NG2memcpy.S b/arch/sparc64/lib/NG2memcpy.S new file mode 100644 index 0000000..0aed756 --- /dev/null +++ b/arch/sparc64/lib/NG2memcpy.S @@ -0,0 +1,520 @@ +/* NG2memcpy.S: Niagara-2 optimized memcpy. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include +#include +#define GLOBAL_SPARE %g7 +#else +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define FPRS_FEF 0x04 +#ifdef MEMCPY_DEBUG +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ + clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#endif +#define GLOBAL_SPARE %g5 +#endif + +#ifndef STORE_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#else +#define STORE_ASI 0x80 /* ASI_P */ +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef LOAD_BLK +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest +#endif + +#ifndef STORE +#ifndef MEMCPY_DEBUG +#define STORE(type,src,addr) type src, [addr] +#else +#define STORE(type,src,addr) type##a src, [addr] 0x80 +#endif +#endif + +#ifndef STORE_BLK +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME NG2memcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc 
+#endif + +#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + faligndata %x0, %x1, %f0; \ + faligndata %x1, %x2, %f2; \ + faligndata %x2, %x3, %f4; \ + faligndata %x3, %x4, %f6; \ + faligndata %x4, %x5, %f8; \ + faligndata %x5, %x6, %f10; \ + faligndata %x6, %x7, %f12; \ + faligndata %x7, %x8, %f14; + +#define FREG_MOVE_1(x0) \ + fmovd %x0, %f0; +#define FREG_MOVE_2(x0, x1) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; +#define FREG_MOVE_3(x0, x1, x2) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; +#define FREG_MOVE_4(x0, x1, x2, x3) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; +#define FREG_MOVE_5(x0, x1, x2, x3, x4) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; +#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; \ + fmovd %x5, %f10; +#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; \ + fmovd %x5, %f10; \ + fmovd %x6, %f12; +#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; \ + fmovd %x5, %f10; \ + fmovd %x6, %f12; \ + fmovd %x7, %f14; +#define FREG_LOAD_1(base, x0) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)) +#define FREG_LOAD_2(base, x0, x1) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); +#define FREG_LOAD_3(base, x0, x1, x2) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); +#define FREG_LOAD_4(base, x0, x1, x2, x3) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); +#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + 
EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); \ + EX_LD(LOAD(ldd, base + 0x20, %x4)); +#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); \ + EX_LD(LOAD(ldd, base + 0x20, %x4)); \ + EX_LD(LOAD(ldd, base + 0x28, %x5)); +#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); \ + EX_LD(LOAD(ldd, base + 0x20, %x4)); \ + EX_LD(LOAD(ldd, base + 0x28, %x5)); \ + EX_LD(LOAD(ldd, base + 0x30, %x6)); + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, GLOBAL_SPARE + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 2 blocks (128 bytes) is the minimum we can do the block + * copy with. We need to ensure that we'll iterate at least + * once in the block copy loop. At worst we'll need to align + * the destination to a 64-byte boundary which can chew up + * to (64 - 1) bytes from the length before we perform the + * block copy loop. + * + * However, the cut-off point, performance wise, is around + * 4 64-byte blocks. + */ + cmp %o2, (4 * 64) + blu,pt %XCC, 75f + andcc %o3, 0x7, %g0 + + /* %o0: dst + * %o1: src + * %o2: len (known to be >= 128) + * + * The block copy loops can use %o4, %g2, %g3 as + * temporaries while copying the data. 
%o5 must + * be preserved between VISEntryHalf and VISExitHalf + */ + + LOAD(prefetch, %o1 + 0x000, #one_read) + LOAD(prefetch, %o1 + 0x040, #one_read) + LOAD(prefetch, %o1 + 0x080, #one_read) + + /* Align destination on 64-byte boundary. */ + andcc %o0, (64 - 1), %o4 + be,pt %XCC, 2f + sub %o4, 64, %o4 + sub %g0, %o4, %o4 ! bytes to align dst + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o0)) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + +2: + /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve + * o5 from here until we hit VISExitHalf. + */ + VISEntryHalf + + alignaddr %o1, %g0, %g0 + + add %o1, (64 - 1), %o4 + andn %o4, (64 - 1), %o4 + andn %o2, (64 - 1), %g1 + sub %o2, %g1, %o2 + + and %o1, (64 - 1), %g2 + add %o1, %g1, %o1 + sub %o0, %o4, %g3 + brz,pt %g2, 190f + cmp %g2, 32 + blu,a 5f + cmp %g2, 16 + cmp %g2, 48 + blu,a 4f + cmp %g2, 40 + cmp %g2, 56 + blu 170f + nop + ba,a,pt %xcc, 180f + +4: /* 32 <= low bits < 48 */ + blu 150f + nop + ba,a,pt %xcc, 160f +5: /* 0 < low bits < 32 */ + blu,a 6f + cmp %g2, 8 + cmp %g2, 24 + blu 130f + nop + ba,a,pt %xcc, 140f +6: /* 0 < low bits < 16 */ + bgeu 120f + nop + /* fall through for 0 < low bits < 8 */ +110: sub %o4, 64, %g2 + EX_LD(LOAD_BLK(%g2, %f0)) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +120: sub %o4, 56, %g2 + FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 
195f + nop + +130: sub %o4, 48, %g2 + FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_6(f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +140: sub %o4, 40, %g2 + FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_5(f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +150: sub %o4, 32, %g2 + FREG_LOAD_4(%g2, f0, f2, f4, f6) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_4(f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +160: sub %o4, 24, %g2 + FREG_LOAD_3(%g2, f0, f2, f4) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_3(f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +170: sub %o4, 16, %g2 + FREG_LOAD_2(%g2, f0, f2) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_2(f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +180: sub %o4, 8, %g2 + FREG_LOAD_1(%g2, f0) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f16, 
f18, f20, f22, f24, f26, f28, f30) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_1(f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +190: +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + subcc %g1, 64, %g1 + EX_LD(LOAD_BLK(%o4, %f0)) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + +195: + add %o4, %g3, %o0 + membar #Sync + + VISExitHalf + + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. + */ + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +75: /* 16 < len <= 64 */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + EX_LD(LOAD(ldx, %o1, %o5)) + add %o1, 0x08, %o1 + EX_LD(LOAD(ldx, %o1, %g1)) + sub %o1, 0x08, %o1 + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 + EX_ST(STORE(stx, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + EX_LD(LOAD(ldx, %o1, %o5)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + EX_LD(LOAD(lduw, %o1, %o5)) + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + EX_LD(LOAD(ldub, %o1, %o5)) + EX_ST(STORE(stb, %o5, %o1 + %o3)) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + EX_LD(LOAD(ldx, %o1, %g3)) + subcc %o4, 0x8, %o4 + srlx 
%g3, %o3, %o5 + or %o5, %g2, %o5 + EX_ST(STORE(stx, %o5, %o0)) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + EX_LD(LOAD(lduw, %o1, %g1)) + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/NG2page.S b/arch/sparc64/lib/NG2page.S new file mode 100644 index 0000000..73b6b7c --- /dev/null +++ b/arch/sparc64/lib/NG2page.S @@ -0,0 +1,61 @@ +/* NG2page.S: Niagara-2 optimized clear and copy page. + * + * Copyright (C) 2007 (davem@davemloft.net) + */ + +#include +#include +#include + + .text + .align 32 + + /* This is heavily simplified from the sun4u variants + * because Niagara-2 does not have any D-cache aliasing issues. 
+ */ +NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x00], #one_read + prefetch [%o1 + 0x40], #one_read + VISEntryHalf + set PAGE_SIZE, %g7 + sub %o0, %o1, %g3 +1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P + subcc %g7, 64, %g7 + ldda [%o1] ASI_BLK_P, %f0 + stda %f0, [%o1 + %g3] ASI_BLK_P + add %o1, 64, %o1 + bne,pt %xcc, 1b + prefetch [%o1 + 0x40], #one_read + membar #Sync + VISExitHalf + retl + nop + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara2_patch_pageops + .type niagara2_patch_pageops,#function +niagara2_patch_pageops: + NG_DO_PATCH(copy_user_page, NG2copy_user_page) + NG_DO_PATCH(_clear_page, NGclear_page) + NG_DO_PATCH(clear_user_page, NGclear_user_page) + retl + nop + .size niagara2_patch_pageops,.-niagara2_patch_pageops diff --git a/arch/sparc64/lib/NG2patch.S b/arch/sparc64/lib/NG2patch.S new file mode 100644 index 0000000..28c36f0 --- /dev/null +++ b/arch/sparc64/lib/NG2patch.S @@ -0,0 +1,33 @@ +/* NG2patch.S: Patch Ultra-I routines with Niagara-2 variant. + * + * Copyright (C) 2007 David S. 
Miller + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara2_patch_copyops + .type niagara2_patch_copyops,#function +niagara2_patch_copyops: + NG_DO_PATCH(memcpy, NG2memcpy) + NG_DO_PATCH(___copy_from_user, NG2copy_from_user) + NG_DO_PATCH(___copy_to_user, NG2copy_to_user) + retl + nop + .size niagara2_patch_copyops,.-niagara2_patch_copyops diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S index 8ce3a0c..428920d 100644 --- a/arch/sparc64/lib/NGpage.S +++ b/arch/sparc64/lib/NGpage.S @@ -45,6 +45,7 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ retl nop + .globl NGclear_page, NGclear_user_page NGclear_page: /* %o0=dest */ NGclear_user_page: /* %o0=dest, %o1=vaddr */ mov 8, %g1 -- cgit v1.1 From 53140b71c5e7b5370e4ac6ffc13fddbdfae71473 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 01:52:44 -0700 Subject: [SPARC64]: Do not touch %tick_cmpr on sun4v cpus. This register is not a part of the sun4v architecture. Niagara 1 and 2 happened to leave it around. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/head.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index a2652bc..63144ad 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -715,12 +715,13 @@ setup_trap_table: membar #Sync + BRANCH_IF_SUN4V(o2, 1f) + /* Kill PROM timer */ sethi %hi(0x80000000), %o2 sllx %o2, 32, %o2 wr %o2, 0, %tick_cmpr - BRANCH_IF_SUN4V(o2, 1f) BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) ba,pt %xcc, 2f -- cgit v1.1 From 7dc408808ad40b9a5d031966035713ed5d5153b8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 01:56:00 -0700 Subject: [SPARC64]: SMP trampoline needs to avoid %tick_cmpr on sun4v too. Signed-off-by: David S. Miller --- arch/sparc64/kernel/trampoline.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 9448595..9533a25 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -95,14 +95,13 @@ spitfire_startup: membar #Sync startup_continue: + mov %o0, %l0 + BRANCH_IF_SUN4V(g1, niagara_lock_tlb) + sethi %hi(0x80000000), %g2 sllx %g2, 32, %g2 wr %g2, 0, %tick_cmpr - mov %o0, %l0 - - BRANCH_IF_SUN4V(g1, niagara_lock_tlb) - /* Call OBP by hand to lock KERNBASE into i/d tlbs. * We lock 2 consequetive entries if we are 'bigkernel'. */ -- cgit v1.1