summaryrefslogtreecommitdiffstats
path: root/arch/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/Kconfig23
-rw-r--r--arch/sparc/include/asm/cpudata_64.h5
-rw-r--r--arch/sparc/include/asm/hypervisor.h343
-rw-r--r--arch/sparc/include/asm/iommu_64.h28
-rw-r--r--arch/sparc/include/asm/spinlock_32.h2
-rw-r--r--arch/sparc/include/asm/spinlock_64.h12
-rw-r--r--arch/sparc/include/asm/topology_64.h8
-rw-r--r--arch/sparc/include/asm/uaccess_64.h28
-rw-r--r--arch/sparc/kernel/head_64.S37
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/iommu.c8
-rw-r--r--arch/sparc/kernel/iommu_common.h1
-rw-r--r--arch/sparc/kernel/jump_label.c23
-rw-r--r--arch/sparc/kernel/mdesc.c46
-rw-r--r--arch/sparc/kernel/pci_sun4v.c418
-rw-r--r--arch/sparc/kernel/pci_sun4v.h21
-rw-r--r--arch/sparc/kernel/pci_sun4v_asm.S68
-rw-r--r--arch/sparc/kernel/ptrace_64.c24
-rw-r--r--arch/sparc/kernel/signal_32.c4
-rw-r--r--arch/sparc/kernel/smp_64.c8
-rw-r--r--arch/sparc/lib/GENcopy_from_user.S4
-rw-r--r--arch/sparc/lib/GENcopy_to_user.S4
-rw-r--r--arch/sparc/lib/GENmemcpy.S48
-rw-r--r--arch/sparc/lib/Makefile2
-rw-r--r--arch/sparc/lib/NG2copy_from_user.S8
-rw-r--r--arch/sparc/lib/NG2copy_to_user.S8
-rw-r--r--arch/sparc/lib/NG2memcpy.S228
-rw-r--r--arch/sparc/lib/NG4copy_from_user.S8
-rw-r--r--arch/sparc/lib/NG4copy_to_user.S8
-rw-r--r--arch/sparc/lib/NG4memcpy.S294
-rw-r--r--arch/sparc/lib/NGcopy_from_user.S4
-rw-r--r--arch/sparc/lib/NGcopy_to_user.S4
-rw-r--r--arch/sparc/lib/NGmemcpy.S233
-rw-r--r--arch/sparc/lib/U1copy_from_user.S8
-rw-r--r--arch/sparc/lib/U1copy_to_user.S8
-rw-r--r--arch/sparc/lib/U1memcpy.S345
-rw-r--r--arch/sparc/lib/U3copy_from_user.S8
-rw-r--r--arch/sparc/lib/U3copy_to_user.S8
-rw-r--r--arch/sparc/lib/U3memcpy.S227
-rw-r--r--arch/sparc/lib/copy_in_user.S35
-rw-r--r--arch/sparc/lib/user_fixup.c71
-rw-r--r--arch/sparc/mm/gup.c3
-rw-r--r--arch/sparc/mm/init_64.c71
-rw-r--r--arch/sparc/mm/tsb.c17
-rw-r--r--arch/sparc/mm/ultra.S374
45 files changed, 2334 insertions, 802 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index b23c76b..165ecdd 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select HAVE_ARCH_HARDENED_USERCOPY
+ select PROVE_LOCKING_SMALL if PROVE_LOCKING
config SPARC32
def_bool !64BIT
@@ -89,6 +90,14 @@ config ARCH_DEFCONFIG
config ARCH_PROC_KCORE_TEXT
def_bool y
+config ARCH_ATU
+ bool
+ default y if SPARC64
+
+config ARCH_DMA_ADDR_T_64BIT
+ bool
+ default y if ARCH_ATU
+
config IOMMU_HELPER
bool
default y if SPARC64
@@ -304,6 +313,20 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y if SPARC64
+config FORCE_MAX_ZONEORDER
+ int "Maximum zone order"
+ default "13"
+ help
+ The kernel memory allocator divides physically contiguous memory
+ blocks into "zones", where each zone is a power of two number of
+ pages. This option selects the largest power of two that the kernel
+ keeps in the memory allocator. If you need to allocate very large
+ blocks of physically contiguous memory, then you may need to
+ increase this value.
+
+ This config option is actually maximum order plus one. For example,
+ a value of 13 means that the largest free memory block is 2^12 pages.
+
source "mm/Kconfig"
if SPARC64
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index a6cfdab..5b0ed48 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -24,9 +24,10 @@ typedef struct {
unsigned int icache_line_size;
unsigned int ecache_size;
unsigned int ecache_line_size;
- unsigned short sock_id;
+ unsigned short sock_id; /* physical package */
unsigned short core_id;
- int proc_id;
+ unsigned short max_cache_id; /* groupings of highest shared cache */
+ unsigned short proc_id; /* strand (aka HW thread) id */
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 666d5ba..73cb897 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2335,6 +2335,348 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
*/
#define HV_FAST_PCI_MSG_SETVALID 0xd3
+/* PCI IOMMU v2 definitions and services
+ *
+ * While the PCI IO definitions above is valid IOMMU v2 adds new PCI IO
+ * definitions and services.
+ *
+ * CTE Clump Table Entry. First level table entry in the ATU.
+ *
+ * pci_device_list
+ * A 32-bit aligned list of pci_devices.
+ *
+ * pci_device_listp
+ * real address of a pci_device_list. 32-bit aligned.
+ *
+ * iotte IOMMU translation table entry.
+ *
+ * iotte_attributes
+ * IO Attributes for IOMMU v2 mappings. In addition to
+ * read, write IOMMU v2 supports relax ordering
+ *
+ * io_page_list A 64-bit aligned list of real addresses. Each real
+ * address in an io_page_list must be properly aligned
+ * to the pagesize of the given IOTSB.
+ *
+ * io_page_list_p Real address of an io_page_list, 64-bit aligned.
+ *
+ * IOTSB IO Translation Storage Buffer. An aligned table of
+ * IOTTEs. Each IOTSB has a pagesize, table size, and
+ * virtual address associated with it that must match
+ * a pagesize and table size supported by the un-derlying
+ * hardware implementation. The alignment requirements
+ * for an IOTSB depend on the pagesize used for that IOTSB.
+ * Each IOTTE in an IOTSB maps one pagesize-sized page.
+ * The size of the IOTSB dictates how large of a virtual
+ * address space the IOTSB is capable of mapping.
+ *
+ * iotsb_handle An opaque identifier for an IOTSB. A devhandle plus
+ * iotsb_handle represents a binding of an IOTSB to a
+ * PCI root complex.
+ *
+ * iotsb_index Zero-based IOTTE number within an IOTSB.
+ */
+
+/* The index_count argument consists of two fields:
+ * bits 63:48 #iottes and bits 47:0 iotsb_index
+ */
+#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \
+ (((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index)))
+
+/* pci_iotsb_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_CONF
+ * ARG0: devhandle
+ * ARG1: r_addr
+ * ARG2: size
+ * ARG3: pagesize
+ * ARG4: iova
+ * RET0: status
+ * RET1: iotsb_handle
+ * ERRORS: EINVAL Invalid devhandle, size, iova, or pagesize
+ * EBADALIGN r_addr is not properly aligned
+ * ENORADDR r_addr is not a valid real address
+ * ETOOMANY No further IOTSBs may be configured
+ * EBUSY Duplicate devhandle, raddir, iova combination
+ *
+ * Create an IOTSB suitable for the PCI root complex identified by devhandle,
+ * for the DMA virtual address defined by the argument iova.
+ *
+ * r_addr is the properly aligned base address of the IOTSB and size is the
+ * IOTSB (table) size in bytes.The IOTSB is required to be zeroed prior to
+ * being configured. If it contains any values other than zeros then the
+ * behavior is undefined.
+ *
+ * pagesize is the size of each page in the IOTSB. Note that the combination of
+ * size (table size) and pagesize must be valid.
+ *
+ * virt is the DMA virtual address this IOTSB will map.
+ *
+ * If successful, the opaque 64-bit handle iotsb_handle is returned in ret1.
+ * Once configured, privileged access to the IOTSB memory is prohibited and
+ * creates undefined behavior. The only permitted access is indirect via these
+ * services.
+ */
+#define HV_FAST_PCI_IOTSB_CONF 0x190
+
+/* pci_iotsb_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_INFO
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * RET0: status
+ * RET1: r_addr
+ * RET2: size
+ * RET3: pagesize
+ * RET4: iova
+ * RET5: #bound
+ * ERRORS: EINVAL Invalid devhandle or iotsb_handle
+ *
+ * This service returns configuration information about an IOTSB previously
+ * created with pci_iotsb_conf.
+ *
+ * iotsb_handle value 0 may be used with this service to inquire about the
+ * legacy IOTSB that may or may not exist. If the service succeeds, the return
+ * values describe the legacy IOTSB and I/O virtual addresses mapped by that
+ * table. However, the table base address r_addr may contain the value -1 which
+ * indicates a memory range that cannot be accessed or be reclaimed.
+ *
+ * The return value #bound contains the number of PCI devices that iotsb_handle
+ * is currently bound to.
+ */
+#define HV_FAST_PCI_IOTSB_INFO 0x191
+
+/* pci_iotsb_unconf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_UNCONF
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or iotsb_handle
+ * EBUSY The IOTSB is bound and may not be unconfigured
+ *
+ * This service unconfigures the IOTSB identified by the devhandle and
+ * iotsb_handle arguments, previously created with pci_iotsb_conf.
+ * The IOTSB must not be currently bound to any device or the service will fail
+ *
+ * If the call succeeds, iotsb_handle is no longer valid.
+ */
+#define HV_FAST_PCI_IOTSB_UNCONF 0x192
+
+/* pci_iotsb_bind()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_BIND
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: pci_device
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device
+ * EBUSY A PCI function is already bound to an IOTSB at the same
+ * address range as specified by devhandle, iotsb_handle.
+ *
+ * This service binds the PCI function specified by the argument pci_device to
+ * the IOTSB specified by the arguments devhandle and iotsb_handle.
+ *
+ * The PCI device function is bound to the specified IOTSB with the IOVA range
+ * specified when the IOTSB was configured via pci_iotsb_conf. If the function
+ * is already bound then it is unbound first.
+ */
+#define HV_FAST_PCI_IOTSB_BIND 0x193
+
+/* pci_iotsb_unbind()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_UNBIND
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: pci_device
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device
+ * ENOMAP The PCI function was not bound to the specified IOTSB
+ *
+ * This service unbinds the PCI device specified by the argument pci_device
+ * from the IOTSB identified * by the arguments devhandle and iotsb_handle.
+ *
+ * If the PCI device is not bound to the specified IOTSB then this service will
+ * fail with status ENOMAP
+ */
+#define HV_FAST_PCI_IOTSB_UNBIND 0x194
+
+/* pci_iotsb_get_binding()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_GET_BINDING
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iova
+ * RET0: status
+ * RET1: iotsb_handle
+ * ERRORS: EINVAL Invalid devhandle, pci_device, or iova
+ * ENOMAP The PCI function is not bound to an IOTSB at iova
+ *
+ * This service returns the IOTSB binding, iotsb_handle, for a given pci_device
+ * and DMA virtual address, iova.
+ *
+ * iova must be the base address of a DMA virtual address range as defined by
+ * the iommu-address-ranges property in the root complex device node defined
+ * by the argument devhandle.
+ */
+#define HV_FAST_PCI_IOTSB_GET_BINDING 0x195
+
+/* pci_iotsb_map()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_MAP
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: index_count
+ * ARG3: iotte_attributes
+ * ARG4: io_page_list_p
+ * RET0: status
+ * RET1: #mapped
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, #iottes,
+ * iotsb_index or iotte_attributes
+ * EBADALIGN Improperly aligned io_page_list_p or I/O page
+ * address in the I/O page list.
+ * ENORADDR Invalid io_page_list_p or I/O page address in
+ * the I/O page list.
+ *
+ * This service creates and flushes mappings in the IOTSB defined by the
+ * arguments devhandle, iotsb.
+ *
+ * The index_count argument consists of two fields. Bits 63:48 contain #iotte
+ * and bits 47:0 contain iotsb_index
+ *
+ * The first mapping is created in the IOTSB index specified by iotsb_index.
+ * Subsequent mappings are created at iotsb_index+1 and so on.
+ *
+ * The attributes of each mapping are defined by the argument iotte_attributes.
+ *
+ * The io_page_list_p specifies the real address of the 64-bit-aligned list of
+ * #iottes I/O page addresses. Each page address must be a properly aligned
+ * real address of a page to be mapped in the IOTSB. The first entry in the I/O
+ * page list contains the real address of the first page, the 2nd entry for the
+ * 2nd page, and so on.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The return value #mapped is the actual number of mappings created, which may
+ * be less than or equal to the argument #iottes. If the function returns
+ * successfully with a #mapped value less than the requested #iottes then the
+ * caller should continue to invoke the service with updated iotsb_index,
+ * #iottes, and io_page_list_p arguments until all pages are mapped.
+ *
+ * This service must not be used to demap a mapping. In other words, all
+ * mappings must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP 0x196
+
+/* pci_iotsb_map_one()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_MAP_ONE
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * ARG3: iotte_attributes
+ * ARG4: r_addr
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle,iotsb_handle, iotsb_index
+ * or iotte_attributes
+ * EBADALIGN Improperly aligned r_addr
+ * ENORADDR Invalid r_addr
+ *
+ * This service creates and flushes a single mapping in the IOTSB defined by the
+ * arguments devhandle, iotsb.
+ *
+ * The mapping for the page at r_addr is created at the IOTSB index specified by
+ * iotsb_index with the attributes iotte_attributes.
+ *
+ * This service must not be used to demap a mapping. In other words, the mapping
+ * must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP_ONE 0x197
+
+/* pci_iotsb_demap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_DEMAP
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * ARG3: #iottes
+ * RET0: status
+ * RET1: #unmapped
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index or #iottes
+ *
+ * This service unmaps and flushes up to #iottes mappings starting at index
+ * iotsb_index from the IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs unmapped is returned in #unmapped and may be less
+ * than or equal to the requested number of IOTTEs, #iottes.
+ *
+ * If #unmapped is less than #iottes, the caller should continue to invoke this
+ * service with updated iotsb_index and #iottes arguments until all pages are
+ * demapped.
+ */
+#define HV_FAST_PCI_IOTSB_DEMAP 0x198
+
+/* pci_iotsb_getmap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_GETMAP
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * RET0: status
+ * RET1: r_addr
+ * RET2: iotte_attributes
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or iotsb_index
+ * ENOMAP No mapping was found
+ *
+ * This service returns the mapping specified by index iotsb_index from the
+ * IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * Upon success, the real address of the mapping shall be returned in
+ * r_addr and thethe IOTTE mapping attributes shall be returned in
+ * iotte_attributes.
+ *
+ * The return value iotte_attributes may not include optional features used in
+ * the call to create the mapping.
+ */
+#define HV_FAST_PCI_IOTSB_GETMAP 0x199
+
+/* pci_iotsb_sync_mappings()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_SYNC_MAPPINGS
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * ARG3: #iottes
+ * RET0: status
+ * RET1: #synced
+ * ERROS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index, or #iottes
+ *
+ * This service synchronizes #iottes mappings starting at index iotsb_index in
+ * the IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs synchronized is returned in #synced, which may
+ * be less than or equal to the requested number, #iottes.
+ *
+ * Upon a successful return, #synced is less than #iottes, the caller should
+ * continue to invoke this service with updated iotsb_index and #iottes
+ * arguments until all pages are synchronized.
+ */
+#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS 0x19a
+
/* Logical Domain Channel services. */
#define LDC_CHANNEL_DOWN 0
@@ -2993,6 +3335,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
+#define HV_GRP_ATU 0x0111
#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index cd0d69f..f24f356 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -24,8 +24,36 @@ struct iommu_arena {
unsigned int limit;
};
+#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
+
+/* Data structures for SPARC ATU architecture */
+struct atu_iotsb {
+ void *table; /* IOTSB table base virtual addr*/
+ u64 ra; /* IOTSB table real addr */
+ u64 dvma_size; /* ranges[3].size or OS slected 32G size */
+ u64 dvma_base; /* ranges[3].base */
+ u64 table_size; /* IOTSB table size */
+ u64 page_size; /* IO PAGE size for IOTSB */
+ u32 iotsb_num; /* tsbnum is same as iotsb_handle */
+};
+
+struct atu_ranges {
+ u64 base;
+ u64 size;
+};
+
+struct atu {
+ struct atu_ranges *ranges;
+ struct atu_iotsb *iotsb;
+ struct iommu_map_table tbl;
+ u64 base;
+ u64 size;
+ u64 dma_addr_mask;
+};
+
struct iommu {
struct iommu_map_table tbl;
+ struct atu *atu;
spinlock_t lock;
u32 dma_addr_mask;
iopte_t *page_table;
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index d9c5876..8011e79 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
*(volatile __u32 *)&lp->lock = ~0U;
}
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__(
" st %%g0, [%0]"
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 87990b7..07c9f2e 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla
/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
-static void inline arch_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
{
unsigned long tmp1, tmp2;
@@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock)
: "memory");
}
-static int inline arch_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
{
int tmp1, tmp2;
@@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock)
return tmp1;
}
-static void inline arch_read_unlock(arch_rwlock_t *lock)
+static inline void arch_read_unlock(arch_rwlock_t *lock)
{
unsigned long tmp1, tmp2;
@@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock)
: "memory");
}
-static void inline arch_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
{
unsigned long mask, tmp1, tmp2;
@@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock)
: "memory");
}
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__(
" stw %%g0, [%0]"
@@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock)
: "memory");
}
-static int inline arch_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
{
unsigned long mask, tmp1, tmp2, result;
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index bec481a..7b4898a 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -44,14 +44,20 @@ int __node_distance(int, int);
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
+#define topology_core_cache_cpumask(cpu) (&cpu_core_sib_cache_map[cpu])
#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#endif /* CONFIG_SMP */
extern cpumask_t cpu_core_map[NR_CPUS];
extern cpumask_t cpu_core_sib_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
+
+/**
+ * Return cores that shares the last level cache.
+ */
static inline const struct cpumask *cpu_coregroup_mask(int cpu)
{
- return &cpu_core_map[cpu];
+ return &cpu_core_sib_cache_map[cpu];
}
#endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index b68acc5..5373136 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si
return 1;
}
-void __ret_efault(void);
void __retl_efault(void);
/* Uh, these should become the main single-value transfer routines..
@@ -189,55 +188,34 @@ int __get_user_bad(void);
unsigned long __must_check ___copy_from_user(void *to,
const void __user *from,
unsigned long size);
-unsigned long copy_from_user_fixup(void *to, const void __user *from,
- unsigned long size);
static inline unsigned long __must_check
copy_from_user(void *to, const void __user *from, unsigned long size)
{
- unsigned long ret;
-
check_object_size(to, size, false);
- ret = ___copy_from_user(to, from, size);
- if (unlikely(ret))
- ret = copy_from_user_fixup(to, from, size);
-
- return ret;
+ return ___copy_from_user(to, from, size);
}
#define __copy_from_user copy_from_user
unsigned long __must_check ___copy_to_user(void __user *to,
const void *from,
unsigned long size);
-unsigned long copy_to_user_fixup(void __user *to, const void *from,
- unsigned long size);
static inline unsigned long __must_check
copy_to_user(void __user *to, const void *from, unsigned long size)
{
- unsigned long ret;
-
check_object_size(from, size, true);
- ret = ___copy_to_user(to, from, size);
- if (unlikely(ret))
- ret = copy_to_user_fixup(to, from, size);
- return ret;
+ return ___copy_to_user(to, from, size);
}
#define __copy_to_user copy_to_user
unsigned long __must_check ___copy_in_user(void __user *to,
const void __user *from,
unsigned long size);
-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
- unsigned long size);
static inline unsigned long __must_check
copy_in_user(void __user *to, void __user *from, unsigned long size)
{
- unsigned long ret = ___copy_in_user(to, from, size);
-
- if (unlikely(ret))
- ret = copy_in_user_fixup(to, from, size);
- return ret;
+ return ___copy_in_user(to, from, size);
}
#define __copy_in_user copy_in_user
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index beba6c1..6aa3da1 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -926,48 +926,11 @@ tlb_type: .word 0 /* Must NOT end up in BSS */
EXPORT_SYMBOL(tlb_type)
.section ".fixup",#alloc,#execinstr
- .globl __ret_efault, __retl_efault, __ret_one, __retl_one
-ENTRY(__ret_efault)
- ret
- restore %g0, -EFAULT, %o0
-ENDPROC(__ret_efault)
-EXPORT_SYMBOL(__ret_efault)
-
ENTRY(__retl_efault)
retl
mov -EFAULT, %o0
ENDPROC(__retl_efault)
-ENTRY(__retl_one)
- retl
- mov 1, %o0
-ENDPROC(__retl_one)
-
-ENTRY(__retl_one_fp)
- VISExitHalf
- retl
- mov 1, %o0
-ENDPROC(__retl_one_fp)
-
-ENTRY(__ret_one_asi)
- wr %g0, ASI_AIUS, %asi
- ret
- restore %g0, 1, %o0
-ENDPROC(__ret_one_asi)
-
-ENTRY(__retl_one_asi)
- wr %g0, ASI_AIUS, %asi
- retl
- mov 1, %o0
-ENDPROC(__retl_one_asi)
-
-ENTRY(__retl_one_asi_fp)
- wr %g0, ASI_AIUS, %asi
- VISExitHalf
- retl
- mov 1, %o0
-ENDPROC(__retl_one_asi_fp)
-
ENTRY(__retl_o1)
retl
mov %o1, %o0
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 662500f..2677312 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_SDIO, },
{ .group = HV_GRP_SDIO_ERR, },
{ .group = HV_GRP_REBOOT_DATA, },
+ { .group = HV_GRP_ATU, .flags = FLAG_PRE_API },
{ .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
{ .group = HV_GRP_FIRE_PERF, },
{ .group = HV_GRP_N2_CPU, },
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 5c615ab..852a329 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask)
struct iommu *iommu = dev->archdata.iommu;
u64 dma_addr_mask = iommu->dma_addr_mask;
- if (device_mask >= (1UL << 32UL))
- return 0;
+ if (device_mask > DMA_BIT_MASK(32)) {
+ if (iommu->atu)
+ dma_addr_mask = iommu->atu->dma_addr_mask;
+ else
+ return 0;
+ }
if ((device_mask & dma_addr_mask) == dma_addr_mask)
return 1;
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
index b40cec2..8284933 100644
--- a/arch/sparc/kernel/iommu_common.h
+++ b/arch/sparc/kernel/iommu_common.h
@@ -13,7 +13,6 @@
#include <linux/scatterlist.h>
#include <linux/device.h>
#include <linux/iommu-helper.h>
-#include <linux/scatterlist.h>
#include <asm/iommu.h>
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 59bbeff..07933b9 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -13,19 +13,30 @@
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- u32 val;
u32 *insn = (u32 *) (unsigned long) entry->code;
+ u32 val;
if (type == JUMP_LABEL_JMP) {
s32 off = (s32)entry->target - (s32)entry->code;
+ bool use_v9_branch = false;
+
+ BUG_ON(off & 3);
#ifdef CONFIG_SPARC64
- /* ba,pt %xcc, . + (off << 2) */
- val = 0x10680000 | ((u32) off >> 2);
-#else
- /* ba . + (off << 2) */
- val = 0x10800000 | ((u32) off >> 2);
+ if (off <= 0xfffff && off >= -0x100000)
+ use_v9_branch = true;
#endif
+ if (use_v9_branch) {
+ /* WDISP19 - target is . + immed << 2 */
+ /* ba,pt %xcc, . + off */
+ val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
+ } else {
+ /* WDISP22 - target is . + immed << 2 */
+ BUG_ON(off > 0x7fffff);
+ BUG_ON(off < -0x800000);
+ /* ba . + off */
+ val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
+ }
} else {
val = 0x01000000;
}
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 1122886..8a6982d 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node,
cpu_data(*id).core_id = core_id;
}
-static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
- int sock_id)
+static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
+ int max_cache_id)
{
const u64 *id = mdesc_get_property(hp, node, "id", NULL);
- if (*id < num_possible_cpus())
- cpu_data(*id).sock_id = sock_id;
+ if (*id < num_possible_cpus()) {
+ cpu_data(*id).max_cache_id = max_cache_id;
+
+ /**
+ * On systems without explicit socket descriptions socket
+ * is max_cache_id
+ */
+ cpu_data(*id).sock_id = max_cache_id;
+ }
}
static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
@@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
}
-static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
- int sock_id)
+static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
+ int max_cache_id)
{
- find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+ find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
+ max_cache_id, 10);
}
static void set_core_ids(struct mdesc_handle *hp)
@@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp)
}
}
-static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
{
u64 mp;
int idx = 1;
int fnd = 0;
- /* Identify unique sockets by looking for cpus backpointed to by
- * shared level n caches.
+ /**
+ * Identify unique highest level of shared cache by looking for cpus
+ * backpointed to by shared level N caches.
*/
mdesc_for_each_node_by_name(hp, mp, "cache") {
const u64 *cur_lvl;
@@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
if (*cur_lvl != level)
continue;
-
- mark_sock_ids(hp, mp, idx);
+ mark_max_cache_ids(hp, mp, idx);
idx++;
fnd = 1;
}
@@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp)
{
u64 mp;
- /* If machine description exposes sockets data use it.
- * Otherwise fallback to use shared L3 or L2 caches.
+ /**
+ * Find the highest level of shared cache which pre-T7 is also
+ * the socket.
*/
+ if (!set_max_cache_ids_by_cache(hp, 3))
+ set_max_cache_ids_by_cache(hp, 2);
+
+ /* If machine description exposes sockets data use it.*/
mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
if (mp != MDESC_NODE_NULL)
- return set_sock_ids_by_socket(hp, mp);
-
- if (!set_sock_ids_by_cache(hp, 3))
- set_sock_ids_by_cache(hp, 2);
+ set_sock_ids_by_socket(hp, mp);
}
static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index db57d8a..06981cc 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = {
{ .major = 1, .minor = 1 },
};
+static unsigned long vatu_major = 1;
+static unsigned long vatu_minor = 1;
+
#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
struct iommu_batch {
@@ -69,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns
}
/* Interrupts must be disabled. */
-static long iommu_batch_flush(struct iommu_batch *p)
+static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
{
struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+ u64 *pglist = p->pglist;
+ u64 index_count;
unsigned long devhandle = pbm->devhandle;
unsigned long prot = p->prot;
unsigned long entry = p->entry;
- u64 *pglist = p->pglist;
unsigned long npages = p->npages;
+ unsigned long iotsb_num;
+ unsigned long ret;
+ long num;
/* VPCI maj=1, min=[0,1] only supports read and write */
if (vpci_major < 2)
prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
while (npages != 0) {
- long num;
-
- num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
- npages, prot, __pa(pglist));
- if (unlikely(num < 0)) {
- if (printk_ratelimit())
- printk("iommu_batch_flush: IOMMU map of "
- "[%08lx:%08llx:%lx:%lx:%lx] failed with "
- "status %ld\n",
- devhandle, HV_PCI_TSBID(0, entry),
- npages, prot, __pa(pglist), num);
- return -1;
+ if (mask <= DMA_BIT_MASK(32)) {
+ num = pci_sun4v_iommu_map(devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages,
+ prot,
+ __pa(pglist));
+ if (unlikely(num < 0)) {
+ pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
+ __func__,
+ devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist),
+ num);
+ return -1;
+ }
+ } else {
+ index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry),
+ iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
+ ret = pci_sun4v_iotsb_map(devhandle,
+ iotsb_num,
+ index_count,
+ prot,
+ __pa(pglist),
+ &num);
+ if (unlikely(ret != HV_EOK)) {
+ pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
+ __func__,
+ devhandle, iotsb_num,
+ index_count, prot,
+ __pa(pglist), ret);
+ return -1;
+ }
}
-
entry += num;
npages -= num;
pglist += num;
@@ -108,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p)
return 0;
}
-static inline void iommu_batch_new_entry(unsigned long entry)
+static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
if (p->entry + p->npages == entry)
return;
if (p->entry != ~0UL)
- iommu_batch_flush(p);
+ iommu_batch_flush(p, mask);
p->entry = entry;
}
/* Interrupts must be disabled. */
-static inline long iommu_batch_add(u64 phys_page)
+static inline long iommu_batch_add(u64 phys_page, u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
@@ -128,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page)
p->pglist[p->npages++] = phys_page;
if (p->npages == PGLIST_NENTS)
- return iommu_batch_flush(p);
+ return iommu_batch_flush(p, mask);
return 0;
}
/* Interrupts must be disabled. */
-static inline long iommu_batch_end(void)
+static inline long iommu_batch_end(u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
BUG_ON(p->npages >= PGLIST_NENTS);
- return iommu_batch_flush(p);
+ return iommu_batch_flush(p, mask);
}
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp,
unsigned long attrs)
{
+ u64 mask;
unsigned long flags, order, first_page, npages, n;
unsigned long prot = 0;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
struct page *page;
void *ret;
long entry;
@@ -174,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
+ mask = dev->coherent_dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == IOMMU_ERROR_CODE))
goto range_alloc_fail;
- *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+ *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
ret = (void *) first_page;
first_page = __pa(first_page);
@@ -193,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
entry);
for (n = 0; n < npages; n++) {
- long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
+ long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
- if (unlikely(iommu_batch_end() < 0L))
+ if (unlikely(iommu_batch_end(mask) < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
@@ -206,25 +242,71 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
return ret;
iommu_map_fail:
- iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
range_alloc_fail:
free_pages(first_page, order);
return NULL;
}
-static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
- unsigned long npages)
+unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
+ unsigned long iotsb_num,
+ struct pci_bus *bus_dev)
+{
+ struct pci_dev *pdev;
+ unsigned long err;
+ unsigned int bus;
+ unsigned int device;
+ unsigned int fun;
+
+ list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
+ if (pdev->subordinate) {
+ /* No need to bind pci bridge */
+ dma_4v_iotsb_bind(devhandle, iotsb_num,
+ pdev->subordinate);
+ } else {
+ bus = bus_dev->number;
+ device = PCI_SLOT(pdev->devfn);
+ fun = PCI_FUNC(pdev->devfn);
+ err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
+ HV_PCI_DEVICE_BUILD(bus,
+ device,
+ fun));
+
+ /* If bind fails for one device it is going to fail
+ * for rest of the devices because we are sharing
+ * IOTSB. So in case of failure simply return with
+ * error.
+ */
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
+ dma_addr_t dvma, unsigned long iotsb_num,
+ unsigned long entry, unsigned long npages)
{
- u32 devhandle = *(u32 *)demap_arg;
unsigned long num, flags;
+ unsigned long ret;
local_irq_save(flags);
do {
- num = pci_sun4v_iommu_demap(devhandle,
- HV_PCI_TSBID(0, entry),
- npages);
-
+ if (dvma <= DMA_BIT_MASK(32)) {
+ num = pci_sun4v_iommu_demap(devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages);
+ } else {
+ ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
+ entry, npages, &num);
+ if (unlikely(ret != HV_EOK)) {
+ pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
+ ret);
+ }
+ }
entry += num;
npages -= num;
} while (npages != 0);
@@ -236,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
unsigned long order, npages, entry;
+ unsigned long iotsb_num;
u32 devhandle;
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
- entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+ if (dvma <= DMA_BIT_MASK(32)) {
+ tbl = &iommu->tbl;
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ } else {
+ tbl = &atu->tbl;
+ iotsb_num = atu->iotsb->iotsb_num;
+ }
+ entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+ dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
order = get_order(size);
if (order < 10)
free_pages((unsigned long)cpu, order);
@@ -257,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ u64 mask;
unsigned long flags, npages, oaddr;
unsigned long i, base_paddr;
- u32 bus_addr, ret;
unsigned long prot;
+ dma_addr_t bus_addr, ret;
long entry;
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
if (unlikely(direction == DMA_NONE))
goto bad;
@@ -272,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+ mask = *dev->dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == IOMMU_ERROR_CODE))
goto bad;
- bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+ bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK);
prot = HV_PCI_MAP_ATTR_READ;
@@ -293,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
iommu_batch_start(dev, prot, entry);
for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
- long err = iommu_batch_add(base_paddr);
+ long err = iommu_batch_add(base_paddr, mask);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
- if (unlikely(iommu_batch_end() < 0L))
+ if (unlikely(iommu_batch_end(mask) < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
@@ -310,7 +414,7 @@ bad:
return DMA_ERROR_CODE;
iommu_map_fail:
- iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
return DMA_ERROR_CODE;
}
@@ -320,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
unsigned long npages;
+ unsigned long iotsb_num;
long entry;
u32 devhandle;
@@ -332,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
bus_addr &= IO_PAGE_MASK;
- entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+
+ if (bus_addr <= DMA_BIT_MASK(32)) {
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ tbl = &iommu->tbl;
+ } else {
+ iotsb_num = atu->iotsb->iotsb_num;
+ tbl = &atu->tbl;
+ }
+ entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
+ dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
}
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -353,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
unsigned long seg_boundary_size;
int outcount, incount, i;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ u64 mask;
unsigned long base_shift;
long err;
BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
if (nelems == 0 || !iommu)
return 0;
@@ -384,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
max_seg_size = dma_get_max_seg_size(dev);
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
- base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
+
+ mask = *dev->dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;
+
for_each_sg(sglist, s, nelems, i) {
unsigned long paddr, npages, entry, out_entry = 0, slen;
@@ -397,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
/* Allocate iommu entries for that segment */
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+ entry = iommu_tbl_range_alloc(dev, tbl, npages,
&handle, (unsigned long)(-1), 0);
/* Handle failure */
if (unlikely(entry == IOMMU_ERROR_CODE)) {
- if (printk_ratelimit())
- printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
- " npages %lx\n", iommu, paddr, npages);
+ pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
+ tbl, paddr, npages);
goto iommu_map_failed;
}
- iommu_batch_new_entry(entry);
+ iommu_batch_new_entry(entry, mask);
/* Convert entry to a dma_addr_t */
- dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
+ dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
dma_addr |= (s->offset & ~IO_PAGE_MASK);
/* Insert into HW table */
paddr &= IO_PAGE_MASK;
while (npages--) {
- err = iommu_batch_add(paddr);
+ err = iommu_batch_add(paddr, mask);
if (unlikely(err < 0L))
goto iommu_map_failed;
paddr += IO_PAGE_SIZE;
@@ -452,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
dma_next = dma_addr + slen;
}
- err = iommu_batch_end();
+ err = iommu_batch_end(mask);
if (unlikely(err < 0L))
goto iommu_map_failed;
@@ -475,7 +603,7 @@ iommu_map_failed:
vaddr = s->dma_address & IO_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length,
IO_PAGE_SIZE);
- iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+ iommu_tbl_range_free(tbl, vaddr, npages,
IOMMU_ERROR_CODE);
/* XXX demap? XXX */
s->dma_address = DMA_ERROR_CODE;
@@ -496,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
struct pci_pbm_info *pbm;
struct scatterlist *sg;
struct iommu *iommu;
+ struct atu *atu;
unsigned long flags, entry;
+ unsigned long iotsb_num;
u32 devhandle;
BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
local_irq_save(flags);
@@ -512,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
dma_addr_t dma_handle = sg->dma_address;
unsigned int len = sg->dma_length;
unsigned long npages;
- struct iommu_map_table *tbl = &iommu->tbl;
+ struct iommu_map_table *tbl;
unsigned long shift = IO_PAGE_SHIFT;
if (!len)
break;
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+
+ if (dma_handle <= DMA_BIT_MASK(32)) {
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ tbl = &iommu->tbl;
+ } else {
+ iotsb_num = atu->iotsb->iotsb_num;
+ tbl = &atu->tbl;
+ }
entry = ((dma_handle - tbl->table_map_base) >> shift);
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+ dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
+ entry, npages);
+ iommu_tbl_range_free(tbl, dma_handle, npages,
IOMMU_ERROR_CODE);
sg = sg_next(sg);
}
@@ -581,6 +721,132 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
return cnt;
}
+static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
+{
+ struct atu *atu = pbm->iommu->atu;
+ struct atu_iotsb *iotsb;
+ void *table;
+ u64 table_size;
+ u64 iotsb_num;
+ unsigned long order;
+ unsigned long err;
+
+ iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
+ if (!iotsb) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ atu->iotsb = iotsb;
+
+ /* calculate size of IOTSB */
+ table_size = (atu->size / IO_PAGE_SIZE) * 8;
+ order = get_order(table_size);
+ table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!table) {
+ err = -ENOMEM;
+ goto table_failed;
+ }
+ iotsb->table = table;
+ iotsb->ra = __pa(table);
+ iotsb->dvma_size = atu->size;
+ iotsb->dvma_base = atu->base;
+ iotsb->table_size = table_size;
+ iotsb->page_size = IO_PAGE_SIZE;
+
+ /* configure and register IOTSB with HV */
+ err = pci_sun4v_iotsb_conf(pbm->devhandle,
+ iotsb->ra,
+ iotsb->table_size,
+ iotsb->page_size,
+ iotsb->dvma_base,
+ &iotsb_num);
+ if (err) {
+ pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
+ goto iotsb_conf_failed;
+ }
+ iotsb->iotsb_num = iotsb_num;
+
+ err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
+ if (err) {
+ pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
+ goto iotsb_conf_failed;
+ }
+
+ return 0;
+
+iotsb_conf_failed:
+ free_pages((unsigned long)table, order);
+table_failed:
+ kfree(iotsb);
+out_err:
+ return err;
+}
+
+static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
+{
+ struct atu *atu = pbm->iommu->atu;
+ unsigned long err;
+ const u64 *ranges;
+ u64 map_size, num_iotte;
+ u64 dma_mask;
+ const u32 *page_size;
+ int len;
+
+ ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
+ &len);
+ if (!ranges) {
+ pr_err(PFX "No iommu-address-ranges\n");
+ return -EINVAL;
+ }
+
+ page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
+ NULL);
+ if (!page_size) {
+ pr_err(PFX "No iommu-pagesizes\n");
+ return -EINVAL;
+ }
+
+ /* There are 4 iommu-address-ranges supported. Each range is pair of
+ * {base, size}. The ranges[0] and ranges[1] are 32bit address space
+ * while ranges[2] and ranges[3] are 64bit space. We want to use 64bit
+ * address ranges to support 64bit addressing. Because 'size' for
+ * address ranges[2] and ranges[3] are same we can select either of
+ * ranges[2] or ranges[3] for mapping. However due to 'size' is too
+ * large for OS to allocate IOTSB we are using fix size 32G
+ * (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices
+ * to share.
+ */
+ atu->ranges = (struct atu_ranges *)ranges;
+ atu->base = atu->ranges[3].base;
+ atu->size = ATU_64_SPACE_SIZE;
+
+ /* Create IOTSB */
+ err = pci_sun4v_atu_alloc_iotsb(pbm);
+ if (err) {
+ pr_err(PFX "Error creating ATU IOTSB\n");
+ return err;
+ }
+
+ /* Create ATU iommu map.
+ * One bit represents one iotte in IOTSB table.
+ */
+ dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
+ num_iotte = atu->size / IO_PAGE_SIZE;
+ map_size = num_iotte / 8;
+ atu->tbl.table_map_base = atu->base;
+ atu->dma_addr_mask = dma_mask;
+ atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
+ if (!atu->tbl.map)
+ return -ENOMEM;
+
+ iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
+ NULL, false /* no large_pool */,
+ 0 /* default npools */,
+ false /* want span boundary checking */);
+
+ return 0;
+}
+
static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
@@ -918,6 +1184,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
pci_sun4v_scan_bus(pbm, &op->dev);
+ /* if atu_init fails its not complete failure.
+ * we can still continue using legacy iommu.
+ */
+ if (pbm->iommu->atu) {
+ err = pci_sun4v_atu_init(pbm);
+ if (err) {
+ kfree(pbm->iommu->atu);
+ pbm->iommu->atu = NULL;
+ pr_err(PFX "ATU init failed, err=%d\n", err);
+ }
+ }
+
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
@@ -931,8 +1209,10 @@ static int pci_sun4v_probe(struct platform_device *op)
struct pci_pbm_info *pbm;
struct device_node *dp;
struct iommu *iommu;
+ struct atu *atu;
u32 devhandle;
int i, err = -ENODEV;
+ static bool hv_atu = true;
dp = op->dev.of_node;
@@ -954,6 +1234,19 @@ static int pci_sun4v_probe(struct platform_device *op)
pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
vpci_major, vpci_minor);
+ err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
+ if (err) {
+ /* don't return an error if we fail to register the
+ * ATU group, but ATU hcalls won't be available.
+ */
+ hv_atu = false;
+ pr_err(PFX "Could not register hvapi ATU err=%d\n",
+ err);
+ } else {
+ pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
+ vatu_major, vatu_minor);
+ }
+
dma_ops = &sun4v_dma_ops;
}
@@ -991,6 +1284,14 @@ static int pci_sun4v_probe(struct platform_device *op)
}
pbm->iommu = iommu;
+ iommu->atu = NULL;
+ if (hv_atu) {
+ atu = kzalloc(sizeof(*atu), GFP_KERNEL);
+ if (!atu)
+ pr_err(PFX "Could not allocate atu\n");
+ else
+ iommu->atu = atu;
+ }
err = pci_sun4v_pbm_init(pbm, op, devhandle);
if (err)
@@ -1001,6 +1302,7 @@ static int pci_sun4v_probe(struct platform_device *op)
return 0;
out_free_iommu:
+ kfree(iommu->atu);
kfree(pbm->iommu);
out_free_controller:
diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h
index 5642212..22603a4 100644
--- a/arch/sparc/kernel/pci_sun4v.h
+++ b/arch/sparc/kernel/pci_sun4v.h
@@ -89,4 +89,25 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
unsigned long msinum,
unsigned long valid);
+/* Sun4v HV IOMMU v2 APIs */
+unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
+ unsigned long ra,
+ unsigned long table_size,
+ unsigned long page_size,
+ unsigned long dvma_base,
+ u64 *iotsb_num);
+unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned int pci_device);
+unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned long iotsb_index_iottes,
+ unsigned long io_attributes,
+ unsigned long io_page_list_pa,
+ long *mapped);
+unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned long iotsb_index,
+ unsigned long iottes,
+ unsigned long *demapped);
#endif /* !(_PCI_SUN4V_H) */
diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S
index e606d46..578f096 100644
--- a/arch/sparc/kernel/pci_sun4v_asm.S
+++ b/arch/sparc/kernel/pci_sun4v_asm.S
@@ -360,3 +360,71 @@ ENTRY(pci_sun4v_msg_setvalid)
mov %o0, %o0
ENDPROC(pci_sun4v_msg_setvalid)
+ /*
+ * %o0: devhandle
+ * %o1: r_addr
+ * %o2: size
+ * %o3: pagesize
+ * %o4: virt
+ * %o5: &iotsb_num/&iotsb_handle
+ *
+ * returns %o0: status
+ * %o1: iotsb_num/iotsb_handle
+ */
+ENTRY(pci_sun4v_iotsb_conf)
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOTSB_CONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_conf)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: pci_device
+ *
+ * returns %o0: status
+ */
+ENTRY(pci_sun4v_iotsb_bind)
+ mov HV_FAST_PCI_IOTSB_BIND, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(pci_sun4v_iotsb_bind)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: index_count
+ * %o3: iotte_attributes
+ * %o4: io_page_list_p
+ * %o5: &mapped
+ *
+ * returns %o0: status
+ * %o1: #mapped
+ */
+ENTRY(pci_sun4v_iotsb_map)
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOTSB_MAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_map)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: iotsb_index
+ * %o3: #iottes
+ * %o4: &demapped
+ *
+ * returns %o0: status
+ * %o1: #demapped
+ */
+ENTRY(pci_sun4v_iotsb_demap)
+ mov HV_FAST_PCI_IOTSB_DEMAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%o4]
+ENDPROC(pci_sun4v_iotsb_demap)
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 9ddc492..ac082dd 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -127,7 +127,8 @@ static int get_from_target(struct task_struct *target, unsigned long uaddr,
if (copy_from_user(kbuf, (void __user *) uaddr, len))
return -EFAULT;
} else {
- int len2 = access_process_vm(target, uaddr, kbuf, len, 0);
+ int len2 = access_process_vm(target, uaddr, kbuf, len,
+ FOLL_FORCE);
if (len2 != len)
return -EFAULT;
}
@@ -141,7 +142,8 @@ static int set_to_target(struct task_struct *target, unsigned long uaddr,
if (copy_to_user((void __user *) uaddr, kbuf, len))
return -EFAULT;
} else {
- int len2 = access_process_vm(target, uaddr, kbuf, len, 1);
+ int len2 = access_process_vm(target, uaddr, kbuf, len,
+ FOLL_FORCE | FOLL_WRITE);
if (len2 != len)
return -EFAULT;
}
@@ -505,7 +507,8 @@ static int genregs32_get(struct task_struct *target,
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
- k, sizeof(*k), 0)
+ k, sizeof(*k),
+ FOLL_FORCE)
!= sizeof(*k))
return -EFAULT;
k++;
@@ -531,12 +534,14 @@ static int genregs32_get(struct task_struct *target,
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
- &reg, sizeof(reg), 0)
+ &reg, sizeof(reg),
+ FOLL_FORCE)
!= sizeof(reg))
return -EFAULT;
if (access_process_vm(target,
(unsigned long) u,
- &reg, sizeof(reg), 1)
+ &reg, sizeof(reg),
+ FOLL_FORCE | FOLL_WRITE)
!= sizeof(reg))
return -EFAULT;
pos++;
@@ -615,7 +620,8 @@ static int genregs32_set(struct task_struct *target,
(unsigned long)
&reg_window[pos],
(void *) k,
- sizeof(*k), 1)
+ sizeof(*k),
+ FOLL_FORCE | FOLL_WRITE)
!= sizeof(*k))
return -EFAULT;
k++;
@@ -642,13 +648,15 @@ static int genregs32_set(struct task_struct *target,
if (access_process_vm(target,
(unsigned long)
u,
- &reg, sizeof(reg), 0)
+ &reg, sizeof(reg),
+ FOLL_FORCE)
!= sizeof(reg))
return -EFAULT;
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
- &reg, sizeof(reg), 1)
+ &reg, sizeof(reg),
+ FOLL_FORCE | FOLL_WRITE)
!= sizeof(reg))
return -EFAULT;
pos++;
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index c3c12ef..9c0c8fd 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
/* 1. Make sure we are not getting garbage from the user */
- if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ if (invalid_frame_pointer(sf, sizeof(*sf)))
goto segv_and_exit;
if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
@@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
synchronize_user_stack();
sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
- if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ if (invalid_frame_pointer(sf, sizeof(*sf)))
goto segv;
if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba..8182f7c 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
[0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS - 1] = CPU_MASK_NONE };
+
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
static cpumask_t smp_commenced_mask;
@@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void)
unsigned int j;
for_each_present_cpu(j) {
+ if (cpu_data(i).max_cache_id ==
+ cpu_data(j).max_cache_id)
+ cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+
if (cpu_data(i).sock_id == cpu_data(j).sock_id)
cpumask_set_cpu(j, &cpu_core_sib_map[i]);
}
diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
index b7d0bd6..69a439f 100644
--- a/arch/sparc/lib/GENcopy_from_user.S
+++ b/arch/sparc/lib/GENcopy_from_user.S
@@ -3,11 +3,11 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, y; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
index 780550e..9947427 100644
--- a/arch/sparc/lib/GENcopy_to_user.S
+++ b/arch/sparc/lib/GENcopy_to_user.S
@@ -3,11 +3,11 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, y; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
index 89358ee..059ea24 100644
--- a/arch/sparc/lib/GENmemcpy.S
+++ b/arch/sparc/lib/GENmemcpy.S
@@ -4,21 +4,18 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#define GLOBAL_SPARE %g7
#else
#define GLOBAL_SPARE %g5
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST(x,y) x
#endif
#ifndef LOAD
@@ -45,6 +42,29 @@
.register %g3,#scratch
.text
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+ENTRY(GEN_retl_o4_1)
+ add %o4, %o2, %o4
+ retl
+ add %o4, 1, %o0
+ENDPROC(GEN_retl_o4_1)
+ENTRY(GEN_retl_g1_8)
+ add %g1, %o2, %g1
+ retl
+ add %g1, 8, %o0
+ENDPROC(GEN_retl_g1_8)
+ENTRY(GEN_retl_o2_4)
+ retl
+ add %o2, 4, %o0
+ENDPROC(GEN_retl_o2_4)
+ENTRY(GEN_retl_o2_1)
+ retl
+ add %o2, 1, %o0
+ENDPROC(GEN_retl_o2_1)
+#endif
+
.align 64
.globl FUNC_NAME
@@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %o4, %o4
sub %o2, %o4, %o2
1: subcc %o4, 1, %o4
- EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o0))
+ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
+ EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
add %o1, 1, %o1
bne,pt %XCC, 1b
add %o0, 1, %o0
@@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x7, %g1
sub %o2, %g1, %o2
1: subcc %g1, 0x8, %g1
- EX_LD(LOAD(ldx, %o1, %g2))
- EX_ST(STORE(stx, %g2, %o0))
+ EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
+ EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
add %o1, 0x8, %o1
bne,pt %XCC, 1b
add %o0, 0x8, %o0
@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1:
subcc %o2, 4, %o2
- EX_LD(LOAD(lduw, %o1, %g1))
- EX_ST(STORE(stw, %g1, %o1 + %o3))
+ EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
+ EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
bgu,pt %XCC, 1b
add %o1, 4, %o1
@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 32
90:
subcc %o2, 1, %o2
- EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o1 + %o3))
+ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
+ EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 885f00e..69912d2 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
obj-$(CONFIG_SPARC64) += iomap.o
diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
index d5242b8..b79a699 100644
--- a/arch/sparc/lib/NG2copy_from_user.S
+++ b/arch/sparc/lib/NG2copy_from_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi;\
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_LD_FP(x) \
+#define EX_LD_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi_fp;\
+ .word 98b, y##_fp; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
index 4e962d9..dcec55f 100644
--- a/arch/sparc/lib/NG2copy_to_user.S
+++ b/arch/sparc/lib/NG2copy_to_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi;\
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_ST_FP(x) \
+#define EX_ST_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi_fp;\
+ .word 98b, y##_fp; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index d5f585d..c629dbd 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
@@ -32,21 +33,17 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST_FP(x,y) x
#endif
#ifndef LOAD
@@ -140,45 +137,110 @@
fsrc2 %x6, %f12; \
fsrc2 %x7, %f14;
#define FREG_LOAD_1(base, x0) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
#define FREG_LOAD_2(base, x0, x1) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
#define FREG_LOAD_3(base, x0, x1, x2) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
#define FREG_LOAD_4(base, x0, x1, x2, x3) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
- EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
- EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
- EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
- EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
+ EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
.register %g2,#scratch
.register %g3,#scratch
.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+__restore_fp:
+ VISExitHalf
+__restore_asi:
+ retl
+ wr %g0, ASI_AIUS, %asi
+ENTRY(NG2_retl_o2)
+ ba,pt %xcc, __restore_asi
+ mov %o2, %o0
+ENDPROC(NG2_retl_o2)
+ENTRY(NG2_retl_o2_plus_1)
+ ba,pt %xcc, __restore_asi
+ add %o2, 1, %o0
+ENDPROC(NG2_retl_o2_plus_1)
+ENTRY(NG2_retl_o2_plus_4)
+ ba,pt %xcc, __restore_asi
+ add %o2, 4, %o0
+ENDPROC(NG2_retl_o2_plus_4)
+ENTRY(NG2_retl_o2_plus_8)
+ ba,pt %xcc, __restore_asi
+ add %o2, 8, %o0
+ENDPROC(NG2_retl_o2_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_1)
+ add %o4, 1, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_1)
+ENTRY(NG2_retl_o2_plus_o4_plus_8)
+ add %o4, 8, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_16)
+ add %o4, 16, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_16)
+ENTRY(NG2_retl_o2_plus_g1_fp)
+ ba,pt %xcc, __restore_fp
+ add %o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
+ add %g1, 64, %g1
+ ba,pt %xcc, __restore_fp
+ add %o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_1)
+ add %g1, 1, %g1
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_1)
+ENTRY(NG2_retl_o2_and_7_plus_o4)
+ and %o2, 7, %o2
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4)
+ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
+ and %o2, 7, %o2
+ add %o4, 8, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
+#endif
+
.align 64
.globl FUNC_NAME
@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %o4, %o4 ! bytes to align dst
sub %o2, %o4, %o2
1: subcc %o4, 1, %o4
- EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o0))
+ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
+ EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
add %o1, 1, %o1
bne,pt %XCC, 1b
add %o0, 1, %o0
@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
/* fall through for 0 < low bits < 8 */
110: sub %o4, 64, %g2
- EX_LD_FP(LOAD_BLK(%g2, %f0))
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+ EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
120: sub %o4, 56, %g2
FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
130: sub %o4, 48, %g2
FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
140: sub %o4, 40, %g2
FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_5(f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
150: sub %o4, 32, %g2
FREG_LOAD_4(%g2, f0, f2, f4, f6)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_4(f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
160: sub %o4, 24, %g2
FREG_LOAD_3(%g2, f0, f2, f4)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_3(f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
170: sub %o4, 16, %g2
FREG_LOAD_2(%g2, f0, f2)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_2(f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
180: sub %o4, 8, %g2
FREG_LOAD_1(%g2, f0)
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
- EX_LD_FP(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_1(f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
190:
-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
subcc %g1, 64, %g1
- EX_LD_FP(LOAD_BLK(%o4, %f0))
- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
add %o4, 64, %o4
bne,pt %xcc, 1b
LOAD(prefetch, %o4 + 64, #one_read)
@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0xf, %o4
and %o2, 0xf, %o2
1: subcc %o4, 0x10, %o4
- EX_LD(LOAD(ldx, %o1, %o5))
+ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
add %o1, 0x08, %o1
- EX_LD(LOAD(ldx, %o1, %g1))
+ EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
sub %o1, 0x08, %o1
- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
add %o1, 0x8, %o1
- EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
+ EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x8, %o2
- EX_LD(LOAD(ldx, %o1, %o5))
- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
+ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
- EX_LD(LOAD(lduw, %o1, %o5))
- EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
+ EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
+ EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, 85f
@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g1, %o2
1: subcc %g1, 1, %g1
- EX_LD(LOAD(ldub, %o1, %o5))
- EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
+ EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
+ EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
bgu,pt %icc, 1b
add %o1, 1, %o1
@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, GLOBAL_SPARE
andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1, %g2))
+ EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
andn %o2, 0x7, %o4
sllx %g2, %g1, %g2
1: add %o1, 0x8, %o1
- EX_LD(LOAD(ldx, %o1, %g3))
+ EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
subcc %o4, 0x8, %o4
srlx %g3, GLOBAL_SPARE, %o5
or %o5, %g2, %o5
- EX_ST(STORE(stx, %o5, %o0))
+ EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
add %o0, 0x8, %o0
bgu,pt %icc, 1b
sllx %g3, %g1, %g2
@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1:
subcc %o2, 4, %o2
- EX_LD(LOAD(lduw, %o1, %g1))
- EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
+ EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
+ EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
bgu,pt %XCC, 1b
add %o1, 4, %o1
@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 32
90:
subcc %o2, 1, %o2
- EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
+ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
+ EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
index 2e8ee7a..16a286c 100644
--- a/arch/sparc/lib/NG4copy_from_user.S
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x, y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi;\
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_LD_FP(x) \
+#define EX_LD_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi_fp;\
+ .word 98b, y##_fp; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
index be0bf45..6b0276f 100644
--- a/arch/sparc/lib/NG4copy_to_user.S
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi;\
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_ST_FP(x) \
+#define EX_ST_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_asi_fp;\
+ .word 98b, y##_fp; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 8e13ee1..75bb93b 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
@@ -46,22 +47,19 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
+#define EX_ST_FP(x,y) x
#endif
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
-#endif
#ifndef LOAD
#define LOAD(type,addr,dest) type [addr], dest
@@ -94,6 +92,158 @@
.register %g3,#scratch
.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+__restore_asi_fp:
+ VISExitHalf
+__restore_asi:
+ retl
+ wr %g0, ASI_AIUS, %asi
+
+ENTRY(NG4_retl_o2)
+ ba,pt %xcc, __restore_asi
+ mov %o2, %o0
+ENDPROC(NG4_retl_o2)
+ENTRY(NG4_retl_o2_plus_1)
+ ba,pt %xcc, __restore_asi
+ add %o2, 1, %o0
+ENDPROC(NG4_retl_o2_plus_1)
+ENTRY(NG4_retl_o2_plus_4)
+ ba,pt %xcc, __restore_asi
+ add %o2, 4, %o0
+ENDPROC(NG4_retl_o2_plus_4)
+ENTRY(NG4_retl_o2_plus_o5)
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5)
+ENTRY(NG4_retl_o2_plus_o5_plus_4)
+ add %o5, 4, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_4)
+ENTRY(NG4_retl_o2_plus_o5_plus_8)
+ add %o5, 8, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_8)
+ENTRY(NG4_retl_o2_plus_o5_plus_16)
+ add %o5, 16, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_16)
+ENTRY(NG4_retl_o2_plus_o5_plus_24)
+ add %o5, 24, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_24)
+ENTRY(NG4_retl_o2_plus_o5_plus_32)
+ add %o5, 32, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_32)
+ENTRY(NG4_retl_o2_plus_g1)
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1)
+ENTRY(NG4_retl_o2_plus_g1_plus_1)
+ add %g1, 1, %g1
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_1)
+ENTRY(NG4_retl_o2_plus_g1_plus_8)
+ add %g1, 8, %g1
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_8)
+ENTRY(NG4_retl_o2_plus_o4)
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4)
+ENTRY(NG4_retl_o2_plus_o4_plus_8)
+ add %o4, 8, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8)
+ENTRY(NG4_retl_o2_plus_o4_plus_16)
+ add %o4, 16, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16)
+ENTRY(NG4_retl_o2_plus_o4_plus_24)
+ add %o4, 24, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24)
+ENTRY(NG4_retl_o2_plus_o4_plus_32)
+ add %o4, 32, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32)
+ENTRY(NG4_retl_o2_plus_o4_plus_40)
+ add %o4, 40, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40)
+ENTRY(NG4_retl_o2_plus_o4_plus_48)
+ add %o4, 48, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48)
+ENTRY(NG4_retl_o2_plus_o4_plus_56)
+ add %o4, 56, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56)
+ENTRY(NG4_retl_o2_plus_o4_plus_64)
+ add %o4, 64, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64)
+ENTRY(NG4_retl_o2_plus_o4_fp)
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
+ add %o4, 8, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
+ add %o4, 16, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
+ add %o4, 24, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
+ add %o4, 32, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
+ add %o4, 40, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
+ add %o4, 48, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
+ add %o4, 56, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
+ add %o4, 64, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
+#endif
.align 64
.globl FUNC_NAME
@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, 51f
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 1, %o1
subcc %g1, 1, %g1
add %o0, 1, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stb, %g2, %o0 - 0x01))
+ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, .Llarge_aligned
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 8, %o1
subcc %g1, 8, %g1
add %o0, 8, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stx, %g2, %o0 - 0x08))
+ EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
.Llarge_aligned:
/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
andn %o2, 0x3f, %o4
sub %o2, %o4, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
add %o1, 0x40, %o1
- EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
+ EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
subcc %o4, 0x40, %o4
- EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
- EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
- EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
- EX_ST(STORE_INIT(%g1, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
+ EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(%g2, %o0))
+ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
add %o0, 0x08, %o0
- EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
- EX_ST(STORE_INIT(%g3, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
+ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
add %o0, 0x08, %o0
- EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
add %o0, 0x08, %o0
- EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
- EX_ST(STORE_INIT(%o5, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
+ EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(%g2, %o0))
+ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(%g3, %o0))
+ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
add %o0, 0x08, %o0
bne,pt %icc, 1b
LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %o4, %o2
alignaddr %o1, %g0, %g1
add %o1, %o4, %o1
- EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
-1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
+1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
subcc %o4, 0x40, %o4
- EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
- EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
- EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
- EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
- EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
- EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
faligndata %f0, %f2, %f16
- EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
faligndata %f2, %f4, %f18
add %g1, 0x40, %g1
faligndata %f4, %f6, %f20
@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
- EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
- EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
- EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
- EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
- EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
- EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
- EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
- EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
+ EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
+ EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
+ EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
+ EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
+ EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
+ EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
+ EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
+ EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
add %o0, 0x40, %o0
bne,pt %icc, 1b
LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andncc %o2, 0x20 - 1, %o5
be,pn %icc, 2f
sub %o2, %o5, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
- EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
- EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
- EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+ EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
+ EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
+ EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
add %o1, 0x20, %o1
subcc %o5, 0x20, %o5
- EX_ST(STORE(stx, %g1, %o0 + 0x00))
- EX_ST(STORE(stx, %g2, %o0 + 0x08))
- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
- EX_ST(STORE(stx, %o4, %o0 + 0x18))
+ EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
+ EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
+ EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
bne,pt %icc, 1b
add %o0, 0x20, %o0
2: andcc %o2, 0x18, %o5
be,pt %icc, 3f
sub %o2, %o5, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
add %o1, 0x08, %o1
add %o0, 0x08, %o0
subcc %o5, 0x08, %o5
bne,pt %icc, 1b
- EX_ST(STORE(stx, %g1, %o0 - 0x08))
+ EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
3: brz,pt %o2, .Lexit
cmp %o2, 0x04
bl,pn %icc, .Ltiny
nop
- EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
add %o1, 0x04, %o1
add %o0, 0x04, %o0
subcc %o2, 0x04, %o2
bne,pn %icc, .Ltiny
- EX_ST(STORE(stw, %g1, %o0 - 0x04))
+ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
ba,a,pt %icc, .Lexit
.Lmedium_unaligned:
/* First get dest 8 byte aligned. */
@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, 2f
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 1, %o1
subcc %g1, 1, %g1
add %o0, 1, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stb, %g2, %o0 - 0x01))
+ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
2:
and %o1, 0x7, %g1
brz,pn %g1, .Lmedium_noprefetch
@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov 64, %g2
sub %g2, %g1, %g2
andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+ EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
sllx %o4, %g1, %o4
andn %o2, 0x08 - 1, %o5
sub %o2, %o5, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
add %o1, 0x08, %o1
subcc %o5, 0x08, %o5
srlx %g3, %g2, GLOBAL_SPARE
or GLOBAL_SPARE, %o4, GLOBAL_SPARE
- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
add %o0, 0x08, %o0
bne,pt %icc, 1b
sllx %g3, %g1, %o4
@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %icc, .Lsmall_unaligned
.Ltiny:
- EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+ EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
subcc %o2, 1, %o2
be,pn %icc, .Lexit
- EX_ST(STORE(stb, %g1, %o0 + 0x00))
- EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+ EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
+ EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
subcc %o2, 1, %o2
be,pn %icc, .Lexit
- EX_ST(STORE(stb, %g1, %o0 + 0x01))
- EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+ EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
+ EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
ba,pt %icc, .Lexit
- EX_ST(STORE(stb, %g1, %o0 + 0x02))
+ EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
.Lsmall:
andcc %g2, 0x3, %g0
@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x4 - 1, %o5
sub %o2, %o5, %o2
1:
- EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
add %o1, 0x04, %o1
subcc %o5, 0x04, %o5
add %o0, 0x04, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stw, %g1, %o0 - 0x04))
+ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
brz,pt %o2, .Lexit
nop
ba,a,pt %icc, .Ltiny
.Lsmall_unaligned:
-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
add %o1, 1, %o1
add %o0, 1, %o0
subcc %o2, 1, %o2
bne,pt %icc, 1b
- EX_ST(STORE(stb, %g1, %o0 - 0x01))
+ EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
ba,a,pt %icc, .Lexit
.size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
index 5d1e4d1..9cd42fc 100644
--- a/arch/sparc/lib/NGcopy_from_user.S
+++ b/arch/sparc/lib/NGcopy_from_user.S
@@ -3,11 +3,11 @@
* Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __ret_one_asi;\
+ .word 98b, y; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
index ff630dc..5c358af 100644
--- a/arch/sparc/lib/NGcopy_to_user.S
+++ b/arch/sparc/lib/NGcopy_to_user.S
@@ -3,11 +3,11 @@
* Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __ret_one_asi;\
+ .word 98b, y; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
index 96a14ca..d88c4ed 100644
--- a/arch/sparc/lib/NGmemcpy.S
+++ b/arch/sparc/lib/NGmemcpy.S
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/asi.h>
#include <asm/thread_info.h>
#define GLOBAL_SPARE %g7
@@ -27,15 +28,11 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST(x,y) x
#endif
#ifndef LOAD
@@ -79,6 +76,92 @@
.register %g3,#scratch
.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+__restore_asi:
+ ret
+ wr %g0, ASI_AIUS, %asi
+ restore
+ENTRY(NG_ret_i2_plus_i4_plus_1)
+ ba,pt %xcc, __restore_asi
+ add %i2, %i5, %i0
+ENDPROC(NG_ret_i2_plus_i4_plus_1)
+ENTRY(NG_ret_i2_plus_g1)
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1)
+ENTRY(NG_ret_i2_plus_g1_minus_8)
+ sub %g1, 8, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_8)
+ENTRY(NG_ret_i2_plus_g1_minus_16)
+ sub %g1, 16, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_16)
+ENTRY(NG_ret_i2_plus_g1_minus_24)
+ sub %g1, 24, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_24)
+ENTRY(NG_ret_i2_plus_g1_minus_32)
+ sub %g1, 32, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_32)
+ENTRY(NG_ret_i2_plus_g1_minus_40)
+ sub %g1, 40, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_40)
+ENTRY(NG_ret_i2_plus_g1_minus_48)
+ sub %g1, 48, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_48)
+ENTRY(NG_ret_i2_plus_g1_minus_56)
+ sub %g1, 56, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_56)
+ENTRY(NG_ret_i2_plus_i4)
+ ba,pt %xcc, __restore_asi
+ add %i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4)
+ENTRY(NG_ret_i2_plus_i4_minus_8)
+ sub %i4, 8, %i4
+ ba,pt %xcc, __restore_asi
+ add %i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4_minus_8)
+ENTRY(NG_ret_i2_plus_8)
+ ba,pt %xcc, __restore_asi
+ add %i2, 8, %i0
+ENDPROC(NG_ret_i2_plus_8)
+ENTRY(NG_ret_i2_plus_4)
+ ba,pt %xcc, __restore_asi
+ add %i2, 4, %i0
+ENDPROC(NG_ret_i2_plus_4)
+ENTRY(NG_ret_i2_plus_1)
+ ba,pt %xcc, __restore_asi
+ add %i2, 1, %i0
+ENDPROC(NG_ret_i2_plus_1)
+ENTRY(NG_ret_i2_plus_g1_plus_1)
+ add %g1, 1, %g1
+ ba,pt %xcc, __restore_asi
+ add %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_plus_1)
+ENTRY(NG_ret_i2)
+ ba,pt %xcc, __restore_asi
+ mov %i2, %i0
+ENDPROC(NG_ret_i2)
+ENTRY(NG_ret_i2_and_7_plus_i4)
+ and %i2, 7, %i2
+ ba,pt %xcc, __restore_asi
+ add %i2, %i4, %i0
+ENDPROC(NG_ret_i2_and_7_plus_i4)
+#endif
+
.align 64
.globl FUNC_NAME
@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
sub %g0, %i4, %i4 ! bytes to align dst
sub %i2, %i4, %i2
1: subcc %i4, 1, %i4
- EX_LD(LOAD(ldub, %i1, %g1))
- EX_ST(STORE(stb, %g1, %o0))
+ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
+ EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
add %i1, 1, %i1
bne,pt %XCC, 1b
add %o0, 1, %o0
@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
and %i4, 0x7, GLOBAL_SPARE
sll GLOBAL_SPARE, 3, GLOBAL_SPARE
mov 64, %i5
- EX_LD(LOAD_TWIN(%i1, %g2, %g3))
+ EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
sub %i5, GLOBAL_SPARE, %i5
mov 16, %o4
mov 32, %o5
@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
srlx WORD3, PRE_SHIFT, TMP; \
or WORD2, TMP, WORD2;
-8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
LOAD(prefetch, %i1 + %i3, #one_read)
- EX_ST(STORE_INIT(%g2, %o0 + 0x00))
- EX_ST(STORE_INIT(%g3, %o0 + 0x08))
+ EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
+ EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
- EX_ST(STORE_INIT(%o2, %o0 + 0x10))
- EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
- EX_ST(STORE_INIT(%g2, %o0 + 0x20))
- EX_ST(STORE_INIT(%g3, %o0 + 0x28))
+ EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+ EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
add %i1, 64, %i1
MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
- EX_ST(STORE_INIT(%o2, %o0 + 0x30))
- EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1
bne,pt %XCC, 8b
@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
ba,pt %XCC, 60f
add %i1, %i4, %i1
-9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
LOAD(prefetch, %i1 + %i3, #one_read)
- EX_ST(STORE_INIT(%g3, %o0 + 0x00))
- EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
+ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
- EX_ST(STORE_INIT(%o3, %o0 + 0x10))
- EX_ST(STORE_INIT(%g2, %o0 + 0x18))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+ EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
- EX_ST(STORE_INIT(%g3, %o0 + 0x20))
- EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
add %i1, 64, %i1
MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
- EX_ST(STORE_INIT(%o3, %o0 + 0x30))
- EX_ST(STORE_INIT(%g2, %o0 + 0x38))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+ EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1
bne,pt %XCC, 9b
@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
* one twin load ahead, then add 8 back into source when
* we finish the loop.
*/
- EX_LD(LOAD_TWIN(%i1, %o4, %o5))
+ EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
mov 16, %o7
mov 32, %g2
mov 48, %g3
mov 64, %o1
-1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
LOAD(prefetch, %i1 + %o1, #one_read)
- EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
- EX_ST(STORE_INIT(%o2, %o0 + 0x08))
- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
- EX_ST(STORE_INIT(%o3, %o0 + 0x10))
- EX_ST(STORE_INIT(%o4, %o0 + 0x18))
- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
- EX_ST(STORE_INIT(%o5, %o0 + 0x20))
- EX_ST(STORE_INIT(%o2, %o0 + 0x28))
- EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
+ EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
+ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+ EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+ EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+ EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
add %i1, 64, %i1
- EX_ST(STORE_INIT(%o3, %o0 + 0x30))
- EX_ST(STORE_INIT(%o4, %o0 + 0x38))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+ EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1
bne,pt %XCC, 1b
add %o0, 64, %o0
@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
mov 32, %g2
mov 48, %g3
mov 64, %o1
-1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
LOAD(prefetch, %i1 + %o1, #one_read)
- EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
- EX_ST(STORE_INIT(%o5, %o0 + 0x08))
- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
- EX_ST(STORE_INIT(%o2, %o0 + 0x10))
- EX_ST(STORE_INIT(%o3, %o0 + 0x18))
- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+ EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
+ EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
add %i1, 64, %i1
- EX_ST(STORE_INIT(%o4, %o0 + 0x20))
- EX_ST(STORE_INIT(%o5, %o0 + 0x28))
- EX_ST(STORE_INIT(%o2, %o0 + 0x30))
- EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+ EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+ EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1
bne,pt %XCC, 1b
add %o0, 64, %o0
@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
andn %i2, 0xf, %i4
and %i2, 0xf, %i2
1: subcc %i4, 0x10, %i4
- EX_LD(LOAD(ldx, %i1, %o4))
+ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
add %i1, 0x08, %i1
- EX_LD(LOAD(ldx, %i1, %g1))
+ EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
sub %i1, 0x08, %i1
- EX_ST(STORE(stx, %o4, %i1 + %i3))
+ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
add %i1, 0x8, %i1
- EX_ST(STORE(stx, %g1, %i1 + %i3))
+ EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
bgu,pt %XCC, 1b
add %i1, 0x8, %i1
73: andcc %i2, 0x8, %g0
be,pt %XCC, 1f
nop
sub %i2, 0x8, %i2
- EX_LD(LOAD(ldx, %i1, %o4))
- EX_ST(STORE(stx, %o4, %i1 + %i3))
+ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
+ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
add %i1, 0x8, %i1
1: andcc %i2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %i2, 0x4, %i2
- EX_LD(LOAD(lduw, %i1, %i5))
- EX_ST(STORE(stw, %i5, %i1 + %i3))
+ EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
+ EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
add %i1, 0x4, %i1
1: cmp %i2, 0
be,pt %XCC, 85f
@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
sub %i2, %g1, %i2
1: subcc %g1, 1, %g1
- EX_LD(LOAD(ldub, %i1, %i5))
- EX_ST(STORE(stb, %i5, %i1 + %i3))
+ EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
+ EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
bgu,pt %icc, 1b
add %i1, 1, %i1
@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
8: mov 64, %i3
andn %i1, 0x7, %i1
- EX_LD(LOAD(ldx, %i1, %g2))
+ EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
sub %i3, %g1, %i3
andn %i2, 0x7, %i4
sllx %g2, %g1, %g2
1: add %i1, 0x8, %i1
- EX_LD(LOAD(ldx, %i1, %g3))
+ EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
subcc %i4, 0x8, %i4
srlx %g3, %i3, %i5
or %i5, %g2, %i5
- EX_ST(STORE(stx, %i5, %o0))
+ EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
add %o0, 0x8, %o0
bgu,pt %icc, 1b
sllx %g3, %g1, %g2
@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1:
subcc %i2, 4, %i2
- EX_LD(LOAD(lduw, %i1, %g1))
- EX_ST(STORE(stw, %g1, %i1 + %i3))
+ EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
+ EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
bgu,pt %XCC, 1b
add %i1, 4, %i1
@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
.align 32
90:
subcc %i2, 1, %i2
- EX_LD(LOAD(ldub, %i1, %g1))
- EX_ST(STORE(stb, %g1, %i1 + %i3))
+ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
+ EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
bgu,pt %XCC, 90b
add %i1, 1, %i1
ret
diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
index ecc5692..bb6ff73 100644
--- a/arch/sparc/lib/U1copy_from_user.S
+++ b/arch/sparc/lib/U1copy_from_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_LD_FP(x) \
+#define EX_LD_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_fp;\
+ .word 98b, y; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
index 9eea392..ed92ce73 100644
--- a/arch/sparc/lib/U1copy_to_user.S
+++ b/arch/sparc/lib/U1copy_to_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_ST_FP(x) \
+#define EX_ST_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_fp;\
+ .word 98b, y; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index 97e1b21..4f0d50b 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -5,6 +5,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/export.h>
@@ -24,21 +25,17 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST_FP(x,y) x
#endif
#ifndef LOAD
@@ -79,53 +76,169 @@
faligndata %f7, %f8, %f60; \
faligndata %f8, %f9, %f62;
-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
- EX_LD_FP(LOAD_BLK(%src, %fdest)); \
- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
- add %src, 0x40, %src; \
- subcc %len, 0x40, %len; \
- be,pn %xcc, jmptgt; \
- add %dest, 0x40, %dest; \
-
-#define LOOP_CHUNK1(src, dest, len, branch_dest) \
- MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
-#define LOOP_CHUNK2(src, dest, len, branch_dest) \
- MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
-#define LOOP_CHUNK3(src, dest, len, branch_dest) \
- MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
+#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
+ EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
+ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
+ add %src, 0x40, %src; \
+ subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
+ be,pn %xcc, jmptgt; \
+ add %dest, 0x40, %dest; \
+
+#define LOOP_CHUNK1(src, dest, branch_dest) \
+ MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
+#define LOOP_CHUNK2(src, dest, branch_dest) \
+ MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
+#define LOOP_CHUNK3(src, dest, branch_dest) \
+ MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
#define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \
- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
+ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
add %dest, 0x40, %dest; \
DO_SYNC
#define STORE_JUMP(dest, fsrc, target) \
- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
+ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
add %dest, 0x40, %dest; \
ba,pt %xcc, target; \
nop;
-#define FINISH_VISCHUNK(dest, f0, f1, left) \
- subcc %left, 8, %left;\
- bl,pn %xcc, 95f; \
- faligndata %f0, %f1, %f48; \
- EX_ST_FP(STORE(std, %f48, %dest)); \
+#define FINISH_VISCHUNK(dest, f0, f1) \
+ subcc %g3, 8, %g3; \
+ bl,pn %xcc, 95f; \
+ faligndata %f0, %f1, %f48; \
+ EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
add %dest, 8, %dest;
-#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
- subcc %left, 8, %left; \
- bl,pn %xcc, 95f; \
+#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
+ subcc %g3, 8, %g3; \
+ bl,pn %xcc, 95f; \
fsrc2 %f0, %f1;
-#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
- UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
+#define UNEVEN_VISCHUNK(dest, f0, f1) \
+ UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
ba,a,pt %xcc, 93f;
.register %g2,#scratch
.register %g3,#scratch
.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+ENTRY(U1_g1_1_fp)
+ VISExitHalf
+ add %g1, 1, %g1
+ add %g1, %g2, %g1
+ retl
+ add %g1, %o2, %o0
+ENDPROC(U1_g1_1_fp)
+ENTRY(U1_g2_0_fp)
+ VISExitHalf
+ retl
+ add %g2, %o2, %o0
+ENDPROC(U1_g2_0_fp)
+ENTRY(U1_g2_8_fp)
+ VISExitHalf
+ add %g2, 8, %g2
+ retl
+ add %g2, %o2, %o0
+ENDPROC(U1_g2_8_fp)
+ENTRY(U1_gs_0_fp)
+ VISExitHalf
+ add %GLOBAL_SPARE, %g3, %o0
+ retl
+ add %o0, %o2, %o0
+ENDPROC(U1_gs_0_fp)
+ENTRY(U1_gs_80_fp)
+ VISExitHalf
+ add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+ add %GLOBAL_SPARE, %g3, %o0
+ retl
+ add %o0, %o2, %o0
+ENDPROC(U1_gs_80_fp)
+ENTRY(U1_gs_40_fp)
+ VISExitHalf
+ add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
+ add %GLOBAL_SPARE, %g3, %o0
+ retl
+ add %o0, %o2, %o0
+ENDPROC(U1_gs_40_fp)
+ENTRY(U1_g3_0_fp)
+ VISExitHalf
+ retl
+ add %g3, %o2, %o0
+ENDPROC(U1_g3_0_fp)
+ENTRY(U1_g3_8_fp)
+ VISExitHalf
+ add %g3, 8, %g3
+ retl
+ add %g3, %o2, %o0
+ENDPROC(U1_g3_8_fp)
+ENTRY(U1_o2_0_fp)
+ VISExitHalf
+ retl
+ mov %o2, %o0
+ENDPROC(U1_o2_0_fp)
+ENTRY(U1_o2_1_fp)
+ VISExitHalf
+ retl
+ add %o2, 1, %o0
+ENDPROC(U1_o2_1_fp)
+ENTRY(U1_gs_0)
+ VISExitHalf
+ retl
+ add %GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0)
+ENTRY(U1_gs_8)
+ VISExitHalf
+ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+ retl
+ add %GLOBAL_SPARE, 0x8, %o0
+ENDPROC(U1_gs_8)
+ENTRY(U1_gs_10)
+ VISExitHalf
+ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+ retl
+ add %GLOBAL_SPARE, 0x10, %o0
+ENDPROC(U1_gs_10)
+ENTRY(U1_o2_0)
+ retl
+ mov %o2, %o0
+ENDPROC(U1_o2_0)
+ENTRY(U1_o2_8)
+ retl
+ add %o2, 8, %o0
+ENDPROC(U1_o2_8)
+ENTRY(U1_o2_4)
+ retl
+ add %o2, 4, %o0
+ENDPROC(U1_o2_4)
+ENTRY(U1_o2_1)
+ retl
+ add %o2, 1, %o0
+ENDPROC(U1_o2_1)
+ENTRY(U1_g1_0)
+ retl
+ add %g1, %o2, %o0
+ENDPROC(U1_g1_0)
+ENTRY(U1_g1_1)
+ add %g1, 1, %g1
+ retl
+ add %g1, %o2, %o0
+ENDPROC(U1_g1_1)
+ENTRY(U1_gs_0_o2_adj)
+ and %o2, 7, %o2
+ retl
+ add %GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0_o2_adj)
+ENTRY(U1_gs_8_o2_adj)
+ and %o2, 7, %o2
+ add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
+ retl
+ add %GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_8_o2_adj)
+#endif
+
.align 64
.globl FUNC_NAME
@@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1
- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
- EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
+ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
+ EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
bgu,pt %XCC, 1b
add %o1, 0x1, %o1
@@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f
alignaddr %o1, %g0, %o1
- EX_LD_FP(LOAD(ldd, %o1, %f4))
-1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
+ EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
+1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0
- EX_ST_FP(STORE(std, %f0, %o0))
+ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
be,pn %icc, 3f
add %o0, 0x8, %o0
- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f0
- EX_ST_FP(STORE(std, %f0, %o0))
+ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
bne,pt %icc, 1b
add %o0, 0x8, %o0
@@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %g1, %GLOBAL_SPARE, %g1
subcc %o2, %g3, %o2
- EX_LD_FP(LOAD_BLK(%o1, %f0))
+ EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
add %o1, 0x40, %o1
add %g1, %g3, %g1
- EX_LD_FP(LOAD_BLK(%o1, %f16))
+ EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
add %o1, 0x40, %o1
sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
- EX_LD_FP(LOAD_BLK(%o1, %f32))
+ EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
add %o1, 0x40, %o1
/* There are 8 instances of the unrolled loop,
@@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 64
1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f0, %f2, %f48
1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
@@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 56f)
1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f2, %f4, %f48
1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
@@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 57f)
1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f4, %f6, %f48
1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
@@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 58f)
1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f6, %f8, %f48
1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
@@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 59f)
1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f8, %f10, %f48
1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
@@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 60f)
1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f10, %f12, %f48
1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
@@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 61f)
1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f12, %f14, %f48
1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
@@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 62f)
1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+ LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+ LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+ LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4
faligndata %f14, %f16, %f48
1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
@@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_JUMP(o0, f48, 63f)
-40: FINISH_VISCHUNK(o0, f0, f2, g3)
-41: FINISH_VISCHUNK(o0, f2, f4, g3)
-42: FINISH_VISCHUNK(o0, f4, f6, g3)
-43: FINISH_VISCHUNK(o0, f6, f8, g3)
-44: FINISH_VISCHUNK(o0, f8, f10, g3)
-45: FINISH_VISCHUNK(o0, f10, f12, g3)
-46: FINISH_VISCHUNK(o0, f12, f14, g3)
-47: UNEVEN_VISCHUNK(o0, f14, f0, g3)
-48: FINISH_VISCHUNK(o0, f16, f18, g3)
-49: FINISH_VISCHUNK(o0, f18, f20, g3)
-50: FINISH_VISCHUNK(o0, f20, f22, g3)
-51: FINISH_VISCHUNK(o0, f22, f24, g3)
-52: FINISH_VISCHUNK(o0, f24, f26, g3)
-53: FINISH_VISCHUNK(o0, f26, f28, g3)
-54: FINISH_VISCHUNK(o0, f28, f30, g3)
-55: UNEVEN_VISCHUNK(o0, f30, f0, g3)
-56: FINISH_VISCHUNK(o0, f32, f34, g3)
-57: FINISH_VISCHUNK(o0, f34, f36, g3)
-58: FINISH_VISCHUNK(o0, f36, f38, g3)
-59: FINISH_VISCHUNK(o0, f38, f40, g3)
-60: FINISH_VISCHUNK(o0, f40, f42, g3)
-61: FINISH_VISCHUNK(o0, f42, f44, g3)
-62: FINISH_VISCHUNK(o0, f44, f46, g3)
-63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
-
-93: EX_LD_FP(LOAD(ldd, %o1, %f2))
+40: FINISH_VISCHUNK(o0, f0, f2)
+41: FINISH_VISCHUNK(o0, f2, f4)
+42: FINISH_VISCHUNK(o0, f4, f6)
+43: FINISH_VISCHUNK(o0, f6, f8)
+44: FINISH_VISCHUNK(o0, f8, f10)
+45: FINISH_VISCHUNK(o0, f10, f12)
+46: FINISH_VISCHUNK(o0, f12, f14)
+47: UNEVEN_VISCHUNK(o0, f14, f0)
+48: FINISH_VISCHUNK(o0, f16, f18)
+49: FINISH_VISCHUNK(o0, f18, f20)
+50: FINISH_VISCHUNK(o0, f20, f22)
+51: FINISH_VISCHUNK(o0, f22, f24)
+52: FINISH_VISCHUNK(o0, f24, f26)
+53: FINISH_VISCHUNK(o0, f26, f28)
+54: FINISH_VISCHUNK(o0, f28, f30)
+55: UNEVEN_VISCHUNK(o0, f30, f0)
+56: FINISH_VISCHUNK(o0, f32, f34)
+57: FINISH_VISCHUNK(o0, f34, f36)
+58: FINISH_VISCHUNK(o0, f36, f38)
+59: FINISH_VISCHUNK(o0, f38, f40)
+60: FINISH_VISCHUNK(o0, f40, f42)
+61: FINISH_VISCHUNK(o0, f42, f44)
+62: FINISH_VISCHUNK(o0, f44, f46)
+63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
+
+93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
add %o1, 8, %o1
subcc %g3, 8, %g3
faligndata %f0, %f2, %f8
- EX_ST_FP(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
bl,pn %xcc, 95f
add %o0, 8, %o0
- EX_LD_FP(LOAD(ldd, %o1, %f0))
+ EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
add %o1, 8, %o1
subcc %g3, 8, %g3
faligndata %f2, %f0, %f8
- EX_ST_FP(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
bge,pt %xcc, 93b
add %o0, 8, %o0
95: brz,pt %o2, 2f
mov %g1, %o1
-1: EX_LD_FP(LOAD(ldub, %o1, %o3))
+1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
add %o1, 1, %o1
subcc %o2, 1, %o2
- EX_ST_FP(STORE(stb, %o3, %o0))
+ EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
bne,pt %xcc, 1b
add %o0, 1, %o0
@@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
72: andn %o2, 0xf, %GLOBAL_SPARE
and %o2, 0xf, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
- EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
+ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
add %o1, 0x8, %o1
- EX_ST(STORE(stx, %g1, %o1 + %o3))
+ EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f
nop
- EX_LD(LOAD(ldx, %o1, %o5))
+ EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
sub %o2, 0x8, %o2
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
- EX_LD(LOAD(lduw, %o1, %o5))
+ EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
sub %o2, 0x4, %o2
- EX_ST(STORE(stw, %o5, %o1 + %o3))
+ EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, 85f
@@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %g1, %g1
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldub, %o1, %o5))
+1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
subcc %g1, 1, %g1
- EX_ST(STORE(stb, %o5, %o1 + %o3))
+ EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
bgu,pt %icc, 1b
add %o1, 1, %o1
@@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, %o3
andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1, %g2))
+ EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
sub %o3, %g1, %o3
andn %o2, 0x7, %GLOBAL_SPARE
sllx %g2, %g1, %g2
-1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
+1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
add %o1, 0x8, %o1
srlx %g3, %o3, %o5
or %o5, %g2, %o5
- EX_ST(STORE(stx, %o5, %o0))
+ EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
add %o0, 0x8, %o0
bgu,pt %icc, 1b
sllx %g3, %g1, %g2
@@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bne,pn %XCC, 90f
sub %o0, %o1, %o3
-1: EX_LD(LOAD(lduw, %o1, %g1))
+1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
subcc %o2, 4, %o2
- EX_ST(STORE(stw, %g1, %o1 + %o3))
+ EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
bgu,pt %XCC, 1b
add %o1, 4, %o1
@@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov EX_RETVAL(%o4), %o0
.align 32
-90: EX_LD(LOAD(ldub, %o1, %g1))
+90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
subcc %o2, 1, %o2
- EX_ST(STORE(stb, %g1, %o1 + %o3))
+ EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
index 88ad73d..db73010 100644
--- a/arch/sparc/lib/U3copy_from_user.S
+++ b/arch/sparc/lib/U3copy_from_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_LD_FP(x) \
+#define EX_LD_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_fp;\
+ .word 98b, y##_fp; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
index 845139d..c4ee858 100644
--- a/arch/sparc/lib/U3copy_to_user.S
+++ b/arch/sparc/lib/U3copy_to_user.S
@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, y; \
.text; \
.align 4;
-#define EX_ST_FP(x) \
+#define EX_ST_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one_fp;\
+ .word 98b, y##_fp; \
.text; \
.align 4;
diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
index 491ee69..54f9870 100644
--- a/arch/sparc/lib/U3memcpy.S
+++ b/arch/sparc/lib/U3memcpy.S
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
@@ -22,21 +23,17 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST_FP(x,y) x
#endif
#ifndef LOAD
@@ -77,6 +74,87 @@
*/
.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+__restore_fp:
+ VISExitHalf
+ retl
+ nop
+ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+ add %g1, 1, %g1
+ add %g2, %g1, %g2
+ ba,pt %xcc, __restore_fp
+ add %o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+ENTRY(U3_retl_o2_plus_g2_fp)
+ ba,pt %xcc, __restore_fp
+ add %o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_fp)
+ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
+ add %g2, 8, %g2
+ ba,pt %xcc, __restore_fp
+ add %o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
+ENTRY(U3_retl_o2)
+ retl
+ mov %o2, %o0
+ENDPROC(U3_retl_o2)
+ENTRY(U3_retl_o2_plus_1)
+ retl
+ add %o2, 1, %o0
+ENDPROC(U3_retl_o2_plus_1)
+ENTRY(U3_retl_o2_plus_4)
+ retl
+ add %o2, 4, %o0
+ENDPROC(U3_retl_o2_plus_4)
+ENTRY(U3_retl_o2_plus_8)
+ retl
+ add %o2, 8, %o0
+ENDPROC(U3_retl_o2_plus_8)
+ENTRY(U3_retl_o2_plus_g1_plus_1)
+ add %g1, 1, %g1
+ retl
+ add %o2, %g1, %o0
+ENDPROC(U3_retl_o2_plus_g1_plus_1)
+ENTRY(U3_retl_o2_fp)
+ ba,pt %xcc, __restore_fp
+ mov %o2, %o0
+ENDPROC(U3_retl_o2_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+ sll %o3, 6, %o3
+ add %o3, 0x80, %o3
+ ba,pt %xcc, __restore_fp
+ add %o2, %o3, %o0
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+ sll %o3, 6, %o3
+ add %o3, 0x40, %o3
+ ba,pt %xcc, __restore_fp
+ add %o2, %o3, %o0
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+ENTRY(U3_retl_o2_plus_GS_plus_0x10)
+ add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
+ retl
+ add %o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
+ENTRY(U3_retl_o2_plus_GS_plus_0x08)
+ add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
+ retl
+ add %o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
+ENTRY(U3_retl_o2_and_7_plus_GS)
+ and %o2, 7, %o2
+ retl
+ add %o2, GLOBAL_SPARE, %o2
+ENDPROC(U3_retl_o2_and_7_plus_GS)
+ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
+ add GLOBAL_SPARE, 8, GLOBAL_SPARE
+ and %o2, 7, %o2
+ retl
+ add %o2, GLOBAL_SPARE, %o2
+ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
+#endif
+
.align 64
/* The cheetah's flexible spine, oversized liver, enlarged heart,
@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1
- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
- EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
+ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
+ EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
bgu,pt %XCC, 1b
add %o1, 0x1, %o1
@@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f
alignaddr %o1, %g0, %o1
- EX_LD_FP(LOAD(ldd, %o1, %f4))
-1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
+ EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
+1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0
- EX_ST_FP(STORE(std, %f0, %o0))
+ EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
be,pn %icc, 3f
add %o0, 0x8, %o0
- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f2
- EX_ST_FP(STORE(std, %f2, %o0))
+ EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
bne,pt %icc, 1b
add %o0, 0x8, %o0
@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
LOAD(prefetch, %o1 + 0x080, #one_read)
LOAD(prefetch, %o1 + 0x0c0, #one_read)
LOAD(prefetch, %o1 + 0x100, #one_read)
- EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
LOAD(prefetch, %o1 + 0x140, #one_read)
- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
LOAD(prefetch, %o1 + 0x180, #one_read)
- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
LOAD(prefetch, %o1 + 0x1c0, #one_read)
faligndata %f0, %f2, %f16
- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
faligndata %f2, %f4, %f18
- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
faligndata %f4, %f6, %f20
- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
faligndata %f6, %f8, %f22
- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
faligndata %f8, %f10, %f24
- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
faligndata %f10, %f12, %f26
- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
add %o1, 0x40, %o1
@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 64
1:
- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f12, %f14, %f28
- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f14, %f0, %f30
- EX_ST_FP(STORE_BLK(%f16, %o0))
- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f0, %f2, %f16
add %o0, 0x40, %o0
- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f2, %f4, %f18
- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f4, %f6, %f20
- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
subcc %o3, 0x01, %o3
faligndata %f6, %f8, %f22
- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f8, %f10, %f24
- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
LOAD(prefetch, %o1 + 0x1c0, #one_read)
faligndata %f10, %f12, %f26
bg,pt %XCC, 1b
@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
/* Finally we copy the last full 64-byte block. */
2:
- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f12, %f14, %f28
- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f14, %f0, %f30
- EX_ST_FP(STORE_BLK(%f16, %o0))
- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f0, %f2, %f16
- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f2, %f4, %f18
- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f4, %f6, %f20
- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f6, %f8, %f22
- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f8, %f10, %f24
cmp %g1, 0
be,pt %XCC, 1f
add %o0, 0x40, %o0
- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
1: faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
- EX_ST_FP(STORE_BLK(%f16, %o0))
+ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
add %o0, 0x40, %o0
add %o1, 0x40, %o1
membar #Sync
@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g2, %o2
be,a,pt %XCC, 1f
- EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
-1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
+1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
- EX_ST_FP(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
be,pn %XCC, 2f
add %o0, 0x8, %o0
- EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f2, %f0, %f8
- EX_ST_FP(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
bne,pn %XCC, 1b
add %o0, 0x8, %o0
@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andcc %o2, 0x8, %g0
be,pt %icc, 1f
nop
- EX_LD(LOAD(ldx, %o1, %o5))
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
+ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
add %o1, 0x8, %o1
+ sub %o2, 8, %o2
1: andcc %o2, 0x4, %g0
be,pt %icc, 1f
nop
- EX_LD(LOAD(lduw, %o1, %o5))
- EX_ST(STORE(stw, %o5, %o1 + %o3))
+ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
+ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
add %o1, 0x4, %o1
+ sub %o2, 4, %o2
1: andcc %o2, 0x2, %g0
be,pt %icc, 1f
nop
- EX_LD(LOAD(lduh, %o1, %o5))
- EX_ST(STORE(sth, %o5, %o1 + %o3))
+ EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
+ EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
add %o1, 0x2, %o1
+ sub %o2, 2, %o2
1: andcc %o2, 0x1, %g0
be,pt %icc, 85f
nop
- EX_LD(LOAD(ldub, %o1, %o5))
+ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
ba,pt %xcc, 85f
- EX_ST(STORE(stb, %o5, %o1 + %o3))
+ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
.align 64
70: /* 16 < len <= 64 */
@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0xf, GLOBAL_SPARE
and %o2, 0xf, %o2
1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
- EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
- EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
+ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
+ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
add %o1, 0x8, %o1
- EX_ST(STORE(stx, %g1, %o1 + %o3))
+ EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x8, %o2
- EX_LD(LOAD(ldx, %o1, %o5))
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
+ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
- EX_LD(LOAD(lduw, %o1, %o5))
- EX_ST(STORE(stw, %o5, %o1 + %o3))
+ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
+ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, 85f
@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g1, %o2
1: subcc %g1, 1, %g1
- EX_LD(LOAD(ldub, %o1, %o5))
- EX_ST(STORE(stb, %o5, %o1 + %o3))
+ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
+ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
bgu,pt %icc, 1b
add %o1, 1, %o1
@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, %o3
andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1, %g2))
+ EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
sub %o3, %g1, %o3
andn %o2, 0x7, GLOBAL_SPARE
sllx %g2, %g1, %g2
-1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
+1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
add %o1, 0x8, %o1
srlx %g3, %o3, %o5
or %o5, %g2, %o5
- EX_ST(STORE(stx, %o5, %o0))
+ EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
add %o0, 0x8, %o0
bgu,pt %icc, 1b
sllx %g3, %g1, %g2
@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1:
subcc %o2, 4, %o2
- EX_LD(LOAD(lduw, %o1, %g1))
- EX_ST(STORE(stw, %g1, %o1 + %o3))
+ EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
+ EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
bgu,pt %XCC, 1b
add %o1, 4, %o1
@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 32
90:
subcc %o2, 1, %o2
- EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o1 + %o3))
+ EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
+ EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
index 482de09..0252b21 100644
--- a/arch/sparc/lib/copy_in_user.S
+++ b/arch/sparc/lib/copy_in_user.S
@@ -9,18 +9,33 @@
#define XCC xcc
-#define EX(x,y) \
+#define EX(x,y,z) \
98: x,y; \
.section __ex_table,"a";\
.align 4; \
- .word 98b, __retl_one; \
+ .word 98b, z; \
.text; \
.align 4;
+#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
+#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
+#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
+
.register %g2,#scratch
.register %g3,#scratch
.text
+__retl_o4_plus_8:
+ add %o4, %o2, %o4
+ retl
+ add %o4, 8, %o0
+__retl_o2_plus_4:
+ retl
+ add %o2, 4, %o0
+__retl_o2_plus_1:
+ retl
+ add %o2, 1, %o0
+
.align 32
/* Don't try to get too fancy here, just nice and
@@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
- EX(ldxa [%o1] %asi, %o5)
- EX(stxa %o5, [%o0] %asi)
+ EX_O4(ldxa [%o1] %asi, %o5)
+ EX_O4(stxa %o5, [%o0] %asi)
add %o1, 0x8, %o1
bgu,pt %XCC, 1b
add %o0, 0x8, %o0
@@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
- EX(lduwa [%o1] %asi, %o5)
- EX(stwa %o5, [%o0] %asi)
+ EX_O2_4(lduwa [%o1] %asi, %o5)
+ EX_O2_4(stwa %o5, [%o0] %asi)
add %o1, 0x4, %o1
add %o0, 0x4, %o0
1: cmp %o2, 0
@@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
82:
subcc %o2, 4, %o2
- EX(lduwa [%o1] %asi, %g1)
- EX(stwa %g1, [%o0] %asi)
+ EX_O2_4(lduwa [%o1] %asi, %g1)
+ EX_O2_4(stwa %g1, [%o0] %asi)
add %o1, 4, %o1
bgu,pt %XCC, 82b
add %o0, 4, %o0
@@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
.align 32
90:
subcc %o2, 1, %o2
- EX(lduba [%o1] %asi, %g1)
- EX(stba %g1, [%o0] %asi)
+ EX_O2_1(lduba [%o1] %asi, %g1)
+ EX_O2_1(stba %g1, [%o0] %asi)
add %o1, 1, %o1
bgu,pt %XCC, 90b
add %o0, 1, %o0
diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
deleted file mode 100644
index ac96ae2..0000000
--- a/arch/sparc/lib/user_fixup.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* user_fixup.c: Fix up user copy faults.
- *
- * Copyright (C) 2004 David S. Miller <davem@redhat.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-
-#include <asm/uaccess.h>
-
-/* Calculating the exact fault address when using
- * block loads and stores can be very complicated.
- *
- * Instead of trying to be clever and handling all
- * of the cases, just fix things up simply here.
- */
-
-static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
-{
- unsigned long fault_addr = current_thread_info()->fault_address;
- unsigned long end = start + size;
-
- if (fault_addr < start || fault_addr >= end) {
- *offset = 0;
- } else {
- *offset = fault_addr - start;
- size = end - fault_addr;
- }
- return size;
-}
-
-unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
-{
- unsigned long offset;
-
- size = compute_size((unsigned long) from, size, &offset);
- if (likely(size))
- memset(to + offset, 0, size);
-
- return size;
-}
-EXPORT_SYMBOL(copy_from_user_fixup);
-
-unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
-{
- unsigned long offset;
-
- return compute_size((unsigned long) to, size, &offset);
-}
-EXPORT_SYMBOL(copy_to_user_fixup);
-
-unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
-{
- unsigned long fault_addr = current_thread_info()->fault_address;
- unsigned long start = (unsigned long) to;
- unsigned long end = start + size;
-
- if (fault_addr >= start && fault_addr < end)
- return end - fault_addr;
-
- start = (unsigned long) from;
- end = start + size;
- if (fault_addr >= start && fault_addr < end)
- return end - fault_addr;
-
- return size;
-}
-EXPORT_SYMBOL(copy_in_user_fixup);
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 4e06750..cd0e32b 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -238,7 +238,8 @@ slow:
pages += nr;
ret = get_user_pages_unlocked(start,
- (end - start) >> PAGE_SHIFT, write, 0, pages);
+ (end - start) >> PAGE_SHIFT, pages,
+ write ? FOLL_WRITE : 0);
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 439784b..37aa537 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -802,8 +802,10 @@ struct mdesc_mblock {
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;
+static int find_numa_node_for_addr(unsigned long pa,
+ struct node_mem_mask *pnode_mask);
-static unsigned long ra_to_pa(unsigned long addr)
+static unsigned long __init ra_to_pa(unsigned long addr)
{
int i;
@@ -819,8 +821,11 @@ static unsigned long ra_to_pa(unsigned long addr)
return addr;
}
-static int find_node(unsigned long addr)
+static int __init find_node(unsigned long addr)
{
+ static bool search_mdesc = true;
+ static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
+ static int last_index;
int i;
addr = ra_to_pa(addr);
@@ -830,13 +835,30 @@ static int find_node(unsigned long addr)
if ((addr & p->mask) == p->val)
return i;
}
- /* The following condition has been observed on LDOM guests.*/
- WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
- " rule. Some physical memory will be owned by node 0.");
- return 0;
+ /* The following condition has been observed on LDOM guests because
+ * node_masks only contains the best latency mask and value.
+ * LDOM guest's mdesc can contain a single latency group to
+ * cover multiple address range. Print warning message only if the
+ * address cannot be found in node_masks nor mdesc.
+ */
+ if ((search_mdesc) &&
+ ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
+ /* find the available node in the mdesc */
+ last_index = find_numa_node_for_addr(addr, &last_mem_mask);
+ numadbg("find_node: latency group for address 0x%lx is %d\n",
+ addr, last_index);
+ if ((last_index < 0) || (last_index >= num_node_masks)) {
+ /* WARN_ONCE() and use default group 0 */
+ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
+ search_mdesc = false;
+ last_index = 0;
+ }
+ }
+
+ return last_index;
}
-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
*nid = find_node(start);
start += PAGE_SIZE;
@@ -1160,6 +1182,41 @@ int __node_distance(int from, int to)
return numa_latency[from][to];
}
+static int find_numa_node_for_addr(unsigned long pa,
+ struct node_mem_mask *pnode_mask)
+{
+ struct mdesc_handle *md = mdesc_grab();
+ u64 node, arc;
+ int i = 0;
+
+ node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+ if (node == MDESC_NODE_NULL)
+ goto out;
+
+ mdesc_for_each_node_by_name(md, node, "group") {
+ mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
+ u64 target = mdesc_arc_target(md, arc);
+ struct mdesc_mlgroup *m = find_mlgroup(target);
+
+ if (!m)
+ continue;
+ if ((pa & m->mask) == m->match) {
+ if (pnode_mask) {
+ pnode_mask->mask = m->mask;
+ pnode_mask->val = m->match;
+ }
+ mdesc_release(md);
+ return i;
+ }
+ }
+ i++;
+ }
+
+out:
+ mdesc_release(md);
+ return -1;
+}
+
static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{
int i;
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index f2b7711..e20fbba 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
return (tag == (vaddr >> 22));
}
+static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
+{
+ unsigned long idx;
+
+ for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
+ struct tsb *ent = &swapper_tsb[idx];
+ unsigned long match = idx << 13;
+
+ match |= (ent->tag << 22);
+ if (match >= start && match < end)
+ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+ }
+}
+
/* TSB flushes need only occur on the processor initiating the address
* space modification, not on each cpu the address space has run on.
* Only the TLB flush needs that treatment.
@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
unsigned long v;
+ if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
+ return flush_tsb_kernel_range_scan(start, end);
+
for (v = start; v < end; v += PAGE_SIZE) {
unsigned long hash = tsb_hash(v, PAGE_SHIFT,
KERNEL_TSB_NENTRIES);
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index b4f4733..5d2fd6c 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -30,7 +30,7 @@
.text
.align 32
.globl __flush_tlb_mm
-__flush_tlb_mm: /* 18 insns */
+__flush_tlb_mm: /* 19 insns */
/* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
.align 32
.globl __flush_tlb_pending
-__flush_tlb_pending: /* 26 insns */
+__flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
.align 32
.globl __flush_tlb_kernel_range
-__flush_tlb_kernel_range: /* 16 insns */
+__flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
+ sub %o1, %o0, %o3
+ srlx %o3, 18, %o4
+ brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
sethi %hi(PAGE_SIZE), %o4
- sub %o1, %o0, %o3
sub %o3, %o4, %o3
or %o0, 0x20, %o0 ! Nucleus
1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
retl
nop
nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+__spitfire_flush_tlb_kernel_range_slow:
+ mov 63 * 8, %o4
+1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
+ andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
+ bne,pn %xcc, 2f
+ mov TLB_TAG_ACCESS, %o3
+ stxa %g0, [%o3] ASI_IMMU
+ stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
+ membar #Sync
+2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
+ andcc %o3, 0x40, %g0
+ bne,pn %xcc, 2f
+ mov TLB_TAG_ACCESS, %o3
+ stxa %g0, [%o3] ASI_DMMU
+ stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
+ membar #Sync
+2: sub %o4, 8, %o4
+ brgez,pt %o4, 1b
+ nop
+ retl
+ nop
__spitfire_flush_tlb_mm_slow:
rdpr %pstate, %g1
@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
retl
wrpr %g7, 0x0, %pstate
+__cheetah_flush_tlb_kernel_range: /* 31 insns */
+ /* %o0=start, %o1=end */
+ cmp %o0, %o1
+ be,pn %xcc, 2f
+ sub %o1, %o0, %o3
+ srlx %o3, 18, %o4
+ brnz,pn %o4, 3f
+ sethi %hi(PAGE_SIZE), %o4
+ sub %o3, %o4, %o3
+ or %o0, 0x20, %o0 ! Nucleus
+1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
+ stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
+ membar #Sync
+ brnz,pt %o3, 1b
+ sub %o3, %o4, %o3
+2: sethi %hi(KERNBASE), %o3
+ flush %o3
+ retl
+ nop
+3: mov 0x80, %o4
+ stxa %g0, [%o4] ASI_DMMU_DEMAP
+ membar #Sync
+ stxa %g0, [%o4] ASI_IMMU_DEMAP
+ membar #Sync
+ retl
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
#ifdef DCACHE_ALIASING_POSSIBLE
__cheetah_flush_dcache_page: /* 11 insns */
sethi %hi(PAGE_OFFSET), %g1
@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
ret
restore
-__hypervisor_flush_tlb_mm: /* 10 insns */
+__hypervisor_flush_tlb_mm: /* 19 insns */
mov %o0, %o2 /* ARG2: mmu context */
mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
- brnz,pn %o0, __hypervisor_tlb_tl0_error
+ brnz,pn %o0, 1f
mov HV_FAST_MMU_DEMAP_CTX, %o1
retl
nop
+1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
+ jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
-__hypervisor_flush_tlb_page: /* 11 insns */
+__hypervisor_flush_tlb_page: /* 22 insns */
/* %o0 = context, %o1 = vaddr */
mov %o0, %g2
mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP
- brnz,pn %o0, __hypervisor_tlb_tl0_error
+ brnz,pn %o0, 1f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1
retl
nop
+1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
+ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
-__hypervisor_flush_tlb_pending: /* 16 insns */
+__hypervisor_flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
sllx %o1, 3, %g1
mov %o2, %g2
@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP
- brnz,pn %o0, __hypervisor_tlb_tl0_error
+ brnz,pn %o0, 1f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1
brnz,pt %g1, 1b
nop
retl
nop
+1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
+ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
-__hypervisor_flush_tlb_kernel_range: /* 16 insns */
+__hypervisor_flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
- sethi %hi(PAGE_SIZE), %g3
- mov %o0, %g1
- sub %o1, %g1, %g2
+ sub %o1, %o0, %g2
+ srlx %g2, 18, %g3
+ brnz,pn %g3, 4f
+ mov %o0, %g1
+ sethi %hi(PAGE_SIZE), %g3
sub %g2, %g3, %g2
1: add %g1, %g2, %o0 /* ARG0: virtual address */
mov 0, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
ta HV_MMU_UNMAP_ADDR_TRAP
- brnz,pn %o0, __hypervisor_tlb_tl0_error
+ brnz,pn %o0, 3f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1
brnz,pt %g2, 1b
sub %g2, %g3, %g2
2: retl
nop
+3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
+ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+ nop
+4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
+ mov 0, %o1 /* ARG1: CPU lists unimplemented */
+ mov 0, %o2 /* ARG2: mmu context == nucleus */
+ mov HV_MMU_ALL, %o3 /* ARG3: flags */
+ mov HV_FAST_MMU_DEMAP_CTX, %o5
+ ta HV_FAST_TRAP
+ brnz,pn %o0, 3b
+ mov HV_FAST_MMU_DEMAP_CTX, %o1
+ retl
+ nop
#ifdef DCACHE_ALIASING_POSSIBLE
/* XXX Niagara and friends have an 8K cache, so no aliasing is
@@ -394,43 +511,6 @@ tlb_patch_one:
retl
nop
- .globl cheetah_patch_cachetlbops
-cheetah_patch_cachetlbops:
- save %sp, -128, %sp
-
- sethi %hi(__flush_tlb_mm), %o0
- or %o0, %lo(__flush_tlb_mm), %o0
- sethi %hi(__cheetah_flush_tlb_mm), %o1
- or %o1, %lo(__cheetah_flush_tlb_mm), %o1
- call tlb_patch_one
- mov 19, %o2
-
- sethi %hi(__flush_tlb_page), %o0
- or %o0, %lo(__flush_tlb_page), %o0
- sethi %hi(__cheetah_flush_tlb_page), %o1
- or %o1, %lo(__cheetah_flush_tlb_page), %o1
- call tlb_patch_one
- mov 22, %o2
-
- sethi %hi(__flush_tlb_pending), %o0
- or %o0, %lo(__flush_tlb_pending), %o0
- sethi %hi(__cheetah_flush_tlb_pending), %o1
- or %o1, %lo(__cheetah_flush_tlb_pending), %o1
- call tlb_patch_one
- mov 27, %o2
-
-#ifdef DCACHE_ALIASING_POSSIBLE
- sethi %hi(__flush_dcache_page), %o0
- or %o0, %lo(__flush_dcache_page), %o0
- sethi %hi(__cheetah_flush_dcache_page), %o1
- or %o1, %lo(__cheetah_flush_dcache_page), %o1
- call tlb_patch_one
- mov 11, %o2
-#endif /* DCACHE_ALIASING_POSSIBLE */
-
- ret
- restore
-
#ifdef CONFIG_SMP
/* These are all called by the slaves of a cross call, at
* trap level 1, with interrupts fully disabled.
@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
*/
.align 32
.globl xcall_flush_tlb_mm
-xcall_flush_tlb_mm: /* 21 insns */
+xcall_flush_tlb_mm: /* 24 insns */
mov PRIMARY_CONTEXT, %g2
ldxa [%g2] ASI_DMMU, %g3
srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
nop
nop
nop
+ nop
+ nop
+ nop
.globl xcall_flush_tlb_page
-xcall_flush_tlb_page: /* 17 insns */
+xcall_flush_tlb_page: /* 20 insns */
/* %g5=context, %g1=vaddr */
mov PRIMARY_CONTEXT, %g4
ldxa [%g4] ASI_DMMU, %g2
@@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
retry
nop
nop
+ nop
+ nop
+ nop
.globl xcall_flush_tlb_kernel_range
-xcall_flush_tlb_kernel_range: /* 25 insns */
+xcall_flush_tlb_kernel_range: /* 44 insns */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
- add %g2, 1, %g2
+ srlx %g3, 18, %g2
+ brnz,pn %g2, 2f
+ add %g2, 1, %g2
sub %g3, %g2, %g3
or %g1, 0x20, %g1 ! Nucleus
1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
brnz,pt %g3, 1b
sub %g3, %g2, %g3
retry
- nop
- nop
+2: mov 63 * 8, %g1
+1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
+ andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
+ bne,pn %xcc, 2f
+ mov TLB_TAG_ACCESS, %g2
+ stxa %g0, [%g2] ASI_IMMU
+ stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
+ membar #Sync
+2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
+ andcc %g2, 0x40, %g0
+ bne,pn %xcc, 2f
+ mov TLB_TAG_ACCESS, %g2
+ stxa %g0, [%g2] ASI_DMMU
+ stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
+ membar #Sync
+2: sub %g1, 8, %g1
+ brgez,pt %g1, 1b
+ nop
+ retry
nop
nop
nop
@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
retry
+__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
+ sethi %hi(PAGE_SIZE - 1), %g2
+ or %g2, %lo(PAGE_SIZE - 1), %g2
+ andn %g1, %g2, %g1
+ andn %g7, %g2, %g7
+ sub %g7, %g1, %g3
+ srlx %g3, 18, %g2
+ brnz,pn %g2, 2f
+ add %g2, 1, %g2
+ sub %g3, %g2, %g3
+ or %g1, 0x20, %g1 ! Nucleus
+1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
+ stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
+ membar #Sync
+ brnz,pt %g3, 1b
+ sub %g3, %g2, %g3
+ retry
+2: mov 0x80, %g2
+ stxa %g0, [%g2] ASI_DMMU_DEMAP
+ membar #Sync
+ stxa %g0, [%g2] ASI_IMMU_DEMAP
+ membar #Sync
+ retry
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
#ifdef DCACHE_ALIASING_POSSIBLE
.align 32
.globl xcall_flush_dcache_page_cheetah
@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
ba,a,pt %xcc, rtrap
.globl __hypervisor_xcall_flush_tlb_mm
-__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
/* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
mov %o0, %g2
mov %o1, %g3
@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
mov HV_FAST_MMU_DEMAP_CTX, %g6
- brnz,pn %o0, __hypervisor_tlb_xcall_error
+ brnz,pn %o0, 1f
mov %o0, %g5
mov %g2, %o0
mov %g3, %o1
@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
mov %g7, %o5
membar #Sync
retry
+1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
+ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+ nop
.globl __hypervisor_xcall_flush_tlb_page
-__hypervisor_xcall_flush_tlb_page: /* 17 insns */
+__hypervisor_xcall_flush_tlb_page: /* 20 insns */
/* %g5=ctx, %g1=vaddr */
mov %o0, %g2
mov %o1, %g3
@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP
mov HV_MMU_UNMAP_ADDR_TRAP, %g6
- brnz,a,pn %o0, __hypervisor_tlb_xcall_error
+ brnz,a,pn %o0, 1f
mov %o0, %g5
mov %g2, %o0
mov %g3, %o1
mov %g4, %o2
membar #Sync
retry
+1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
+ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+ nop
.globl __hypervisor_xcall_flush_tlb_kernel_range
-__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
+__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
/* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
+ srlx %g3, 18, %g7
add %g2, 1, %g2
sub %g3, %g2, %g3
mov %o0, %g2
mov %o1, %g4
- mov %o2, %g7
+ brnz,pn %g7, 2f
+ mov %o2, %g7
1: add %g1, %g3, %o0 /* ARG0: virtual address */
mov 0, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
ta HV_MMU_UNMAP_ADDR_TRAP
mov HV_MMU_UNMAP_ADDR_TRAP, %g6
- brnz,pn %o0, __hypervisor_tlb_xcall_error
+ brnz,pn %o0, 1f
mov %o0, %g5
sethi %hi(PAGE_SIZE), %o2
brnz,pt %g3, 1b
sub %g3, %o2, %g3
- mov %g2, %o0
+5: mov %g2, %o0
mov %g4, %o1
mov %g7, %o2
membar #Sync
retry
+1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
+ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+ nop
+2: mov %o3, %g1
+ mov %o5, %g3
+ mov 0, %o0 /* ARG0: CPU lists unimplemented */
+ mov 0, %o1 /* ARG1: CPU lists unimplemented */
+ mov 0, %o2 /* ARG2: mmu context == nucleus */
+ mov HV_MMU_ALL, %o3 /* ARG3: flags */
+ mov HV_FAST_MMU_DEMAP_CTX, %o5
+ ta HV_FAST_TRAP
+ mov %g1, %o3
+ brz,pt %o0, 5b
+ mov %g3, %o5
+ mov HV_FAST_MMU_DEMAP_CTX, %g6
+ ba,pt %xcc, 1b
+ clr %g5
/* These just get rescheduled to PIL vectors. */
.globl xcall_call_function
@@ -809,6 +985,58 @@ xcall_kgdb_capture:
#endif /* CONFIG_SMP */
+ .globl cheetah_patch_cachetlbops
+cheetah_patch_cachetlbops:
+ save %sp, -128, %sp
+
+ sethi %hi(__flush_tlb_mm), %o0
+ or %o0, %lo(__flush_tlb_mm), %o0
+ sethi %hi(__cheetah_flush_tlb_mm), %o1
+ or %o1, %lo(__cheetah_flush_tlb_mm), %o1
+ call tlb_patch_one
+ mov 19, %o2
+
+ sethi %hi(__flush_tlb_page), %o0
+ or %o0, %lo(__flush_tlb_page), %o0
+ sethi %hi(__cheetah_flush_tlb_page), %o1
+ or %o1, %lo(__cheetah_flush_tlb_page), %o1
+ call tlb_patch_one
+ mov 22, %o2
+
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__cheetah_flush_tlb_pending), %o1
+ or %o1, %lo(__cheetah_flush_tlb_pending), %o1
+ call tlb_patch_one
+ mov 27, %o2
+
+ sethi %hi(__flush_tlb_kernel_range), %o0
+ or %o0, %lo(__flush_tlb_kernel_range), %o0
+ sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
+ or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
+ call tlb_patch_one
+ mov 31, %o2
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+ sethi %hi(__flush_dcache_page), %o0
+ or %o0, %lo(__flush_dcache_page), %o0
+ sethi %hi(__cheetah_flush_dcache_page), %o1
+ or %o1, %lo(__cheetah_flush_dcache_page), %o1
+ call tlb_patch_one
+ mov 11, %o2
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+#ifdef CONFIG_SMP
+ sethi %hi(xcall_flush_tlb_kernel_range), %o0
+ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
+ sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
+ or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
+ call tlb_patch_one
+ mov 44, %o2
+#endif /* CONFIG_SMP */
+
+ ret
+ restore
.globl hypervisor_patch_cachetlbops
hypervisor_patch_cachetlbops:
@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
sethi %hi(__hypervisor_flush_tlb_mm), %o1
or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
call tlb_patch_one
- mov 10, %o2
+ mov 19, %o2
sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__hypervisor_flush_tlb_page), %o1
or %o1, %lo(__hypervisor_flush_tlb_page), %o1
call tlb_patch_one
- mov 11, %o2
+ mov 22, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__hypervisor_flush_tlb_pending), %o1
or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
call tlb_patch_one
- mov 16, %o2
+ mov 27, %o2
sethi %hi(__flush_tlb_kernel_range), %o0
or %o0, %lo(__flush_tlb_kernel_range), %o0
sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
call tlb_patch_one
- mov 16, %o2
+ mov 31, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
call tlb_patch_one
- mov 21, %o2
+ mov 24, %o2
sethi %hi(xcall_flush_tlb_page), %o0
or %o0, %lo(xcall_flush_tlb_page), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
call tlb_patch_one
- mov 17, %o2
+ mov 20, %o2
sethi %hi(xcall_flush_tlb_kernel_range), %o0
or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
call tlb_patch_one
- mov 25, %o2
+ mov 44, %o2
#endif /* CONFIG_SMP */
ret
OpenPOWER on IntegriCloud